In [18]:
# Q1 Answer

# Known probabilities taken from the problem statement:
#   P(M) = 0.81, P(U18) = 0.15, P(M^c ∩ U18) = 0.08
p_m, p_u18, p_mc_u18 = 0.81, 0.15, 0.08

# The remaining cells follow from complements and the row/column totals:
#   P(M^c)         = 1 - P(M)                  = 0.19
#   P(M ∩ U18)     = P(U18) - P(M^c ∩ U18)     = 0.07
#   P(M ∩ U18^c)   = P(M) - P(M ∩ U18)         = 0.74
#   P(M^c ∩ U18^c) = P(M^c) - P(M^c ∩ U18)     = 0.11
p_mc = 1 - p_m
p_m_u18 = p_u18 - p_mc_u18
p_m_u18c = p_m - p_m_u18
p_mc_u18c = p_mc - p_mc_u18

# (a) Emit the whole 2×2 table in one call, one argument per output line
print(
    "=== (a) 2×2 Contingency Table ===",
    "        |      M      |     M^c     |  RowSum ",
    "------------------------------------------------",
    f"U18     | {p_m_u18:10.2f} | {p_mc_u18:10.2f} | {p_u18:7.2f}",
    f"U18^c   | {p_m_u18c:10.2f} | {p_mc_u18c:10.2f} | {1 - p_u18:7.2f}",
    "------------------------------------------------",
    f"ColSum  | {p_m:10.2f} | {p_mc:10.2f} |   1.00\n",
    sep="\n",
)

# (b) Inclusion–exclusion: P(M ∪ U18) = P(M) + P(U18) - P(M ∩ U18)
p_m_or_u18 = p_m + p_u18 - p_m_u18
print(f"=== (b) P(M ∪ U18) ===  {p_m_or_u18:.2f}\n")

# (c) "Neither male nor under 18" is the complement of the union
p_neither = 1 - p_m_or_u18
print(f"=== (c) Probability of 'neither male nor under 18' ===  {p_neither:.2f}\n")

# (d) Male AND at least 18 is the cell P(M ∩ U18^c) computed above
print(f"=== (d) P(male and >=18) ===  {p_m_u18c:.2f}\n")

# (e) Mutually exclusive events have an (approximately) zero intersection
print("=== (e) Are M and U18 mutually exclusive? ===")
print(
    "Yes, they are mutually exclusive (intersection ~ 0)."
    if abs(p_m_u18) < 1e-9
    else "No, they are NOT mutually exclusive (intersection is nonzero)."
)



=== (a) 2×2 Contingency Table ===
        |      M      |     M^c     |  RowSum 
------------------------------------------------
U18     |       0.07 |       0.08 |    0.15
U18^c   |       0.74 |       0.11 |    0.85
------------------------------------------------
ColSum  |       0.81 |       0.19 |   1.00

=== (b) P(M ∪ U18) ===  0.89

=== (c) Probability of 'neither male nor under 18' ===  0.11

=== (d) P(male and >=18) ===  0.74

=== (e) Are M and U18 mutually exclusive? ===
No, they are NOT mutually exclusive (intersection is nonzero).


In [24]:
# Q2 Answer
# A city has 40% (p=0.4) of voters in favor of a smoking-ban policy.
# I randomly sample 5 voters (n=5).
# Let X = number of voters (out of 5) who favor the ban.

import math

# Binomial setup: n = 5 sampled voters, success probability p = 0.4,
# and the complementary failure probability q = 1 - p = 0.6
n, p = 5, 0.4
q = 1 - p

# Define a function to compute P(X=k) in a Binomial(n,p)
def binomial_probability(k, n, p):
    """Return P(X = k) for X ~ Binomial(n, p).

    Parameters
    ----------
    k : int
        Number of successes whose probability is wanted.
    n : int
        Number of independent trials.
    p : float
        Probability of success on a single trial; must lie in [0, 1].

    Returns
    -------
    float
        C(n, k) * p**k * (1 - p)**(n - k), or 0.0 when k > n.

    Raises
    ------
    ValueError
        If p is outside [0, 1]. math.comb itself raises ValueError when
        k or n is negative.
    """
    if not 0 <= p <= 1:
        raise ValueError(f"p must be in [0, 1], got {p!r}")
    if k > n:
        # More successes than trials is impossible; returning early also
        # avoids a negative exponent (0 ** negative) when p == 1.
        return 0.0
    # The failure probability is derived from the `p` argument itself, so the
    # result never depends on whatever a module-level `q` currently holds.
    return math.comb(n, k) * (p ** k) * ((1 - p) ** (n - k))

# (a) Exactly two of the five voters favor the ban: P(X = 2)
p_x_eq_2 = binomial_probability(2, n, p)

# (b) Fewer than four favor the ban: P(X < 4) = P(X=0) + P(X=1) + P(X=2) + P(X=3)
p_x_lt_4 = sum(binomial_probability(k, n, p) for k in range(4))

# (c) At least one favors the ban, via the complement: P(X >= 1) = 1 - P(X = 0)
p_x_ge_1 = 1 - binomial_probability(0, n, p)

# Report all three answers in a single call, one argument per output line
print(
    "=== Q2 Results ===",
    f"(a) P(X=2)        = {p_x_eq_2:.4f}",
    f"(b) P(X<4)        = {p_x_lt_4:.4f}",
    f"(c) P(X>=1)       = {p_x_ge_1:.4f}",
    sep="\n",
)


=== Q2 Results ===
(a) P(X=2)        = 0.3456
(b) P(X<4)        = 0.9130
(c) P(X>=1)       = 0.9222


In [28]:
# Q3 Answer
# A survey shows 40% (p=0.4) of employed Canadians would have difficulty if paycheque is delayed.
# I randomly select 2, then compute the probability that (a) both, (b) neither, (c) at least one has difficulty.
# Finally, (d) find the minimum n so that P(at least one difficulty) >= 0.95.

# p: chance a randomly selected employee has difficulty; q: chance they do not
p = 0.4
q = 1 - p

# (a) both of the two selected people have difficulty  -> p * p
# (b) neither of them has difficulty                   -> q * q
# (c) at least one has difficulty                      -> 1 - P(neither)
p_both = p * p
p_neither = q * q
p_at_least_one = 1 - p_neither

# (d) Smallest n with P(at least one difficulty) >= 0.95.
#     1 - q^n >= 0.95  <=>  q^n <= 0.05, so grow n until q^n drops to the threshold.
threshold = 0.05
n = 1
while q**n > threshold:
    n += 1

print(
    "=== Q3 Results ===",
    f"(a) Probability both have difficulty: {p_both:.2f}",
    f"(b) Probability neither have difficulty: {p_neither:.2f}",
    f"(c) Probability at least one has difficulty: {p_at_least_one:.2f}",
    f"(d) Minimum n for 'at least one difficulty' >= 0.95 is: {n}",
    sep="\n",
)


=== Q3 Results ===
(a) Probability both have difficulty: 0.16
(b) Probability neither have difficulty: 0.36
(c) Probability at least one has difficulty: 0.64
(d) Minimum n for 'at least one difficulty' >= 0.95 is: 6


In [32]:
# Q4 Answer

# I have SAT scores ~ Normal(μ=530, σ=110). I want P(X>500).
# I'll define a custom normal_cdf using erf, then compute 1 - CDF(500).

def normal_cdf(x, mean, std):
    """
    Return the cumulative distribution function (CDF) value P(X <= x) for a
    normal distribution N(mean, std^2), using the error function (erf).

    Formula:
      Z = (x - mean) / (std * sqrt(2))
      CDF(x) = 0.5 * [1 + erf(Z)]

    Parameters:
      x    -- point at which the CDF is evaluated
      mean -- mean of the distribution
      std  -- standard deviation of the distribution; must be > 0

    Returns:
      float in [0, 1]

    Raises:
      ValueError -- if std is zero or negative
    """
    if std <= 0:
        # std == 0 would divide by zero; a negative std would flip the sign of
        # Z and silently return the complementary probability instead.
        raise ValueError(f"std must be positive, got {std!r}")
    z = (x - mean) / (std * math.sqrt(2))
    return 0.5 * (1.0 + math.erf(z))

# SAT-score model parameters (mean, standard deviation) and the cut-off score
mu, sigma = 530, 110
score_value = 500

# Upper-tail probability: P(X > 500) = 1 - P(X <= 500) = 1 - CDF(500)
p_above_500 = 1 - normal_cdf(score_value, mu, sigma)

print(
    "=== Q4 Result ===",
    f"Probability of scoring above {score_value} is approximately: {p_above_500:.4f}",
    sep="\n",
)


=== Q4 Result ===
Probability of scoring above 500 is approximately: 0.6075


In [36]:
# Q5 Answer
# Customers' concession spending ~ Normal(mean=4.11, std=1.37).
# I want the percentage of customers who spend less than $3.00.

def normal_cdf(x, mean, std):
    """Return P(X <= x) for X ~ N(mean, std^2), computed with the error function.

    CDF(x) = 0.5 * [1 + erf((x - mean) / (std * sqrt(2)))]

    Note: this repeats the normal_cdf helper already defined in the Q4 cell.

    Parameters:
      x    -- point at which the CDF is evaluated
      mean -- mean of the distribution
      std  -- standard deviation of the distribution; must be > 0

    Returns:
      float in [0, 1]

    Raises:
      ValueError -- if std is zero or negative
    """
    if std <= 0:
        # Guard against division by zero (std == 0) and against the mirrored,
        # silently wrong probability a negative std would produce.
        raise ValueError(f"std must be positive, got {std!r}")
    z = (x - mean) / (std * math.sqrt(2))
    return 0.5 * (1.0 + math.erf(z))

# Spending model parameters (mean, standard deviation) and the dollar cut-off
mean_spending, std_spending = 4.11, 1.37
threshold_spending = 3.00

# P(X < 3.00) is the CDF evaluated at the cut-off; multiply by 100 for a percentage
p_less_than_3 = normal_cdf(threshold_spending, mean_spending, std_spending)
percent_less_than_3 = p_less_than_3 * 100

print(
    "=== Q5 Result ===",
    f"Percentage of customers spending less than ${threshold_spending:.2f}: {percent_less_than_3:.2f}%",
    sep="\n",
)


=== Q5 Result ===
Percentage of customers spending less than $3.00: 20.89%


In [40]:
# Q6 Answer

# I have 3 types of coins (A,B,C) with different probabilities of heads:
#   Type A: 0.5
#   Type B: 0.6
#   Type C: 0.9
# The drawer has 2 A-coins, 2 B-coins, and 1 C-coin (total 5 coins).
# I pick a coin at random and flip it once, observing heads (event D).
# I want to fill out a Bayesian table: prior, likelihood, posterior for each type.

# Hypotheses: which type of coin was drawn
types = ["A", "B", "C"]

# Prior P(H): share of each coin type in the drawer (2 A, 2 B, 1 C out of 5)
prior = {"A": 2/5, "B": 2/5, "C": 1/5}

# Likelihood P(D|H): probability of heads for each coin type
likelihood = {"A": 0.5, "B": 0.6, "C": 0.9}

# Total probability of heads: P(D) = Σ_H P(D|H) * P(H)
p_d = sum(likelihood[t] * prior[t] for t in types)

# Bayes' rule: P(H|D) = P(D|H) * P(H) / P(D)
posterior = {t: likelihood[t] * prior[t] / p_d for t in types}

print("=== Q6 Bayesian Table ===\n")

# Two header rows (column names, then notation) followed by a rule
print(
    f"{'hypothesis':<11} | {'prior':>7} | {'likelihood':>10} | {'posterior':>10}",
    f"{'H':<11} | {'P(H)':>7} | {'P(D|H)':>10} | {'P(H|D)':>10}",
    "-"*60,
    sep="\n",
)

# One table row per hypothesis
for coin_type in types:
    print(
        f"{coin_type:<11} | {prior[coin_type]:7.3f} | "
        f"{likelihood[coin_type]:10.3f} | {posterior[coin_type]:10.3f}"
    )

=== Q6 Bayesian Table ===

hypothesis  |   prior | likelihood |  posterior
H           |    P(H) |     P(D|H) |     P(H|D)
------------------------------------------------------------
A           |   0.400 |      0.500 |      0.323
B           |   0.400 |      0.600 |      0.387
C           |   0.200 |      0.900 |      0.290


In [44]:
# Q7 Answer

# I have a dataset (S.No, Malicious, Viagra, Meet, class) of 4 samples, plus a new message M5.

# Print the 5×5 table (M1..M4 are training, M5 is the new message)
print("=== Dataset Table (5 rows, 5 columns) ===\n")

# A single row template is shared by the header and every data row, so the
# column separators line up; per-row hand-tuned padding drifts out of alignment.
row_fmt = "{:<5} | {:<9} | {:<6} | {:<5} | {:<5}"
table_header = ("S.No", "Malicious", "Viagra", "Meet", "class")
table_rows = [
    ("M1", "yes", "yes", "yes", "spam"),
    ("M2", "no",  "no",  "yes", "ham"),
    ("M3", "yes", "no",  "yes", "spam"),
    ("M4", "no",  "yes", "no",  "ham"),
    ("M5", "yes", "no",  "yes", "???"),   # class of M5 is what is being predicted
]

print(row_fmt.format(*table_header))
print("-"*50)
for table_row in table_rows:
    print(row_fmt.format(*table_row))
print()


# Prepare the training data (M1..M4) as (feature-dict, class-label) pairs
training_data = [
    ({"Malicious": "yes", "Viagra": "yes", "Meet": "yes"}, "spam"),
    ({"Malicious": "no",  "Viagra": "no",  "Meet": "yes"}, "ham"),
    ({"Malicious": "yes", "Viagra": "no",  "Meet": "yes"}, "spam"),
    ({"Malicious": "no",  "Viagra": "yes", "Meet": "no"},  "ham"),
]

# Class priors are the relative frequencies of each label in the training set
total = len(training_data)
spam_count = sum(label == "spam" for _, label in training_data)
ham_count = sum(label == "ham" for _, label in training_data)

p_spam_prior = spam_count / total
p_ham_prior = ham_count / total

# Define function for conditional probability of a feature, given label
def conditional_probability(feature_name, value, label, data=None, alpha=0.0):
    """Estimate P(feature_name = value | class = label) from labelled examples.

    Parameters
    ----------
    feature_name : str
        Key of the feature inside each example's feature dict.
    value :
        Feature value whose conditional probability is wanted.
    label :
        Class label to condition on.
    data : list of (feature_dict, label) pairs, optional
        Examples to estimate from. Defaults to the module-level
        `training_data`.
    alpha : float, optional
        Additive (Laplace) smoothing constant. The default 0.0 gives the plain
        relative frequency. With alpha > 0 the estimate is
        (count + alpha) / (N_label + alpha * K), where K is the number of
        distinct values seen for the feature (counting `value` itself), so an
        unseen feature/class combination no longer yields exactly 0.

    Returns
    -------
    float
        The estimated probability; 0.0 when there are no examples of `label`
        and no smoothing is applied.

    Raises
    ------
    ValueError
        If alpha is negative.
    """
    if alpha < 0:
        raise ValueError(f"alpha must be non-negative, got {alpha!r}")
    if data is None:
        data = training_data

    # Feature dicts of the examples that carry the requested label
    label_examples = [feat for (feat, lbl) in data if lbl == label]
    # Count how many among label_examples have feature_name == value
    match_count = sum(1 for feat in label_examples if feat[feature_name] == value)

    if alpha == 0:
        if len(label_examples) == 0:
            return 0.0
        return match_count / len(label_examples)

    # Smoothed estimate: K counts every distinct value observed for this
    # feature across all classes, plus the queried value if it was never seen.
    distinct_values = {feat[feature_name] for (feat, _lbl) in data} | {value}
    return (match_count + alpha) / (len(label_examples) + alpha * len(distinct_values))
    
# The new message M5 to classify
M5 = {"Malicious": "yes", "Viagra": "no", "Meet": "yes"}

# Per-class score = prior * Π P(feature = value | class).
# These are the unnormalized posteriors for spam and ham given M5.
p_m5_given_spam, p_m5_given_ham = p_spam_prior, p_ham_prior
for f_name in M5:
    f_val = M5[f_name]
    p_m5_given_spam *= conditional_probability(f_name, f_val, "spam")
    p_m5_given_ham *= conditional_probability(f_name, f_val, "ham")

# Normalize the two scores so they sum to 1; if both are zero, report 0 for each
total_both = p_m5_given_spam + p_m5_given_ham
p_spam_post = p_m5_given_spam / total_both if total_both > 0 else 0
p_ham_post = p_m5_given_ham / total_both if total_both > 0 else 0

print(
    "=== Q7: Naive Bayes Classification for M5 ===",
    f"Prior: P(spam)={p_spam_prior:.2f}, P(ham)={p_ham_prior:.2f}",
    f"M5 -> {M5}",
    f"Unnormalized: spam={p_m5_given_spam:.6f}, ham={p_m5_given_ham:.6f}",
    f"Posterior Probability => P(spam|M5)={p_spam_post:.4f}, P(ham|M5)={p_ham_post:.4f}",
    sep="\n",
)


=== Dataset Table (5 rows, 5 columns) ===

S.No  | Malicious | Viagra | Meet  | class
--------------------------------------------------
M1     | yes          | yes     | yes   | spam 
M2     | no            | no       | yes   | ham  
M3     | yes          | no       | yes   | spam 
M4     | no            | yes     | no     | ham  
M5     | yes          | no       | yes   | ???  

=== Q7: Naive Bayes Classification for M5 ===
Prior: P(spam)=0.50, P(ham)=0.50
M5 -> {'Malicious': 'yes', 'Viagra': 'no', 'Meet': 'yes'}
Unnormalized: spam=0.250000, ham=0.000000
Posterior Probability => P(spam|M5)=1.0000, P(ham|M5)=0.0000
