# Solution: Bayesian Statistics

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns

# Apply seaborn's default visual theme so the matplotlib figures below pick it up.
sns.set_theme()

### Task 1: Simple Spam Filter

In [None]:
def bayes_update(prior, likelihood_true, likelihood_false):
    """Return the posterior P(A|B) given a prior and two likelihoods.

    Args:
        prior: P(A), belief in the hypothesis before seeing the evidence B.
        likelihood_true: P(B|A), chance of the evidence when A holds.
        likelihood_false: P(B|~A), chance of the evidence when A does not hold.

    Returns:
        P(A|B) = P(B|A)P(A) / P(B).

    Note:
        Inputs are not validated. If the total evidence P(B) is zero, the
        division fails (ZeroDivisionError for plain Python numbers).
    """
    joint_true = likelihood_true * prior
    joint_false = likelihood_false * (1 - prior)
    # Law of total probability: P(B) = P(B|A)P(A) + P(B|~A)P(~A)
    return joint_true / (joint_true + joint_false)

# Worked example: probability that a message is spam given it contains 'Buy'.
p_spam = 0.4            # prior P(Spam)
p_buy_given_spam = 0.5  # likelihood P('Buy' | Spam)
p_buy_given_ham = 0.1   # likelihood P('Buy' | not Spam)

posterior_spam = bayes_update(p_spam, p_buy_given_spam, p_buy_given_ham)
print(f"P(Spam | 'Buy') = {posterior_spam:.4f}")
# Format the percentage from the computed posterior instead of hard-coding it,
# so the summary cannot drift from the number printed above when the inputs
# change. NOTE: the "much more likely" wording still assumes a posterior well
# above 0.5, which holds for the values used here (~0.77).
print(f"It is much more likely to be Spam ({posterior_spam:.0%}) than not.")

### Task 2: Beta Distribution Analysis

In [None]:
# Grid of candidate values for the coin's heads probability p.
x = np.linspace(0, 1, 100)

# Beta prior parameters and the observed coin flips.
prior_alpha, prior_beta = 2, 2
n_heads, n_tails = 100, 100

# Conjugate Beta-Binomial update: add the observed counts to the prior
# parameters. Deriving these keeps the curve and its legend label in sync.
post_alpha = prior_alpha + n_heads
post_beta = prior_beta + n_tails

plt.figure(figsize=(12, 5))

# 1. Prior density
y_prior = stats.beta.pdf(x, prior_alpha, prior_beta)
plt.plot(x, y_prior, label=f'Prior Beta({prior_alpha},{prior_beta})', linestyle='--')

# 2. Posterior density after the update
y_post = stats.beta.pdf(x, post_alpha, post_beta)
plt.plot(x, y_post, label=f'Posterior Beta({post_alpha},{post_beta})', linewidth=2)

plt.title('Effect of Data Size on Posterior Variance')
plt.legend()
plt.xlabel('p')
plt.ylabel('Density')
plt.show()

print("Explanation: As we add more data, the variance decreases (the curve becomes narrower/sharper). This represents our increasing certainty about the true parameter value.")