<a href="https://colab.research.google.com/github/IvaroEkel/Probabilistic-Machine-Learning_Lecture/blob/main/Bayesian_and_frequentist_inference_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# One-Sample Proportion Analysis: Frequentist & Bayesian Approaches

# This notebook walks you through a one-sample proportion test using:
# - Frequentist methods (z-test, unbiasedness, efficiency, CRLB)
# - MLE estimation
# - Fisher Information
# - Bayesian inference with Beta priors

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

# --- Example data: x successes observed in n independent trials ---
n, x = 100, 54     # sample size, number of successes
p_0 = 0.6          # proportion stated by the null hypothesis H0: p = p_0
p_hat = x / n      # observed sample proportion

# --- Frequentist z-test ---
# The standard error is evaluated at p_0 rather than p_hat because the
# statistic is compared against its sampling distribution under H0.
se = np.sqrt(p_0 * (1 - p_0) / n)
z = (p_hat - p_0) / se
# Two-sided p-value: twice the upper-tail area beyond |z|.
p_value = stats.norm.sf(abs(z)) * 2

print(
    "--- Frequentist z-test ---",
    f"Sample proportion: {p_hat:.3f}",
    f"Z-statistic: {z:.3f}",
    f"p-value: {p_value:.4f}\n",
    sep="\n",
)

# --- Unbiasedness and Variance ---
# Every quantity in this section is evaluated at p = p_0, i.e. under H0.
# p_hat is unbiased (E[p_hat] = p) and its variance is p(1 - p) / n.
bernoulli_var = p_0 * (1 - p_0)   # variance of a single Bernoulli(p_0) trial
expected_p_hat = p_0
variance_p_hat = bernoulli_var / n
print(
    "--- Unbiasedness and Variance ---",
    f"E[p_hat] = {expected_p_hat} (Unbiased)",
    f"Var(p_hat) = {variance_p_hat:.5f}\n",
    sep="\n",
)

# --- Cramér-Rao Lower Bound and Fisher Information ---
# Fisher information of n trials is n / (p(1 - p)); the CRLB is its
# reciprocal. It coincides with Var(p_hat), so p_hat is an efficient estimator.
fisher_info = n / bernoulli_var
crlb = 1 / fisher_info
print(
    "--- CRLB and Fisher Information ---",
    f"Fisher Information: {fisher_info:.2f}",
    f"CRLB: {crlb:.5f}",
    f"Var(p_hat) = {variance_p_hat:.5f} (attains CRLB)\n",
    sep="\n",
)

# --- MLE ---
# The binomial likelihood is maximized at x/n, so the maximum likelihood
# estimate of p is the sample proportion itself.
p_mle = p_hat
print(
    "--- Maximum Likelihood Estimation (MLE) ---",
    f"MLE of p: {p_mle:.3f}\n",
    sep="\n",
)

# --- Bayesian Inference ---
# Beta(1, 1) is the uniform prior on [0, 1].
alpha_prior, beta_prior = 1, 1

# Conjugate Beta-Binomial update: successes are added to alpha and
# failures (n - x) are added to beta.
posterior_alpha = x + alpha_prior
posterior_beta = (n - x) + beta_prior

# Closed-form summaries of Beta(posterior_alpha, posterior_beta).
posterior_total = posterior_alpha + posterior_beta
posterior_mean = posterior_alpha / posterior_total
# The mode formula requires both posterior parameters to be greater than 1.
posterior_mode = (posterior_alpha - 1) / (posterior_total - 2)
posterior_var = (posterior_alpha * posterior_beta) / (
    (posterior_total ** 2) * (posterior_total + 1)
)

# 95% credible interval from the Beta posterior's quantiles.
cred_int = stats.beta.interval(0.95, posterior_alpha, posterior_beta)

print(
    "--- Bayesian Inference ---",
    f"Posterior: Beta({posterior_alpha}, {posterior_beta})",
    f"Posterior Mean: {posterior_mean:.3f}",
    f"Posterior Mode: {posterior_mode:.3f}",
    f"Posterior Variance: {posterior_var:.5f}",
    f"95% Credible Interval: ({cred_int[0]:.3f}, {cred_int[1]:.3f})\n",
    sep="\n",
)

# --- Plot posterior distribution ---
# Evaluate the posterior density on an even grid of 500 points over [0, 1].
p_vals = np.linspace(0, 1, 500)
posterior_pdf = stats.beta.pdf(p_vals, posterior_alpha, posterior_beta)

# Single-axes figure: density curve plus a dashed marker at the posterior mean.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(p_vals, posterior_pdf, label=f'Beta({posterior_alpha}, {posterior_beta})')
ax.axvline(posterior_mean, color='red', linestyle='--', label='Posterior Mean')
ax.set_title('Bayesian Posterior Distribution of p')
ax.set_xlabel('p')
ax.set_ylabel('Density')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# One-Sample Proportion Analysis: Frequentist & Bayesian Approaches

# 📚 Summary:
# This notebook walks through a one-sample proportion test using:
# - Frequentist methods (z-test, unbiasedness, efficiency, CRLB)
# - MLE estimation
# - Fisher Information
# - Bayesian inference with Beta priors

import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt

# --- Example data: x successes observed in n independent trials ---
n, x = 100, 54     # sample size, number of successes
p_0 = 0.6          # proportion stated by the null hypothesis H0: p = p_0
p_hat = x / n      # observed sample proportion

# --- Frequentist z-test ---
# Standard error under H0 (uses p_0, not p_hat), then the standardized
# distance between the observed and the hypothesized proportion.
se = np.sqrt(p_0 * (1 - p_0) / n)
z = (p_hat - p_0) / se
# Two-sided p-value: twice the upper-tail area beyond |z|.
p_value = stats.norm.sf(abs(z)) * 2

z_test_report = [
    "--- Frequentist z-test ---",
    f"Sample proportion: {p_hat:.3f}",
    f"Z-statistic: {z:.3f}",
    f"p-value: {p_value:.4f}\n",
]
print("\n".join(z_test_report))

# --- Unbiasedness and Variance ---
# Quantities here are evaluated at p = p_0 (under H0): the sample proportion
# is unbiased, E[p_hat] = p, with variance p(1 - p) / n.
bernoulli_var = p_0 * (1 - p_0)   # variance of a single Bernoulli(p_0) trial
expected_p_hat = p_0
variance_p_hat = bernoulli_var / n
variance_report = [
    "--- Unbiasedness and Variance ---",
    f"E[p_hat] = {expected_p_hat} (Unbiased)",
    f"Var(p_hat) = {variance_p_hat:.5f}\n",
]
print("\n".join(variance_report))

# --- Cramér-Rao Lower Bound and Fisher Information ---
# The CRLB is the reciprocal of the Fisher information n / (p(1 - p)).
# Var(p_hat) equals that bound, which is what makes p_hat efficient.
fisher_info = n / bernoulli_var
crlb = 1 / fisher_info
crlb_report = [
    "--- CRLB and Fisher Information ---",
    f"Fisher Information: {fisher_info:.2f}",
    f"CRLB: {crlb:.5f}",
    f"Var(p_hat) = {variance_p_hat:.5f} (attains CRLB)\n",
]
print("\n".join(crlb_report))

# --- MLE ---
# For a binomial sample the likelihood peaks at x/n, so the MLE of p
# is just the sample proportion.
p_mle = p_hat
mle_report = [
    "--- Maximum Likelihood Estimation (MLE) ---",
    f"MLE of p: {p_mle:.3f}\n",
]
print("\n".join(mle_report))

# --- Bayesian Inference ---
# Beta(1, 1) is the uniform prior on [0, 1].
alpha_prior, beta_prior = 1, 1

# Conjugate Beta-Binomial update: alpha gains the successes,
# beta gains the failures (n - x).
posterior_alpha = x + alpha_prior
posterior_beta = (n - x) + beta_prior

# Closed-form summaries of Beta(posterior_alpha, posterior_beta).
posterior_total = posterior_alpha + posterior_beta
posterior_mean = posterior_alpha / posterior_total
# The mode formula requires both posterior parameters to be greater than 1.
posterior_mode = (posterior_alpha - 1) / (posterior_total - 2)
posterior_var = (posterior_alpha * posterior_beta) / (
    (posterior_total ** 2) * (posterior_total + 1)
)

# 95% credible interval from the Beta posterior's quantiles.
cred_int = stats.beta.interval(0.95, posterior_alpha, posterior_beta)

bayes_report = [
    "--- Bayesian Inference ---",
    f"Posterior: Beta({posterior_alpha}, {posterior_beta})",
    f"Posterior Mean: {posterior_mean:.3f}",
    f"Posterior Mode: {posterior_mode:.3f}",
    f"Posterior Variance: {posterior_var:.5f}",
    f"95% Credible Interval: ({cred_int[0]:.3f}, {cred_int[1]:.3f})\n",
]
print("\n".join(bayes_report))

# --- Plot posterior distribution ---
# Evaluate the posterior density on an even grid of 500 points over [0, 1].
p_vals = np.linspace(0, 1, 500)
posterior_pdf = stats.beta.pdf(p_vals, posterior_alpha, posterior_beta)

# Single-axes figure: density curve plus a dashed marker at the posterior mean.
fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(p_vals, posterior_pdf, label=f'Beta({posterior_alpha}, {posterior_beta})')
ax.axvline(posterior_mean, color='red', linestyle='--', label='Posterior Mean')
ax.set_title('Bayesian Posterior Distribution of p')
ax.set_xlabel('p')
ax.set_ylabel('Density')
ax.legend()
ax.grid(True)
plt.show()

# --- Reuse Posterior as New Prior ---
# Bayesian sequential updating: the posterior obtained from one batch of data
# becomes the (informative) prior for the next batch, so evidence accumulates
# over time. Each batch below is combined with the posterior produced by the
# batch before it; the first batch starts from the Beta posterior computed
# earlier in this file (posterior_alpha, posterior_beta).


def update_beta_posterior(alpha_prior, beta_prior, successes, trials):
    """Return the Beta posterior parameters after observing binomial data.

    Args:
        alpha_prior: Alpha parameter of the Beta prior.
        beta_prior: Beta parameter of the Beta prior.
        successes: Number of successes observed in the new batch.
        trials: Number of trials in the new batch.

    Returns:
        A tuple ``(alpha_posterior, beta_posterior)``.

    Raises:
        ValueError: If ``successes`` is negative or exceeds ``trials``.
    """
    if not 0 <= successes <= trials:
        raise ValueError(
            f"successes must lie in [0, trials]; got {successes} of {trials}"
        )
    # Conjugate update: successes go to alpha, failures go to beta.
    return alpha_prior + successes, beta_prior + (trials - successes)


# Simulated new data, in the order it arrives: (number of trials, successes).
new_batches = [(100, 51), (100, 62), (100, 48)]

# Seed the chain with the posterior from the first analysis.
new_posterior_alpha, new_posterior_beta = posterior_alpha, posterior_beta

for new_n, new_x in new_batches:
    # Use the most recent posterior as the new prior. Restarting from the
    # original posterior here would discard every earlier batch.
    new_alpha_prior = new_posterior_alpha
    new_beta_prior = new_posterior_beta

    # Update with new data
    new_posterior_alpha, new_posterior_beta = update_beta_posterior(
        new_alpha_prior, new_beta_prior, new_x, new_n
    )

    new_mean = new_posterior_alpha / (new_posterior_alpha + new_posterior_beta)
    new_ci = stats.beta.interval(0.95, new_posterior_alpha, new_posterior_beta)

    print("--- Sequential Update Using Previous Posterior ---")
    print(f"New Posterior: Beta({new_posterior_alpha}, {new_posterior_beta})")
    print(f"Updated Posterior Mean: {new_mean:.3f}")
    print(f"Updated 95% Credible Interval: ({new_ci[0]:.3f}, {new_ci[1]:.3f})")

    # Plot updated posterior on the same grid of p values used for the
    # first posterior plot.
    new_posterior_pdf = stats.beta.pdf(p_vals, new_posterior_alpha, new_posterior_beta)

    plt.figure(figsize=(8, 5))
    plt.plot(p_vals, new_posterior_pdf, label=f'Updated Beta({new_posterior_alpha}, {new_posterior_beta})')
    plt.axvline(new_mean, color='green', linestyle='--', label='New Posterior Mean')
    plt.title('Updated Bayesian Posterior Distribution after New Data')
    plt.xlabel('p')
    plt.ylabel('Density')
    plt.legend()
    plt.grid(True)
    plt.show()