In [7]:
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt

def map_predict(weights, mu_apple, std_apple, mu_orange, std_orange,
                prior_apple=0.5, prior_orange=0.5):
    """Return MAP class labels (1 = apple, 0 = orange) for *weights*.

    Each class is modelled as a 1-D Gaussian over fruit weight. The decision
    compares log-posteriors (log-likelihood + log-prior) rather than products
    of raw densities: for weights far from both means the raw PDFs underflow
    to 0.0, which would turn the comparison into ``0 > 0`` and silently label
    every such sample as orange.

    Args:
        weights: Scalar or array-like of weights in grams.
        mu_apple, std_apple: Mean and standard deviation of the apple class.
        mu_orange, std_orange: Mean and standard deviation of the orange class.
        prior_apple, prior_orange: Class prior probabilities.

    Returns:
        Integer NumPy array (or NumPy integer scalar for scalar input) with
        1 where the apple posterior is strictly larger, otherwise 0. Exact
        ties therefore resolve to orange.
    """
    weights = np.asarray(weights, dtype=float)
    log_post_apple = (
        norm.logpdf(weights, loc=mu_apple, scale=std_apple) + np.log(prior_apple)
    )
    log_post_orange = (
        norm.logpdf(weights, loc=mu_orange, scale=std_orange) + np.log(prior_orange)
    )
    return (log_post_apple > log_post_orange).astype(int)


# 1. Set random seed for reproducibility
# NOTE: the order of the np.random.normal calls below determines the data;
# reordering them changes every downstream number.
np.random.seed(42)

# 2. Generate training data (weights in grams)
# Apples: mean=160g, std=10g
train_apples = np.random.normal(loc=160, scale=10, size=100)
# Oranges: mean=130g, std=10g
train_oranges = np.random.normal(loc=130, scale=10, size=100)

# 3. Generate test data: 25 apples followed by 25 oranges
test_apples = np.random.normal(loc=160, scale=10, size=25)
test_oranges = np.random.normal(loc=130, scale=10, size=25)
test_data = np.concatenate([test_apples, test_oranges])
# 1 = apple, 0 = orange; label counts follow the sample sizes so the two
# arrays cannot drift apart if the sizes above are changed.
test_labels = np.concatenate([
    np.ones(len(test_apples), dtype=int),
    np.zeros(len(test_oranges), dtype=int),
])

# 4. Estimate parameters using MLE
# np.std defaults to ddof=0, which is the maximum-likelihood estimate.
mu_apple, std_apple = np.mean(train_apples), np.std(train_apples)
mu_orange, std_orange = np.mean(train_oranges), np.std(train_oranges)

# 5. Assign uniform priors
prior_apple = 0.5
prior_orange = 0.5

# 6. Compute likelihoods using Gaussian PDF (kept for inspection/plotting)
likelihood_apple = norm.pdf(test_data, loc=mu_apple, scale=std_apple)
likelihood_orange = norm.pdf(test_data, loc=mu_orange, scale=std_orange)

# 7. Compute posteriors (unnormalized)
posterior_apple = likelihood_apple * prior_apple
posterior_orange = likelihood_orange * prior_orange

# 8. Make MAP prediction
# Decided in log space (see map_predict) so extreme weights do not underflow.
predictions = map_predict(
    test_data,
    mu_apple, std_apple,
    mu_orange, std_orange,
    prior_apple=prior_apple,
    prior_orange=prior_orange,
)

# 9. Evaluate model
accuracy = np.mean(predictions == test_labels)
print("Accuracy:", accuracy)

# 10. Optional: Build a dataframe for display
import pandas as pd
df = pd.DataFrame({
    "Weight (g)": test_data,
    "True Label": ["Apple" if label == 1 else "Orange" for label in test_labels],
    "Predicted Label": ["Apple" if pred == 1 else "Orange" for pred in predictions],
    "Correct?": predictions == test_labels,
})
print(df.head())


Accuracy: 0.92
   Weight (g) True Label Predicted Label  Correct?
0  163.577874      Apple           Apple      True
1  165.607845      Apple           Apple      True
2  170.830512      Apple           Apple      True
3  170.538021      Apple           Apple      True
4  146.223306      Apple           Apple      True
