In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from scipy.stats import norm

In [None]:
# Seed NumPy's global RNG so every random draw below is reproducible.
np.random.seed(seed=42)
# Number of synthetic samples generated for each class.
num_samples = 1_000

In [None]:
# Class G1 samples: normal(mean, std, count). Height is drawn before
# haemoglobin; keep this order, since both share the seeded global RNG stream.
height_G1 = np.random.normal(166, 5, num_samples)
hb_G1 = np.random.normal(14, 1.5, num_samples)

In [None]:
# Class G2 samples: normal(mean, std, count). Height is drawn before
# haemoglobin; keep this order, since both share the seeded global RNG stream.
height_G2 = np.random.normal(152, 5, num_samples)
hb_G2 = np.random.normal(13, 1.5, num_samples)

In [None]:
# Plot histograms: one panel per feature, with both classes overlaid.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# (panel title, x-axis label, G1 samples, G2 samples) for each feature.
hist_panels = [
    ("Height Distribution", "Height (cm)", height_G1, height_G2),
    ("Haemoglobin Distribution", "Haemoglobin (g/dL)", hb_G1, hb_G2),
]

# A single loop replaces the two copy-pasted panel blocks, so styling
# (bins, alpha, colours, labels) cannot drift between the panels.
for ax, (panel_title, panel_xlabel, samples_G1, samples_G2) in zip(axes, hist_panels):
    ax.hist(samples_G1, bins=30, alpha=0.6, color='blue', label="Class G1")
    ax.hist(samples_G2, bins=30, alpha=0.6, color='red', label="Class G2")
    ax.set_title(panel_title)
    ax.set_xlabel(panel_xlabel)
    ax.set_ylabel("Frequency")
    ax.legend()

# tight_layout keeps the panel labels from overlapping; plt.show() ends the
# cell so the Legend object's repr is not echoed as cell output.
fig.tight_layout()
plt.show()

In [None]:
# Scatter of both features, one colour per class, to show how well the
# two classes separate in the (height, haemoglobin) plane.
fig_scatter, ax_scatter = plt.subplots(figsize=(8, 6))
ax_scatter.scatter(height_G1, hb_G1, label="Class 1 (G1)", alpha=0.5)
ax_scatter.scatter(height_G2, hb_G2, label="Class 2 (G2)", alpha=0.5)
ax_scatter.set(
    xlabel="Height (cm)",
    ylabel="Hemoglobin Level",
    title="Generated Data Distribution",
)
ax_scatter.legend()
plt.show()

In [None]:
# Feature matrix: column 0 = height, column 1 = haemoglobin; all G1 rows
# come first, followed by all G2 rows.
X = np.column_stack((
    np.concatenate((height_G1, height_G2)),
    np.concatenate((hb_G1, hb_G2)),
))
# Matching labels: 0.0 for each G1 row, then 1.0 for each G2 row.
Y = np.repeat([0.0, 1.0], num_samples)

In [None]:
# Hold out 20% of the samples for evaluation; random_state pins the shuffle
# so the split is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Class priors: both classes are treated as equally likely.
prior_G1 = 0.5
prior_G2 = 0.5

In [None]:
# Split the training rows by label: 0 selects G1, 1 selects G2.
x_train_G1, x_train_G2 = (
    X_train[Y_train == class_label] for class_label in (0, 1)
)

In [None]:
# Per-feature Gaussian parameters for class G1, estimated from training rows.
# Column 0 is height, column 1 is haemoglobin; .std() uses NumPy's default ddof=0.
mean_height_G1 = x_train_G1[:, 0].mean()
std_height_G1 = x_train_G1[:, 0].std()
mean_hb_G1 = x_train_G1[:, 1].mean()
std_hb_G1 = x_train_G1[:, 1].std()

In [None]:
# Per-feature Gaussian parameters for class G2, estimated from training rows.
# Column 0 is height, column 1 is haemoglobin; .std() uses NumPy's default ddof=0.
mean_height_G2 = x_train_G2[:, 0].mean()
std_height_G2 = x_train_G2[:, 0].std()
mean_hb_G2 = x_train_G2[:, 1].mean()
std_hb_G2 = x_train_G2[:, 1].std()

In [None]:
def predict_bayes(local_x):
    """Classify samples as G1 (0) or G2 (1) with a Gaussian naive Bayes rule.

    Each class score is the class prior times an independent normal
    likelihood for height (column 0) and haemoglobin (column 1). Scores are
    compared in log space: multiplying raw pdf values underflows to 0.0 for
    samples far from both class means, which makes both scores equal and
    silently labels every such sample as G1.

    Reads the module-level priors (``prior_G1``, ``prior_G2``) and the
    per-class ``mean_*`` / ``std_*`` estimates.

    Parameters
    ----------
    local_x : array-like, shape (n_samples, 2) or (2,)
        Samples to classify. A single 1-D sample is treated as one row.

    Returns
    -------
    numpy.ndarray of int, shape (n_samples,)
        1 where the G2 score is strictly greater than the G1 score, else 0
        (ties go to G1).
    """
    samples = np.atleast_2d(np.asarray(local_x, dtype=float))
    heights, hb_levels = samples[:, 0], samples[:, 1]

    # log(prior) + sum of per-feature log-likelihoods == log of the original product.
    log_score_G1 = (
        np.log(prior_G1)
        + norm.logpdf(heights, mean_height_G1, std_height_G1)
        + norm.logpdf(hb_levels, mean_hb_G1, std_hb_G1)
    )
    log_score_G2 = (
        np.log(prior_G2)
        + norm.logpdf(heights, mean_height_G2, std_height_G2)
        + norm.logpdf(hb_levels, mean_hb_G2, std_hb_G2)
    )

    return (log_score_G2 > log_score_G1).astype(int)

In [None]:
# Predict a class label (0 = G1, 1 = G2) for every held-out test sample.
Y_pred_bayes = predict_bayes(X_test)

In [None]:
# Fraction of test samples whose predicted label matches the true label;
# left as the cell's last expression so the notebook displays it.
accuracy_score(y_true=Y_test, y_pred=Y_pred_bayes)