In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from scipy.stats import multivariate_normal

In [None]:
# Reproducibility setup: the sample count is declared first, then NumPy's
# global RNG is pinned so every later draw is repeatable on a fresh kernel.
num_samples = 1000  # number of points drawn for each class
np.random.seed(42)

In [None]:
# Population means of the two classes, ordered as [height in cm, weight in kg].
mean_G1, mean_G2 = [166, 70], [152, 55]

In [None]:
# Per-feature standard deviations, ordered as [height, weight].
# Both classes are given the same spread, but as two independent lists.
std_G1, std_G2 = [5, 7], [5, 7]

In [None]:
# Correlation coefficient between the two features (height, weight).
# The same value is used when building the covariance matrix of each class.
correlation = 0.6

In [None]:
def _covariance_2d(std, rho):
    """Build a 2x2 covariance matrix (nested lists) from two std devs and a correlation.

    The diagonal holds the variances ``std[i] ** 2``; both off-diagonal entries
    hold the covariance ``rho * std[0] * std[1]``.
    """
    off_diagonal = rho * std[0] * std[1]
    return [[std[0] ** 2, off_diagonal],
            [off_diagonal, std[1] ** 2]]


# One covariance matrix per class, built from that class's std devs and the
# shared correlation coefficient.
cov_G1 = _covariance_2d(std_G1, correlation)
cov_G2 = _covariance_2d(std_G2, correlation)

In [None]:
# Draw `num_samples` correlated 2-D points for each class from the global
# NumPy RNG. G1 is sampled before G2, so the draw order (and therefore the
# generated data for a given seed) is fixed.
X_G1 = np.random.multivariate_normal(mean=mean_G1, cov=cov_G1, size=num_samples)
X_G2 = np.random.multivariate_normal(mean=mean_G2, cov=cov_G2, size=num_samples)

In [None]:
# Stack the two classes row-wise into one feature matrix: G1 rows first, then G2.
X = np.concatenate([X_G1, X_G2], axis=0)
# Matching float labels: 0.0 for every G1 row followed by 1.0 for every G2 row.
Y = np.repeat([0.0, 1.0], num_samples)

In [None]:
# Scatter of the raw (unscaled) samples, coloured by class label.
fig, ax = plt.subplots(figsize=(10, 5))
points = ax.scatter(X[:, 0], X[:, 1], c=Y, cmap='coolwarm', alpha=0.5)
ax.set_xlabel("Height (cm)")
ax.set_ylabel("Weight (kg)")
ax.set_title("Original Data Distribution")
# Attach the colour scale to this axes, driven by the scatter's colour mapping.
fig.colorbar(points, ax=ax, label="Class")
plt.show()

In [None]:
# Side-by-side per-feature histograms, overlaying the two classes.
# Column 0 of the data is height and column 1 is weight; each gets its own panel.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

for col, (feature, unit) in enumerate((("Height", "cm"), ("Weight", "kg"))):
    panel = axes[col]
    panel.hist(X_G1[:, col], bins=30, alpha=0.5, label=f"Class 0 ({feature})", color='blue')
    panel.hist(X_G2[:, col], bins=30, alpha=0.5, label=f"Class 1 ({feature})", color='red')
    panel.set_xlabel(f"{feature} ({unit})")
    panel.set_ylabel("Frequency")
    panel.set_title(f"{feature} Distribution by Class")
    panel.legend()

plt.tight_layout()
plt.show()

In [None]:
# Hold out 20% of the samples as a test set; random_state=42 fixes the split
# so it is reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [None]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so no test-set statistics enter the scaling.
# NOTE: X_train / X_test are overwritten with their standardized versions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Per-class feature means estimated from the (standardized) training rows:
# label 0 -> G1, label 1 -> G2. axis=0 averages over rows, one value per feature.
mean_train_G1 = X_train[Y_train == 0].mean(axis=0)
mean_train_G2 = X_train[Y_train == 1].mean(axis=0)

In [None]:
# Boolean row selectors for each class within the training split.
rows_G1 = Y_train == 0
rows_G2 = Y_train == 1

# A separate covariance matrix is estimated for each class.
# rowvar=False: columns are the variables (features), rows are observations.
cov_train_G1 = np.cov(X_train[rows_G1], rowvar=False)
cov_train_G2 = np.cov(X_train[rows_G2], rowvar=False)

In [None]:
# Class priors = fraction of training labels belonging to each class.
n_train = len(Y_train)
prior_G1 = np.count_nonzero(Y_train == 0) / n_train
prior_G2 = np.count_nonzero(Y_train == 1) / n_train

In [None]:
def predict_bayes_correlated(X_input):
    probs_G1 = prior_G1 * multivariate_normal.pdf(X_input, mean=mean_train_G1, cov=cov_train_G1)
    probs_G2 = prior_G2 * multivariate_normal.pdf(X_input, mean=mean_train_G2, cov=cov_train_G2)

    return np.array(probs_G2 > probs_G1, dtype=int)

In [None]:
# Predict a 0/1 class label for every row of the standardized test split.
Y_pred = predict_bayes_correlated(X_test)

In [None]:
# Fraction of test samples whose predicted label matches the true label.
# NOTE(review): despite the "pooled" name, the classifier evaluated here uses a
# separate covariance matrix per class (cov_train_G1 / cov_train_G2), not a
# pooled one. The name is kept because later cells may reference it.
accuracy_pooled = accuracy_score(Y_test, Y_pred)
# Bare last expression so the notebook displays the value.
accuracy_pooled