In [None]:
!pip install numpy pandas scikit-learn matplotlib



In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, confusion_matrix
from scipy.stats import multivariate_normal


In [None]:
# Generate synthetic dataset
np.random.seed(42)
n_samples = 1000
n_features = 4
n_classes = 3

# Random class labels
y = np.random.choice(n_classes, size=n_samples, p=[0.3, 0.4, 0.3])

# Generate class-dependent features
means = np.array([[0,0,0,0], [2,2,2,2], [4,4,4,4]])
cov = np.eye(n_features)
X = np.vstack([np.random.multivariate_normal(means[c], cov, size=np.sum(y==c)) for c in range(n_classes)])

# Shuffle dataset
perm = np.random.permutation(n_samples)
X, y = X[perm], y[perm]

# Put into a DataFrame
df = pd.DataFrame(X, columns=[f"feature_{i+1}" for i in range(n_features)])
df["class"] = y
df.head()


Unnamed: 0,feature_1,feature_2,feature_3,feature_4,class
0,4.754733,4.199424,4.473038,3.354428,0
1,2.230071,2.497743,2.665924,2.421587,2
2,-0.211862,3.533434,0.576043,1.733348,2
3,-0.128538,-1.881849,-0.548725,0.092845,2
4,0.399223,0.647196,-0.483186,1.573987,1


In [None]:
class BayesianGaussianClassifier:
    def __init__(self, tied_covariance=True, regularize=1e-6):
        self.tied_covariance = tied_covariance
        self.regularize = regularize
        self.fitted = False

    def fit(self, X, y):
        X = np.asarray(X)
        y = np.asarray(y)
        self.classes_ = np.unique(y)
        self.means_ = {}
        self.covariances_ = {}
        self.priors_ = {}
        n, d = X.shape

        for c in self.classes_:
            Xc = X[y == c]
            self.means_[c] = Xc.mean(axis=0)
            self.covariances_[c] = np.cov(Xc, rowvar=False) + np.eye(d) * self.regularize
            self.priors_[c] = len(Xc) / len(X)

        if self.tied_covariance:
            pooled = np.zeros((d, d))
            for c in self.classes_:
                Xc = X[y == c]
                pooled += (len(Xc) - 1) * np.cov(Xc, rowvar=False)
            pooled /= (len(X) - len(self.classes_))
            self.shared_covariance_ = pooled + np.eye(d) * self.regularize

        self.fitted = True

    def _log_likelihood(self, X, c):
        mu = self.means_[c]
        cov = self.shared_covariance_ if self.tied_covariance else self.covariances_[c]
        return multivariate_normal.logpdf(X, mean=mu, cov=cov)

    def predict(self, X):
        log_probs = []
        for c in self.classes_:
            lp = self._log_likelihood(X, c) + np.log(self.priors_[c])
            log_probs.append(lp)
        log_probs = np.vstack(log_probs).T
        return self.classes_[np.argmax(log_probs, axis=1)]


In [None]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)
accuracies = []

for train_idx, test_idx in kf.split(X):
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]

    model = BayesianGaussianClassifier(tied_covariance=True)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    accuracies.append(acc)

print("K-Fold Accuracies:", accuracies)
print("Average Accuracy:", np.mean(accuracies))


K-Fold Accuracies: [0.385, 0.345, 0.4, 0.415, 0.395]
Average Accuracy: 0.388


In [None]:
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix (Last Fold):\n", cm)


Confusion Matrix (Last Fold):
 [[ 0 69  0]
 [ 0 79  0]
 [ 0 52  0]]
