In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from numpy.linalg import det, inv

# Load the dataset; every column but the last is used as a feature and the
# last column is used as the class label.
data = pd.read_csv("diabetes.csv")
feature_frame = data.iloc[:, :-1]
label_series = data.iloc[:, -1]
X = feature_frame.to_numpy()
Y = label_series.to_numpy()

# Two shuffled folds with a fixed seed so the split is reproducible.
kf = KFold(n_splits=2, shuffle=True, random_state=42)

# One accuracy value (in percent) is appended per fold.
accuracies = []

for train_index, test_index in kf.split(X):
    # Slice the current fold into training and held-out portions.
    X_train, Y_train = X[train_index], Y[train_index]
    X_test, Y_test = X[test_index], Y[test_index]

    # Per-class Gaussian parameters, keyed by class label.
    classes = np.unique(Y_train)
    priors, means, covs = {}, {}, {}

    n_train = len(X_train)
    for c in classes:
        X_c = X_train[Y_train == c]
        # Prior = fraction of training rows that belong to class c.
        priors[c] = len(X_c) / n_train
        means[c] = np.mean(X_c, axis=0)
        # np.cov treats rows as variables, hence the transpose.
        covs[c] = np.cov(X_c.T)
    
    def multivariate_gaussian(x, mean, cov):
        """Evaluate the multivariate normal density N(mean, cov) at ``x``.

        The density is computed in the log domain and exponentiated once at
        the end. Forming ``det(cov)`` and ``(2*pi)**d`` as separate
        linear-scale factors can underflow to 0 or overflow even when the
        density itself is representable; working with ``log|cov|`` avoids
        that. The quadratic form uses a linear solve instead of an explicit
        matrix inverse.

        Parameters
        ----------
        x : 1-D array of length d
            Point at which the density is evaluated.
        mean : 1-D array of length d
            Mean vector of the distribution.
        cov : (d, d) array
            Covariance matrix of the distribution.

        Returns
        -------
        numpy.float64
            The density value at ``x``.

        Raises
        ------
        numpy.linalg.LinAlgError
            If ``cov`` has a non-positive determinant (so the normalising
            constant is undefined) or the linear solve fails.
        """
        d = len(x)
        diff = x - mean

        # slogdet returns (sign, log|det|) without forming the raw determinant.
        sign, log_det = np.linalg.slogdet(cov)
        if sign <= 0:
            raise np.linalg.LinAlgError(
                "covariance matrix has a non-positive determinant"
            )

        # Squared Mahalanobis distance: diff^T cov^{-1} diff via a solve.
        mahalanobis_sq = diff @ np.linalg.solve(cov, diff)

        log_density = -0.5 * (d * np.log(2 * np.pi) + log_det + mahalanobis_sq)
        return np.exp(log_density)
    
    # Assign each held-out sample to the class whose prior-weighted
    # likelihood is largest.
    Y_pred = []
    for x in X_test:
        posteriors = {
            c: multivariate_gaussian(x, means[c], covs[c]) * priors[c]
            for c in classes
        }
        Y_pred.append(max(posteriors, key=posteriors.get))

    # Fold accuracy as a percentage of correctly labelled test samples.
    hits = np.array(Y_pred) == Y_test
    acc = np.mean(hits) * 100
    accuracies.append(acc)

# Report the per-fold accuracies followed by their mean.
mean_accuracy = np.mean(accuracies)
print("Accuracy for each fold:", accuracies)
print("Average Accuracy:", mean_accuracy)


Accuracy for each fold: [np.float64(75.52083333333334), np.float64(72.91666666666666)]
Average Accuracy: 74.21875


In [8]:
import numpy as np

# Toy 1-D training set: ten feature values with binary labels, plus the
# single query point to classify. Note that this rebinds X and Y.
X = np.array([10, 12, 9, 11, 13.5, 20, 18, 21, 19.5, 22])
Y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])
x_test = 17

# Split the samples by label.
class0, class1 = X[Y == 0], X[Y == 1]

# Priors are the class frequencies in the training set.
n_total = len(X)
prior0, prior1 = len(class0) / n_total, len(class1) / n_total

# Per-class mean and sample standard deviation (ddof=1).
mean0, mean1 = class0.mean(), class1.mean()
std0, std1 = class0.std(ddof=1), class1.std(ddof=1)

def gaussian_pdf(x, mean, std):
    """Return the univariate normal density with the given mean and
    standard deviation, evaluated at ``x``."""
    # Normalising constant 1 / (sqrt(2*pi) * std).
    norm = 1 / (np.sqrt(2 * np.pi) * std)
    # Exponent -(x - mean)^2 / (2 * std^2).
    exponent = -((x - mean) ** 2) / (2 * std ** 2)
    return norm * np.exp(exponent)

# Class-conditional densities of the query point under each class model.
likelihood0, likelihood1 = (
    gaussian_pdf(x_test, mean0, std0),
    gaussian_pdf(x_test, mean1, std1),
)

# Unnormalised posteriors: likelihood weighted by the class prior.
posterior0, posterior1 = likelihood0 * prior0, likelihood1 * prior1

print(f"Class 0: mean={mean0:.2f}, std={std0:.2f}, prior={prior0:.2f}")
print(f"Class 1: mean={mean1:.2f}, std={std1:.2f}, prior={prior1:.2f}")
print(f"\nLikelihoods -> P(x|w0)={likelihood0:.6f}, P(x|w1)={likelihood1:.6f}")
print(f"Posteriors -> P(w0|x)∝{posterior0:.6e}, P(w1|x)∝{posterior1:.6e}")

# Pick class 0 only when its posterior is strictly larger; ties go to class 1.
if posterior0 > posterior1:
    predicted_class = 0
else:
    predicted_class = 1
print(f"\nPredicted class for x = {x_test} is: {predicted_class}")


Class 0: mean=11.10, std=1.75, prior=0.50
Class 1: mean=20.10, std=1.52, prior=0.50

Likelihoods -> P(x|w0)=0.000759, P(x|w1)=0.032565
Posteriors -> P(w0|x)∝3.796678e-04, P(w1|x)∝1.628240e-02

Predicted class for x = 17 is: 1
