In [12]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, KFold
import numpy as np
from sklearn.metrics import accuracy_score, cohen_kappa_score, precision_score, recall_score, f1_score, confusion_matrix

In [2]:
# Load the Iris data set that ships with scikit-learn.
iris = load_iris()
# Feature matrix as a DataFrame whose columns carry the measurement names.
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
# Targets already come as integer class codes, so no label encoding is needed.
y = pd.Series(data=iris.target)

In [4]:
# Gaussian Naive Bayes algorithm

def fit(X, y):
    """Estimate Gaussian Naive Bayes parameters from training data.

    For every distinct label in ``y`` the per-feature mean and variance of the
    matching rows of ``X`` are computed, together with the class prior (the
    fraction of training rows that carry that label).

    Parameters
    ----------
    X : pandas.DataFrame
        Training features, one row per sample and one column per feature.
    y : pandas.Series
        Class labels aligned with the rows of ``X``.

    Returns
    -------
    classes : numpy.ndarray
        Unique labels found in ``y`` (as returned by ``np.unique``).
    mean : dict
        Maps each class to its per-feature mean.
    var : dict
        Maps each class to its per-feature variance.
    priors : dict
        Maps each class to ``n_rows_in_class / n_rows_total``.
    """
    classes = np.unique(y)
    n_samples = X.shape[0]
    mean = {}
    var = {}
    priors = {}

    for cls in classes:
        X_cls = X[y == cls]
        n_cls = len(X_cls)
        mean[cls] = X_cls.mean(axis=0)
        if n_cls > 1:
            var[cls] = X_cls.var(axis=0)
        else:
            # With a single row the pandas default (sample variance, ddof=1)
            # is NaN, which would poison every downstream log-likelihood.
            # The population variance (ddof=0) is a well-defined 0.0 instead.
            var[cls] = X_cls.var(axis=0, ddof=0)
        priors[cls] = n_cls / n_samples

    return classes, mean, var, priors

def gaussian_pdf(x, mean, var):
    """Evaluate a univariate Gaussian density element-wise.

    Parameters
    ----------
    x : scalar or array-like
        Point(s) at which the density is evaluated.
    mean : scalar or array-like
        Mean of the distribution, broadcast against ``x``.
    var : scalar or array-like
        Variance of the distribution, broadcast against ``x``.

    Returns
    -------
    Density value(s) with the broadcast shape of the inputs.
    """
    # Small constant added to both denominators so a zero variance does not
    # trigger a division by zero.
    eps = 1e-6
    squared_deviation = (x - mean) ** 2
    normaliser = np.sqrt(2.0 * np.pi * var + eps)
    return (1.0 / normaliser) * np.exp(-squared_deviation / (2.0 * var + eps))

def predict(X, classes, mean, var, priors):
    """Assign each row of ``X`` to the class with the highest log-posterior.

    The per-feature Gaussian log-density is evaluated directly in log space
    instead of taking ``log`` of the density. For a sample far from a class
    mean the density underflows to 0.0, its log becomes ``-inf`` for every
    class, and ``argmax`` would then silently return the first class.

    Parameters
    ----------
    X : pandas.DataFrame or 2-D array-like
        Samples to classify, one row per sample.
    classes : sequence
        Class labels, in the order used to break ties (first maximum wins).
    mean, var : dict
        Per-class, per-feature mean and variance, keyed by class label.
    priors : dict
        Per-class prior probability, keyed by class label.

    Returns
    -------
    numpy.ndarray
        Predicted label for every row of ``X``.
    """
    # Same smoothing constant and placement as the density formula
    # 1/sqrt(2*pi*var + eps) * exp(-(x - mean)^2 / (2*var + eps)).
    eps = 1e-6
    samples = np.asarray(X, dtype=float)
    labels = np.asarray(classes)
    log_posteriors = np.empty((samples.shape[0], len(labels)))

    for col, cls in enumerate(classes):
        mu = np.asarray(mean[cls], dtype=float)
        sigma2 = np.asarray(var[cls], dtype=float)
        log_norm = -0.5 * np.log(2.0 * np.pi * sigma2 + eps)
        log_kernel = -((samples - mu) ** 2) / (2.0 * sigma2 + eps)
        # Naive Bayes assumption: features are independent given the class,
        # so the joint log-likelihood is the sum over features.
        log_likelihood = np.sum(log_norm + log_kernel, axis=1)
        log_posteriors[:, col] = np.log(priors[cls]) + log_likelihood

    return labels[np.argmax(log_posteriors, axis=1)]

In [13]:
# 10-Fold Cross Validation
# Shuffled, non-stratified folds with a fixed seed so the split is reproducible.
kf = KFold(n_splits=10, shuffle=True, random_state=42)

# Full label set, fixed once, so every fold's confusion matrix has the same
# shape and row/column order even if a fold happens to miss a class.
all_labels = np.unique(y)

# Lists to store metrics for each fold
accuracies = []
kappa_scores = []
precisions = []
recalls = []
f1_scores = []
confusion_matrices = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Train on the training folds and predict the held-out fold
    classes, mean, var, priors = fit(X_train, y_train)
    y_pred = predict(X_test, classes, mean, var, priors)

    # Macro averaging weights every class equally regardless of its support
    accuracy = accuracy_score(y_test, y_pred)
    kappa = cohen_kappa_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    conf_matrix = confusion_matrix(y_test, y_pred, labels=all_labels)

    # Append results
    accuracies.append(accuracy)
    kappa_scores.append(kappa)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)
    confusion_matrices.append(conf_matrix)

# Display metrics: the per-fold values followed by their mean, one pair per metric
metric_report = [
    ("Accuracies", "Accuracy", accuracies),
    ("Kappa Scores", "Kappa Score", kappa_scores),
    ("Precisions", "Precision", precisions),
    ("Recalls", "Recall", recalls),
    ("F1-Scores", "F1-Score", f1_scores),
]
for plural_name, singular_name, fold_values in metric_report:
    print(f"{plural_name} for each fold: {fold_values}")
    print(f"Average {singular_name}: {np.mean(fold_values):.4f}")
print("Confusion Matrices for each fold:")
for i, cm in enumerate(confusion_matrices):
    print(f"Fold {i+1}:\n{cm}")

Accuracies for each fold: [1.0, 1.0, 1.0, 0.9333333333333333, 1.0, 0.8666666666666667, 0.9333333333333333, 1.0, 1.0, 0.8666666666666667]
Average Accuracy: 0.9600
Kappa Scores for each fold: [1.0, 1.0, 1.0, 0.8979591836734694, 1.0, 0.8, 0.9, 1.0, 1.0, 0.7794117647058824]
Average Kappa Score: 0.9377
Precisions for each fold: [1.0, 1.0, 1.0, 0.9523809523809524, 1.0, 0.8666666666666667, 0.9333333333333332, 1.0, 1.0, 0.8968253968253969]
Average Precision: 0.9649
Recalls for each fold: [1.0, 1.0, 1.0, 0.9333333333333332, 1.0, 0.8888888888888888, 0.9444444444444445, 1.0, 1.0, 0.8968253968253969]
Average Recall: 0.9663
F1-Scores for each fold: [1.0, 1.0, 1.0, 0.9373219373219372, 1.0, 0.85, 0.9326599326599326, 1.0, 1.0, 0.8968253968253969]
Average F1-Score: 0.9617
Confusion Matrices for each fold:
Fold 1:
[[6 0 0]
 [0 6 0]
 [0 0 3]]
Fold 2:
[[4 0 0]
 [0 3 0]
 [0 0 8]]
Fold 3:
[[9 0 0]
 [0 4 0]
 [0 0 2]]
Fold 4:
[[4 0 0]
 [0 6 0]
 [0 1 4]]
Fold 5:
[[6 0 0]
 [0 4 0]
 [0 0 5]]
Fold 6:
[[6 0 0]
 [0