In [42]:
import pandas as pd
from sklearn.model_selection import KFold
import numpy as np
from sklearn.metrics import accuracy_score, cohen_kappa_score, precision_score, recall_score, f1_score, confusion_matrix

In [45]:
# Load the two SPECT CSV files shipped alongside the notebook.
df1 = pd.read_csv('spect_train.csv')
df2 = pd.read_csv('spect_test.csv')

# Column 0 holds the label; every remaining column is a feature.
X_train, y_train = df1.iloc[:, 1:], df1.iloc[:, 0]
X_test, y_test = df2.iloc[:, 1:], df2.iloc[:, 0]

# Stack train on top of test (features, then labels) with a fresh 0..n-1 index,
# so the combined set can be re-split positionally later on.
X = pd.concat([X_train, X_test], axis=0, ignore_index=True)
y = pd.concat([y_train, y_test], axis=0, ignore_index=True)


In [44]:
# Naive Bayes algorithm

def fit(X, y):
    """Estimate per-class Gaussian Naive Bayes parameters.

    Parameters
    ----------
    X : two-dimensional table of features (rows are samples). It is indexed
        with the boolean mask ``y == cls`` and must provide ``.shape``,
        ``.mean(axis=0)`` and ``.var(axis=0)``.
    y : labels, one per row of ``X``.

    Returns
    -------
    classes : sorted unique labels, as returned by ``np.unique(y)``.
    mean : dict mapping each label to the per-feature mean of its rows.
    var : dict mapping each label to the per-feature variance of its rows.
        When ``X`` is a pandas DataFrame, ``.var`` uses pandas' default
        ``ddof=1`` (sample variance).
    priors : dict mapping each label to its share of the rows of ``X``.
    """
    classes = np.unique(y)
    total_rows, _ = X.shape  # unpacking enforces a 2-D input

    # Select the rows of each class once, then derive every statistic from them.
    rows_by_class = {label: X[y == label] for label in classes}

    mean = {label: rows.mean(axis=0) for label, rows in rows_by_class.items()}
    var = {label: rows.var(axis=0) for label, rows in rows_by_class.items()}
    priors = {label: len(rows) / total_rows for label, rows in rows_by_class.items()}

    return classes, mean, var, priors

def gaussian_pdf(x, mean, var):
    """Evaluate a smoothed univariate Gaussian density element-wise.

    A small constant is added to both the normalising term and the exponent's
    denominator, so a zero variance yields a finite value instead of a
    division by zero.

    Parameters
    ----------
    x : value(s) at which to evaluate the density.
    mean : mean(s) of the Gaussian, broadcastable against ``x``.
    var : variance(s) of the Gaussian, broadcastable against ``x``.

    Returns
    -------
    The density value(s), with the broadcast shape of the inputs.
    """
    eps = 1e-6  # smoothing term guarding against var == 0
    normaliser = np.sqrt(2.0 * np.pi * var + eps)
    squared_dev = (x - mean) ** 2
    return (1.0 / normaliser) * np.exp(-squared_dev / (2.0 * var + eps))

def predict(X, classes, mean, var, priors):
    """Predict the most probable class for every row of ``X``.

    The Gaussian log-density is evaluated directly in log space. Taking
    ``np.log`` of the density instead underflows to ``log(0) = -inf`` whenever
    a class variance is (near) zero and the sample differs from the class
    mean; if that happens for every class, ``argmax`` silently falls back to
    the first class. The formula and the ``eps`` smoothing are the same as in
    ``gaussian_pdf``, only computed without the intermediate ``exp``.

    Parameters
    ----------
    X : two-dimensional features, one row per sample (DataFrame or array-like).
    classes : sequence of class labels, as returned by ``fit``.
    mean : dict mapping each label to its per-feature means.
    var : dict mapping each label to its per-feature variances.
    priors : dict mapping each label to its prior probability.

    Returns
    -------
    numpy.ndarray with one predicted label per row of ``X``. Ties go to the
    class that appears first in ``classes``.
    """
    eps = 1e-6  # same smoothing constant as gaussian_pdf
    samples = np.asarray(X, dtype=float)
    labels = np.asarray(classes)

    # scores[i, k] = log prior of class k + sum of per-feature log-densities.
    scores = np.empty((samples.shape[0], len(labels)))
    for k, cls in enumerate(labels):
        mu = np.asarray(mean[cls], dtype=float)
        sigma2 = np.asarray(var[cls], dtype=float)
        log_norm = -0.5 * np.log(2.0 * np.pi * sigma2 + eps)
        log_expo = -((samples - mu) ** 2) / (2.0 * sigma2 + eps)
        scores[:, k] = np.log(priors[cls]) + np.sum(log_norm + log_expo, axis=1)

    return labels[np.argmax(scores, axis=1)]


In [40]:
# Baseline without k-fold validation: fit on the provided training file and
# score once on the provided test file.
classes, mean, var, priors = fit(X_train, y_train)
y_pred = predict(X_test, classes, mean, var, priors)

# Share of test rows whose predicted label equals the true label.
accuracy = (y_pred == y_test).mean()

# NOTE(review): the label says "each fold", but this is a single hold-out
# accuracy; the text is kept as-is so it still matches the recorded output.
print(f"Accuracies for each fold: {accuracy}")

Accuracies for each fold: 0.6923076923076923


In [48]:
# 10-Fold Cross Validation over the combined (train + test) data set.
kf = KFold(n_splits=10, shuffle=True, random_state=42)

# Lists to store metrics for each fold
accuracies = []
kappa_scores = []
precisions = []
recalls = []
f1_scores = []
confusion_matrices = []

for train_index, test_index in kf.split(X):
    # Fold-local names on purpose: rebinding X_train / X_test / y_train / y_test
    # here would replace the original hold-out split with the last CV fold, so
    # any cell (re-)run afterwards would silently evaluate on the wrong data.
    X_fold_train, X_fold_test = X.iloc[train_index], X.iloc[test_index]
    y_fold_train, y_fold_test = y.iloc[train_index], y.iloc[test_index]

    # Train and test the model on this fold
    classes, mean, var, priors = fit(X_fold_train, y_fold_train)
    y_pred = predict(X_fold_test, classes, mean, var, priors)

    accuracy = accuracy_score(y_fold_test, y_pred)
    kappa = cohen_kappa_score(y_fold_test, y_pred)
    precision = precision_score(y_fold_test, y_pred)
    recall = recall_score(y_fold_test, y_pred)
    f1 = f1_score(y_fold_test, y_pred)
    conf_matrix = confusion_matrix(y_fold_test, y_pred)

    # Append results
    accuracies.append(accuracy)
    kappa_scores.append(kappa)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)
    confusion_matrices.append(conf_matrix)

# Display metrics: per-fold values followed by their mean
print(f"Accuracies for each fold: {accuracies}")
print(f"Average Accuracy: {np.mean(accuracies):.4f}")
print(f"Kappa Scores for each fold: {kappa_scores}")
print(f"Average Kappa Score: {np.mean(kappa_scores):.4f}")
print(f"Precisions for each fold: {precisions}")
print(f"Average Precision: {np.mean(precisions):.4f}")
print(f"Recalls for each fold: {recalls}")
print(f"Average Recall: {np.mean(recalls):.4f}")
print(f"F1-Scores for each fold: {f1_scores}")
print(f"Average F1-Score: {np.mean(f1_scores):.4f}")
print("Confusion Matrices for each fold:")
for i, cm in enumerate(confusion_matrices):
    print(f"Fold {i+1}:\n{cm}")

Accuracies for each fold: [0.7777777777777778, 0.5555555555555556, 0.7037037037037037, 0.6666666666666666, 0.6666666666666666, 0.7777777777777778, 0.7037037037037037, 0.6153846153846154, 0.6153846153846154, 0.6923076923076923]
Average Accuracy: 0.6775
Kappa Scores for each fold: [0.4375, 0.21359223300970875, 0.4193548387096775, 0.18181818181818177, 0.3485254691689007, 0.4563758389261745, 0.4193548387096775, 0.19753086419753074, 0.3264248704663213, 0.40909090909090917]
Average Kappa Score: 0.3410
Precisions for each fold: [0.9444444444444444, 1.0, 1.0, 0.8333333333333334, 1.0, 1.0, 1.0, 0.8571428571428571, 1.0, 1.0]
Average Precision: 0.9635
Recalls for each fold: [0.7727272727272727, 0.4782608695652174, 0.6190476190476191, 0.7142857142857143, 0.5909090909090909, 0.7391304347826086, 0.6190476190476191, 0.6, 0.47368421052631576, 0.6]
Average Recall: 0.6207
F1-Scores for each fold: [0.85, 0.6470588235294118, 0.7647058823529411, 0.7692307692307693, 0.7428571428571429, 0.85, 0.7647058823529