In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
import time

# Load the feature table and run a quick exploratory pass over it.
file_path = "/content/Custom_CNN_Features.csv"
df = pd.read_csv(file_path)

# DataFrame.info() prints its own report and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" line.
df.info()
print(df.isnull().sum())

# Remove the 'Filename' column so it is not treated as a feature below.
df.drop(columns=['Filename'], inplace=True)
# Impute missing numeric values with the column mean; non-numeric columns
# are not touched by this fill.
df.fillna(df.select_dtypes(include=[np.number]).mean(), inplace=True)

# Class distribution, taking the last column as the label.
print(df.iloc[:, -1].value_counts())

# Correlate numeric columns only: DataFrame.corr() on a frame that still
# holds a non-numeric column (e.g. a string class label) raises on
# pandas >= 2.0, where numeric_only defaults to False.
numeric_df = df.select_dtypes(include=[np.number])
plt.figure(figsize=(12, 6))
sns.heatmap(numeric_df.corr(), cmap='coolwarm')
plt.title("Correlation Matrix Heatmap")
plt.show()

# Rank of the feature matrix (every column except the last).
print(f"Rank of feature matrix: {np.linalg.matrix_rank(df.iloc[:, :-1].values)}")

# Encode the class label as integer category codes.
df['Class Label'] = df['Class Label'].astype('category').cat.codes

# Raw feature matrix and encoded target.
X = df.drop(columns=['Class Label']).values
y = df['Class Label'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Standardise AFTER splitting, fitting on the training partition only.
# Previously the scaled matrix was overwritten by the raw values before the
# split, so the models were trained on unscaled features; fitting on the
# full data would also leak test-set statistics into training.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Models to benchmark, keyed by the display name used in the results table.
# Estimators with internal randomness are seeded with the same value as the
# train/test split so that repeated runs produce the same metrics.
classifiers = {
    "kNN": KNeighborsClassifier(),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    # probability=True enables predict_proba, which the AUROC computation needs;
    # random_state seeds the shuffling used for those probability estimates.
    "SVM": SVC(probability=True, random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Naïve Bayes": GaussianNB()
}

# Fit every classifier, time training and prediction, and collect metrics.
results = []
for name, clf in classifiers.items():
    # perf_counter() is monotonic and high-resolution, so it is the right
    # clock for measuring intervals (time.time() can jump with clock changes).
    start_time = time.perf_counter()
    clf.fit(X_train, y_train)
    train_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    y_pred = clf.predict(X_test)
    test_time = time.perf_counter() - start_time

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='weighted')
    rec = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')

    # roc_auc_score wants a 1-D score vector (probability of the class with
    # the greater label) for binary targets and the full probability matrix
    # for multiclass ones; passing an (n, 2) matrix for a binary problem
    # raises a ValueError.
    y_score = clf.predict_proba(X_test)
    if y_score.shape[1] == 2:
        y_score = y_score[:, 1]
    auc = roc_auc_score(y_test, y_score, multi_class='ovr')

    results.append([name, acc, prec, rec, f1, auc, train_time, test_time])

results_df = pd.DataFrame(results, columns=["Classifier", "Accuracy", "Precision", "Recall", "F1-Score", "AUROC", "Train Time", "Test Time"])
print(results_df)

# Persist the comparison table next to the input data.
results_df.to_csv("/content/results.csv", index=False)
