In [13]:
import pandas as pd
import numpy as np
from numpy.linalg import inv, det
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# Load the dataset (assumed format: feature columns first, label in the
# second-to-last column).
df = pd.read_csv("nlpca_98perc_6pc.csv")
X = df.iloc[:, :6].values           # First 6 columns as features (presumably the 6 PCs named in the file name - confirm)
y = df.iloc[:, -2].values           # Second-to-last column as the label (CME, per the original note)

# Split dataset into 60% train / 20% validation / 20% test, stratified on y:
# 20% is held out as the test set first, then 25% of the remaining 80%
# (= 20% of the full data) becomes the validation set.
# random_state=42 keeps both splits reproducible across runs.
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.25, random_state=42, stratify=y_temp)

# Function to compute GDA parameters
def compute_gda_parameters(X, y):
    """Estimate per-class Gaussian parameters for binary GDA.

    Each class gets its own mean vector and its own covariance matrix
    (i.e. the quadratic-discriminant flavour of GDA, no shared covariance).

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix. A 1-D array is treated as a single feature column.
    y : array-like of shape (n_samples,)
        Class labels. Rows with ``y == 0`` form class 0 and rows with
        ``y == 1`` form class 1; rows with any other label are ignored.

    Returns
    -------
    mu0, mu1 : ndarray of shape (n_features,)
        Sample mean of class 0 and class 1.
    cov0, cov1 : ndarray of shape (n_features, n_features)
        Unbiased sample covariance (``np.cov`` default normalisation) of
        class 0 and class 1. Always 2-D, even for a single feature.

    Raises
    ------
    ValueError
        If either class has fewer than two samples, since a sample
        covariance cannot be estimated from them.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if X.ndim == 1:
        # Single feature given as a flat vector: make it an (n, 1) column.
        X = X[:, np.newaxis]

    means = []
    covs = []
    for label in (0, 1):
        X_cls = X[y == label]
        if X_cls.shape[0] < 2:
            raise ValueError(
                f"class {label} has {X_cls.shape[0]} sample(s); "
                "at least 2 are required to estimate a covariance matrix"
            )
        means.append(np.mean(X_cls, axis=0))
        # np.cov collapses to a 0-d array when there is only one feature;
        # atleast_2d keeps the result usable by matrix routines downstream.
        covs.append(np.atleast_2d(np.cov(X_cls, rowvar=False)))

    mu0, mu1 = means
    cov0, cov1 = covs
    return mu0, mu1, cov0, cov1

# Function to predict using GDA
def _gaussian_score(X, mu, cov):
    """Return ``(x - mu)^T cov^{-1} (x - mu) + log|cov|`` for every row x of X."""
    mu = np.atleast_1d(np.asarray(mu, dtype=float))
    cov = np.atleast_2d(np.asarray(cov, dtype=float))

    # slogdet works in log space, so it neither underflows to log(0) = -inf
    # nor overflows to log(inf) the way np.log(det(cov)) does for very small
    # or very large variances.
    sign, log_det = np.linalg.slogdet(cov)
    if not sign > 0:
        raise np.linalg.LinAlgError(
            "covariance matrix must have a positive determinant"
        )

    diff = X - mu                               # (n_samples, n_features)
    # Solve cov @ Z = diff^T instead of forming the explicit inverse.
    solved = np.linalg.solve(cov, diff.T)       # (n_features, n_samples)
    mahalanobis_sq = np.einsum("ij,ji->i", diff, solved)
    return mahalanobis_sq + log_det


def gda_predict(X, mu0, mu1, cov0, cov1, priors=None):
    """Classify samples with a two-class Gaussian discriminant rule.

    For each class k the score
    ``d_k(x) = (x - mu_k)^T cov_k^{-1} (x - mu_k) + log|cov_k| - 2 log(prior_k)``
    is computed (twice the negative Gaussian log-posterior up to a shared
    constant) and the class with the smaller score wins.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Samples to classify. A 1-D array is treated as one feature column.
    mu0, mu1 : array-like of shape (n_features,)
        Class means.
    cov0, cov1 : array-like of shape (n_features, n_features)
        Class covariance matrices.
    priors : pair of float, optional
        ``(prior0, prior1)`` class prior probabilities, both > 0. When
        omitted the priors are taken as equal, which reproduces the
        original prior-free decision rule.

    Returns
    -------
    ndarray of shape (n_samples,)
        Predicted label per sample: 0 where ``d0 < d1``, otherwise 1
        (ties go to class 1).

    Raises
    ------
    numpy.linalg.LinAlgError
        If a covariance matrix is singular or has a non-positive determinant.
    ValueError
        If ``priors`` is given and a prior is not strictly positive.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
        X = X[:, np.newaxis]
    if X.shape[0] == 0:
        return np.array([], dtype=int)

    d0 = _gaussian_score(X, mu0, cov0)
    d1 = _gaussian_score(X, mu1, cov1)

    if priors is not None:
        prior0, prior1 = priors
        if not (prior0 > 0 and prior1 > 0):
            raise ValueError("priors must be strictly positive")
        d0 = d0 - 2.0 * np.log(prior0)
        d1 = d1 - 2.0 * np.log(prior1)

    # Strict '<' keeps the original tie-breaking: equal scores predict 1.
    return np.where(d0 < d1, 0, 1)

# Fit the per-class Gaussian parameters on the training split only.
gda_params = compute_gda_parameters(X_train, y_train)
mu0, mu1, cov0, cov1 = gda_params

# Evaluate on the validation split.
val_preds = gda_predict(X_val, *gda_params)
val_cm = confusion_matrix(y_val, val_preds)
print("Validation Confusion Matrix:\n", val_cm)
val_report = classification_report(y_val, val_preds)
print("\nValidation Report:\n", val_report)

# Evaluate on the held-out test split.
test_preds = gda_predict(X_test, *gda_params)
test_cm = confusion_matrix(y_test, test_preds)
print("Test Confusion Matrix:\n", test_cm)
test_report = classification_report(y_test, test_preds)
print("\nTest Report:\n", test_report)

Validation Confusion Matrix:
 [[273351  11591]
 [108139   5371]]

Validation Report:
               precision    recall  f1-score   support

           0       0.72      0.96      0.82    284942
           1       0.32      0.05      0.08    113510

    accuracy                           0.70    398452
   macro avg       0.52      0.50      0.45    398452
weighted avg       0.60      0.70      0.61    398452

Test Confusion Matrix:
 [[273365  11577]
 [108176   5334]]

Test Report:
               precision    recall  f1-score   support

           0       0.72      0.96      0.82    284942
           1       0.32      0.05      0.08    113510

    accuracy                           0.70    398452
   macro avg       0.52      0.50      0.45    398452
weighted avg       0.60      0.70      0.61    398452

