In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [None]:
# --- Setup ---
# Every artifact produced below is written into this folder; creating it is a
# no-op when it already exists.
os.makedirs(name="outputs", exist_ok=True)

In [None]:
def savefig(path):
    """Save the current matplotlib figure to ``path`` and close it.

    The image is written at 150 dpi with a tight bounding box. When ``path``
    is a filesystem path, its parent directory is created first if it does not
    exist, so the helper does not rely on a separate setup cell having run.
    After saving, the current figure is closed.

    Parameters
    ----------
    path : str, os.PathLike, or file-like
        Destination passed straight through to ``plt.savefig``.
    """
    # Only filesystem paths have a parent directory to create; file-like
    # targets are handed to matplotlib untouched.
    if isinstance(path, (str, os.PathLike)):
        parent_dir = os.path.dirname(path)
        if parent_dir:  # empty for a bare filename in the working directory
            os.makedirs(parent_dir, exist_ok=True)
    plt.savefig(path, dpi=150, bbox_inches='tight')
    plt.close()

In [None]:
# --- Load & clean data ---
# Read the raw spreadsheet into a DataFrame; the cleaning cells below work on it.
df = pd.read_excel(io="CSE 445_ Data.xlsx")

In [None]:
# Drop empty rows
# Only rows in which *every* cell is missing are removed here.
df = df.dropna(how='all')
# Trim stray whitespace around header names so later column lookups match.
# Headers go through str() first: read_excel can produce non-string labels
# (e.g. numeric or date headers), and calling .strip() on those would raise
# AttributeError.
df.columns = [str(c).strip() for c in df.columns]

In [None]:
# Clean reaction time
# Cells are treated as free text: the literal unit 'sec' is removed, then every
# character that is not a digit or a dot is stripped.
# The remainder is parsed with pd.to_numeric(errors='coerce') so that leftovers
# which are not valid numbers ('' from missing cells, or malformed text such as
# '0.45.' or '1.2.3') become NaN instead of raising ValueError; those rows are
# removed by the dropna on this column further down. This mirrors how the
# 'Accuracy' column is parsed.
# NOTE: the character filter also removes '-' and 'e', so negative or
# scientific-notation inputs would be mangled rather than rejected.
df['Average Reaction Time'] = (
    df['Average Reaction Time'].astype(str)
    .str.replace('sec', '', regex=False)
    .str.replace('[^0-9.]', '', regex=True)
    .pipe(pd.to_numeric, errors='coerce')
    .astype(float)  # keep a float column even when every value is integral
)

In [None]:
# Parse Accuracy as numbers (entries that cannot be parsed become NaN), then
# keep only the rows where both feature columns hold a value.
df['Accuracy'] = pd.to_numeric(arg=df['Accuracy'], errors='coerce')
df = df.dropna(
    subset=['Accuracy', 'Average Reaction Time'],
)

In [None]:
# Save CSV
# Write the cleaned table to disk, leaving the row index out of the file.
df.to_csv(path_or_buf="outputs/Collected_Data.csv", index=False)

In [None]:
# --- Features ---
# The two cleaned numeric columns are the model inputs; they are standardised
# with a scaler fitted on the full cleaned table.
X = df.loc[:, ['Accuracy', 'Average Reaction Time']]
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

In [None]:
# --- KMeans clustering ---
# Split the standardised samples into two clusters with a fixed seed, then
# store each row's cluster id next to its data.
kmeans = KMeans(n_clusters=2, n_init="auto", random_state=42)
kmeans.fit(X_scaled)
labels = kmeans.labels_
df["MP1_Label"] = labels

In [None]:
# Save cluster plot
# Axes show the original (unscaled) feature values; colour encodes cluster id.
plt.scatter(
    x=df['Accuracy'],
    y=df['Average Reaction Time'],
    c=labels,
)
plt.title("KMeans Clusters")
plt.xlabel("Accuracy")
plt.ylabel("Reaction Time (sec)")
savefig("outputs/mp1_kmeans_clusters.png")

In [None]:
# --- Train/Test split ---
# Hold out 30% of the samples for evaluation, stratified on the cluster labels
# and seeded so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    labels,
    stratify=labels,
    random_state=42,
    test_size=0.3,
)

In [None]:
# --- SVM ---
# Fit a support vector classifier with default settings on the training part,
# then predict the held-out samples.
svm = SVC().fit(X_train, y_train)
y_pred_svm = svm.predict(X_test)

In [None]:
# Tabulate held-out labels against the SVM predictions and save the rendered
# matrix as an image.
cm_svm = confusion_matrix(y_true=y_test, y_pred=y_pred_svm)
ConfusionMatrixDisplay(confusion_matrix=cm_svm).plot()
plt.title("SVM Confusion Matrix")
savefig("outputs/mp1_svm_confusion.png")

In [None]:
# --- Decision Tree ---
# Fit a seeded decision tree on the training part, then predict the held-out
# samples.
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)

In [None]:
# Tabulate held-out labels against the decision-tree predictions and save the
# rendered matrix as an image.
cm_dt = confusion_matrix(y_true=y_test, y_pred=y_pred_dt)
ConfusionMatrixDisplay(confusion_matrix=cm_dt).plot()
plt.title("Decision Tree Confusion Matrix")
savefig("outputs/mp1_dt_confusion.png")

In [None]:
# Save tree visualization
# Draw the fitted tree on a 10x6 figure with filled nodes, labelling the two
# input features, and write it to disk.
plt.figure(figsize=(10, 6))
plot_tree(
    decision_tree=dt,
    feature_names=['Accuracy', 'Reaction Time'],
    filled=True,
)
savefig("outputs/mp1_decision_tree.png")

In [None]:
# Final status line, printed once all cells above have run.
print("✅ Mini Project 1 complete. Check the outputs/ folder!")