In [None]:

import pandas as pd
import numpy as np
from scipy.signal import welch
from scipy.integrate import trapezoid
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
import pickle

# ---------- 1. Helper Functions ----------
def bandpower(signal, sf, band):
    """Return the power of ``signal`` contained in one frequency band.

    The power spectral density is estimated with Welch's method (SciPy's
    default segment settings) and then integrated with the trapezoidal rule
    over every frequency bin that lies in the closed interval
    ``[band[0], band[1]]``.

    Args:
        signal: 1-D sequence of samples.
        sf: Sampling frequency in Hz.
        band: ``(low, high)`` band edges in Hz; both edges are inclusive.

    Returns:
        The integrated PSD over the band (0.0 when no bin falls in the band).
    """
    freqs, psd = welch(signal, sf)
    lo_hz, hi_hz = band
    in_band = (lo_hz <= freqs) & (freqs <= hi_hz)
    band_freqs = freqs[in_band]
    band_psd = psd[in_band]
    return trapezoid(band_psd, x=band_freqs)

def extract_features(segment, fs=256):
    """Build the 7-element feature vector for one EEG segment.

    Args:
        segment: 1-D array of samples for a single analysis window.
        fs: Sampling frequency in Hz.

    Returns:
        A list in this fixed order: delta (1-4 Hz), theta (4-8 Hz),
        alpha (8-13 Hz) and beta (13-30 Hz) band powers, the segment
        variance, the alpha/beta ratio and the theta/beta ratio.
    """
    band_edges = ((1, 4), (4, 8), (8, 13), (13, 30))
    delta, theta, alpha, beta = (
        bandpower(segment, fs, edges) for edges in band_edges
    )
    variance = np.var(segment)
    # Small offset keeps both ratios finite when the beta power is zero.
    beta_denominator = beta + 1e-6
    return [
        delta,
        theta,
        alpha,
        beta,
        variance,
        alpha / beta_denominator,
        theta / beta_denominator,
    ]

def load_signal(path):
    """Load a single-channel EEG signal from a .txt, .xlsx, or CSV-like file.

    The file type is chosen from the path suffix, compared
    case-insensitively so that e.g. ``REC.TXT`` is treated like ``rec.txt``
    instead of silently falling through to the CSV reader:

    * ``.xlsx`` - read with ``pandas.read_excel``; the last column is used.
    * ``.txt``  - read with ``numpy.loadtxt`` and rescaled with
      ``(x - 512) / 512``.
    * anything else - read with ``pandas.read_csv`` (latin1 encoding); the
      last column is used.

    Args:
        path: File location as a ``str`` or any ``os.PathLike`` object.

    Returns:
        A NumPy array of samples. Only the ``.txt`` branch rescales the
        values; spreadsheet/CSV values are returned as stored.
    """
    import os  # local import: the file's import block does not provide os

    suffix_probe = os.fspath(path).lower()

    if suffix_probe.endswith(".xlsx"):
        df = pd.read_excel(path)
        return df[df.columns[-1]].values

    if suffix_probe.endswith(".txt"):
        # Raw ADC readings (0-1023 per the original note) from the BioAmp.
        data = np.loadtxt(path)
        # Centre on mid-scale and normalize to roughly -1 ... +1.
        return (data - 512.0) / 512.0

    df = pd.read_csv(path, encoding='latin1')
    return df[df.columns[-1]].values

# ---------- 2. Load All Data ----------
# Recordings grouped by class label; each file is split into fixed-length
# windows and every window becomes one labelled feature vector.
# NOTE(review): there is no P1 unfocused recording and "P2_UNFOCUS.txt" does
# not follow the "*_UNFOCUSED.txt" pattern of the others - confirm both are
# intentional and match the files on disk.
file_map = {
    "focused": [
        "P1_FOCUSED.txt", "P2_FOCUSED.txt", "P3_FOCUSED.txt",
        "P4_FOCUSED.txt", "P5_FOCUSED.txt", "P6_FOCUSED.txt"
    ],
    "unfocused": [
        "P2_UNFOCUS.txt", "P3_UNFOCUSED.txt",
        "P4_UNFOCUSED.txt", "P5_UNFOCUSED.txt", "P6_UNFOCUSED.txt"
    ]
}

fs = 256      # Sampling rate in Hz (same as your Arduino)
win = 2 * fs  # 2-second window (2 * fs samples)
X, y = [], []

print("Extracting features...")

for label, paths in file_map.items():
    for path in paths:
        sig = load_signal(path)
        if len(sig) < win:
            # Too short for even one window: report it instead of dropping
            # the recording silently.
            print(f"{label.upper()} - {path}: skipped "
                  f"({len(sig)} samples, need at least {win})")
            continue
        # Number of complete, non-overlapping windows. Iterating over this
        # count (rather than range(0, len(sig) - win, win)) keeps the final
        # window when len(sig) is an exact multiple of win, and makes the
        # reported count match what was actually extracted.
        n_windows = len(sig) // win
        for k in range(n_windows):
            seg = sig[k * win:(k + 1) * win]
            feats = extract_features(seg, fs)
            y.append(label)
            X.append(feats)
        print(f"{label.upper()} - {path}: {n_windows} windows")

X, y = np.array(X), np.array(y)
print("\nTotal segments per class:")
for u, c in zip(*np.unique(y, return_counts=True)):
    print(f"{u}: {c}")

# ---------- 3. Train-Test Split ----------
# Split BEFORE scaling: the scaler's mean/std must come from the training
# rows only, otherwise test-set statistics leak into the model inputs and the
# evaluation below is optimistic. The split itself is unchanged (same
# test_size, stratification and random_state).
# NOTE(review): windows from one recording can still land in both sets; a
# per-recording (grouped) split would give a stricter estimate.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Kept so any later code that reads X_scaled keeps working; it is the full
# feature matrix transformed with the training-set statistics.
X_scaled = scaler.transform(X)

# ---------- 4. Train KNN ----------
print("\nTraining KNN model...")
# Five nearest neighbours, with each neighbour's vote weighted by distance.
knn = KNeighborsClassifier(
    weights='distance',
    n_neighbors=5,
).fit(X_train, y_train)

# ---------- 5. Evaluate ----------
# Score the held-out split and print per-class metrics plus the confusion
# matrix.
y_pred = knn.predict(X_test)
report = classification_report(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
print("\nClassification Report:")
print(report)
print("Confusion Matrix:\n", conf_mat)

# ---------- 6. Save Model and Scaler ----------
# Persist the classifier and the scaler side by side; both are needed to
# score new data the same way.
artifacts = (
    ("knn_eeg_model.pkl", knn),
    ("scaler.pkl", scaler),
)
for filename, artifact in artifacts:
    with open(filename, "wb") as out_file:
        pickle.dump(artifact, out_file)

print("\n✅ Model and scaler saved successfully.")