# EEG ADHD Classification with Patient-Wise Train-Test Split
This notebook performs EEG data classification with proper patient-wise train/test splitting to prevent data leakage.

In [None]:
import numpy as np
import pandas as pd
import mne
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from scipy.signal import spectrogram
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.metrics import classification_report, confusion_matrix

# Load EEG dataset. Besides "ID" (patient identifier) and "Class" (label),
# every remaining column is treated as one EEG channel further down.
data = pd.read_csv("adhdata.csv")

# Patient-wise split: whole patients are assigned to either train or test so
# that no recording contributes windows to both sets.
# The first row of each patient supplies its label, in order of appearance.
_first_rows = data.drop_duplicates(subset="ID")
patient_ids = _first_rows["ID"].to_numpy()
patient_classes = _first_rows["Class"].to_numpy()
# Stratify on the per-patient label so train and test keep the cohort's class
# ratio; an unstratified draw over a small cohort can skew either set.
# NOTE: stratification requires at least two patients per class.
train_ids, test_ids = train_test_split(
    patient_ids,
    test_size=0.2,
    random_state=42,
    stratify=patient_classes,
)

train_data = data[data["ID"].isin(train_ids)]
test_data = data[data["ID"].isin(test_ids)]

# Channel info
channel_names = data.columns.drop(["ID", "Class"]).tolist()
sfreq = 128  # sampling rate in Hz handed to MNE (assumed from the dataset -- confirm)
info = mne.create_info(ch_names=channel_names, sfreq=sfreq, ch_types="eeg")
montage = mne.channels.make_standard_montage("standard_1020")
info.set_montage(montage)

# Sliding-window parameters (seconds) and their sample-count equivalents
segment_length = 2
overlap = 1
step = segment_length - overlap
n_samples = int(segment_length * sfreq)
step_samples = int(step * sfreq)
if n_samples < 1 or step_samples < 1:
    # A zero/negative step would otherwise only surface later inside range()
    raise ValueError(
        "segment_length must be positive and overlap must be smaller than "
        f"segment_length (got segment_length={segment_length}, overlap={overlap})"
    )

def extract_segments(dataframe):
    """Band-pass filter each patient's recording and cut it into windows.

    Rows are grouped by ``ID``; each group is treated as one continuous
    recording, filtered between 1 and 45 Hz, and sliced into windows of
    ``n_samples`` samples that advance by ``step_samples`` samples.

    Depends on the module-level ``info``, ``n_samples`` and ``step_samples``.

    Args:
        dataframe: DataFrame with an ``ID`` column, a ``Class`` column and one
            column per EEG channel (matching the channels in ``info``).

    Returns:
        ``(segments, labels)``: ``segments`` is a list of arrays shaped
        ``(n_channels, n_samples)``; ``labels`` holds, for every segment, the
        ``Class`` value of the first row of the patient it came from.
        Recordings shorter than one window contribute no segments.
    """
    segments = []
    labels = []
    # One grouping pass instead of re-masking the whole frame per patient;
    # sort=False keeps patients in order of first appearance.
    for _, patient in dataframe.groupby("ID", sort=False):
        eeg_data = patient.drop(columns=["Class", "ID"]).to_numpy().T
        label = patient["Class"].iloc[0]
        # NOTE(review): MNE interprets EEG values as volts; the CSV values are
        # passed through unscaled -- confirm the units if amplitudes matter.
        raw = mne.io.RawArray(eeg_data, info, verbose=False)
        # Filter the full recording before windowing so filter edge effects
        # are not introduced at every window boundary.
        raw.filter(1., 45., fir_design="firwin", verbose=False)
        # Fetch the filtered samples once and slice with NumPy rather than
        # calling raw.get_data for every window.
        filtered = raw.get_data()
        for start in range(0, filtered.shape[1] - n_samples + 1, step_samples):
            # Copy so that overlapping windows do not share memory
            segments.append(filtered[:, start:start + n_samples].copy())
            labels.append(label)
    return segments, labels

# Window both partitions with the same routine: the training patients first,
# then the held-out test patients.
(train_segments, train_labels), (test_segments, test_labels) = (
    extract_segments(partition) for partition in (train_data, test_data)
)

# Create spectrograms
def to_spectrograms(segments, fs=None, nperseg=256, noverlap=128, nfft=256):
    """Compute a magnitude spectrogram for every channel of every segment.

    Args:
        segments: Sequence of equally shaped 2-D arrays laid out as
            ``(n_channels, n_samples)``.
        fs: Sampling frequency in Hz. ``None`` (default) uses the
            module-level ``sfreq``.
        nperseg: Samples per STFT window, forwarded to
            ``scipy.signal.spectrogram``.
        noverlap: Samples shared by consecutive STFT windows; must stay below
            the effective ``nperseg`` or SciPy raises ``ValueError``.
        nfft: FFT length; must be at least ``nperseg``.

    Returns:
        Array shaped ``(n_segments, n_channels, n_freqs, n_times)``. An empty
        ``segments`` sequence yields an empty 1-D array.
    """
    if fs is None:
        fs = sfreq
    # spectrogram operates along the last axis, so one call per segment
    # covers all of its channels; index [2] picks Sxx out of (f, t, Sxx).
    per_segment = [
        spectrogram(
            segment, fs=fs, window='hann', nperseg=nperseg,
            noverlap=noverlap, nfft=nfft, scaling='density', mode='magnitude'
        )[2]
        for segment in segments
    ]
    return np.array(per_segment)

# Model inputs: one magnitude spectrogram per channel for every window
X_train = to_spectrograms(train_segments)
X_test = to_spectrograms(test_segments)

# Encode labels
le = LabelEncoder()
y_train = le.fit_transform(train_labels)
y_test = le.transform(test_labels)
class_names = [str(name) for name in le.classes_]
class_ids = np.arange(len(class_names))

# Patient-wise validation split.
# Keras' validation_split holds out the *trailing* samples before shuffling.
# Segments are appended patient by patient, so that would validate on the last
# patients in file order and could cut one patient across both parts. Instead,
# whole patients are held out, mirroring the train/test split.
# The per-window patient id is rebuilt from the windowing rule used by
# extract_segments (patients in order of first appearance, one window every
# step_samples rows while a full n_samples window still fits).
rows_per_patient = train_data.groupby("ID", sort=False).size()
windows_per_patient = [
    len(range(0, int(n_rows) - n_samples + 1, step_samples))
    for n_rows in rows_per_patient
]
segment_patient_ids = np.repeat(
    rows_per_patient.index.to_numpy(), windows_per_patient
)
if len(segment_patient_ids) != len(X_train):
    raise RuntimeError(
        "Reconstructed per-window patient ids do not match the number of "
        "training segments; the windowing rule has diverged from "
        "extract_segments."
    )
fit_ids, val_ids = train_test_split(train_ids, test_size=0.2, random_state=42)
val_mask = np.isin(segment_patient_ids, val_ids)

# Model
# Input layout is (EEG channels, frequency bins, time bins) with the EEG
# channels acting as the convolution channels (channels_first).
input_shape = X_train.shape[1:]
model = models.Sequential([
    layers.Input(shape=input_shape),
    layers.Conv2D(32, (3, 3), activation='relu', padding='same', data_format='channels_first'),
    # Pool along frequency only; the time axis is left untouched
    layers.MaxPooling2D((2, 1), data_format='channels_first'),
    layers.Conv2D(64, (3, 3), activation='relu', padding='same', data_format='channels_first'),
    layers.MaxPooling2D((2, 1), data_format='channels_first'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    # Single sigmoid unit: assumes exactly two encoded classes (0 and 1)
    layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Training
history = model.fit(
    X_train[~val_mask], y_train[~val_mask],
    epochs=10, batch_size=32,
    validation_data=(X_train[val_mask], y_train[val_mask]),
)

# Evaluation
test_loss, test_acc = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)
# Threshold the sigmoid output at 0.5 to obtain hard class predictions
y_pred_classes = (y_pred > 0.5).astype(int).flatten()

print(f"Test Accuracy: {test_acc:.4f}")
# Passing labels explicitly keeps the report aligned with class_names even if
# one class is absent from the test predictions.
print(classification_report(
    y_test, y_pred_classes, labels=class_ids, target_names=class_names
))

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred_classes, labels=class_ids)
sns.heatmap(
    conf_matrix, annot=True, fmt="d", cmap="Blues",
    xticklabels=class_names, yticklabels=class_names,
)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix (Patient-Wise Split)")
plt.show()
