In [None]:
## Project Research Question:
#----------------------------------------------
#Can we accurately classify EEG signals to detect epileptic seizures using deep learning techniques?

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
!pip install tensorflow
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.feature_selection import SelectKBest, f_classif, RFE
from sklearn.metrics import (accuracy_score, classification_report, 
                           confusion_matrix, roc_curve, auc, 
                           precision_recall_curve, average_precision_score)
from sklearn.ensemble import (
    RandomForestClassifier, AdaBoostClassifier, VotingClassifier
)
from sklearn.metrics import classification_report, confusion_matrix

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from collections import Counter
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the seizure-recognition CSV (expected in the working directory) into a DataFrame
df = pd.read_csv(filepath_or_buffer='Epileptic Seizure Recognition.csv')

In [None]:
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() only appended a stray "None" line to the output.
df.info()

In [None]:
# Preview the first five records of the loaded frame
df.head(n=5)

In [None]:
# Drop the row-ID column.
# The column is matched by prefix rather than by the exact name 'Unnamed' because
# pandas labels a header-less CSV column 'Unnamed: 0'. Matching by prefix also makes
# this cell safe to re-run: once the column is gone the list is empty and nothing is dropped,
# instead of raising KeyError.
id_cols = [col for col in df.columns if str(col).startswith('Unnamed')]
df = df.drop(columns=id_cols)

In [None]:
# Count the NaN entries in every column
missing_per_column = df.isna().sum()
print(missing_per_column)

In [None]:
# Summary statistics (count, mean, std, quartiles, extremes) for the numeric columns
summary_stats = df.describe()
print(summary_stats)

In [None]:
# Handle missing values: replace NaN with the mean of the respective column.
# numeric_only=True keeps the cell working if a non-numeric column is still present
# (recent pandas raises TypeError from DataFrame.mean() in that case), and the result
# is re-assigned instead of mutated in place so the cell is idempotent on re-run.
# NOTE(review): this also mean-fills the target column 'y' if it ever contains NaN — confirm that is intended.
column_means = df.mean(numeric_only=True)
df = df.fillna(column_means)

In [None]:
# Re-count NaN entries per column to confirm the imputation left none behind
remaining_missing = df.isna().sum(axis=0)
print(remaining_missing)

In [None]:
# Collect the EEG signal columns, i.e. every column whose name begins with 'X'
# (expected to be 'X1' ... 'X178')
signal_cols = list(filter(lambda name: name.startswith('X'), df.columns))

In [None]:
# Engineered feature: row-wise average over all EEG signal columns
row_means = df[signal_cols].mean(axis=1)
df = df.assign(mean_signal=row_means)

In [None]:
# Overlaid histograms (with KDE) of the columns at positions 30-49.
# The title previously named df.columns[0], which is not among the plotted columns;
# it is now built from the columns that are actually drawn.
hist_cols = df.columns[30:50]
plt.figure(figsize=(8,6))
sns.histplot(df[hist_cols], kde=True, bins=30)
plt.title(f'Histogram of {hist_cols[0]} to {hist_cols[-1]}')
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlations between all columns of df
corr = df.corr()
plt.figure(figsize=(100, 50))
sns.heatmap(data=corr, cmap='coolwarm', annot=True, fmt=".2f")
plt.title('Correlation Heatmap')
plt.show()

In [None]:
# Side-by-side box plots for the columns at positions 1-49
boxplot_frame = df.iloc[:, 1:50]
plt.figure(figsize=(10, 6))
sns.boxplot(data=boxplot_frame)
plt.title("EEG Signal Distributions")

In [None]:
# Project every column except the target onto two principal components
# and scatter the result, coloured by class
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
pca_input = df.drop(columns='y')
X_pca = pca.fit_transform(pca_input)
first_component, second_component = X_pca[:, 0], X_pca[:, 1]
sns.scatterplot(x=first_component, y=second_component, hue=df['y'])

In [None]:
# Line plot of the first row, restricted to the columns at positions 1-49
first_row_segment = df.iloc[0, 1:50]
plt.figure(figsize=(12, 6))
plt.plot(first_row_segment)
plt.title("Sample EEG Signal")

In [None]:
# EEG feature columns (names starting with "X")
eeg_columns = [col for col in df.columns if col.startswith("X")]

# One stacked panel per class label (1-5), all sharing the x-axis
fig, axes = plt.subplots(5, 1, figsize=(14, 16), sharex=True)

for ax, label in zip(axes, range(1, 6)):
    # Fixed random_state so the same example row is drawn on every run;
    # without it the figure changed on each execution of the notebook.
    sample = df[df['y'] == label].sample(1, random_state=42)
    ax.plot(eeg_columns, sample[eeg_columns].values.flatten(), label=f'Class y={label}')
    ax.set_title(f'EEG Signal Sample - Diagnosis Class y={label}')
    ax.set_ylabel("EEG Value")
    ax.legend()
    ax.grid(True)

# Common x-axis label.
# NOTE(review): per the UCI dataset description the X columns are consecutive time samples
# of a one-second recording, not separate channels — the label was changed accordingly; confirm.
plt.xlabel("Time Sample (X1 to X178)")
plt.xticks(rotation=90)
plt.tight_layout()
plt.show()

In [None]:
# Class balance of the target column 'y': relative frequencies, then a count plot
print("\nTarget variable distribution:")
class_shares = df['y'].value_counts(normalize=True)
print(class_shares)

plt.figure(figsize=(8, 6))
sns.countplot(data=df, x='y')
plt.title("Distribution of Target Variable")
plt.show()

In [None]:
# Univariate view: histogram + KDE for the first five int64/float64 columns.
# num_cols is kept as a notebook-level name because later cells read it.
num_cols = df.select_dtypes(include=['int64', 'float64']).columns
first_five_features = num_cols[:5]
for feature in first_five_features:
    plt.figure(figsize=(8, 4))
    sns.histplot(df[feature], kde=True)
    plt.title(f"Distribution of {feature}")
    plt.show()

In [None]:
# Bivariate view: for each of the first three numeric columns, one box per target class
first_three_features = num_cols[:3]
for feature in first_three_features:
    plt.figure(figsize=(8, 4))
    sns.boxplot(data=df, x='y', y=feature)
    plt.title(f"{feature} by Target Class")
    plt.show()

In [None]:
# Outlier check: a single horizontal box plot for each of the first three numeric columns
columns_to_inspect = num_cols[:3]
for feature in columns_to_inspect:
    plt.figure(figsize=(8, 4))
    feature_values = df[feature]
    sns.boxplot(x=feature_values)
    plt.title(f"Boxplot of {feature}")
    plt.show()

In [None]:
# Binary target: 1 where y == 1 (assumed to be the seizure class), 0 for every other class
is_class_one = df['y'] == 1
df['binary_label'] = is_class_one.astype('int64')

In [None]:
# Separate features and target.
# 'binary_label' is computed directly from 'y' (1 where y == 1), so leaving it in the
# feature matrix leaks the target into the model inputs. It is dropped together with 'y';
# errors='ignore' keeps the cell working when the binary-label cell has not been run.
target_derived_cols = ['y', 'binary_label']
X = df.drop(columns=target_derived_cols, errors='ignore')
y = df['y'] - 1  # Convert classes from [1-5] to [0-4]
y_cat = to_categorical(y, num_classes=5)  # one-hot encode for categorical_crossentropy

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y_cat, test_size=0.2, random_state=42)

In [None]:
# Keep only the columns in which at least one row differs from the first row,
# i.e. discard columns holding a single constant value
differs_from_first_row = X.ne(X.iloc[0]).any(axis=0)
X_non_constant = X.loc[:, differs_from_first_row]

In [None]:
# Standardise the features: statistics are learned from the training split only
# and then applied to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Stop training after 5 epochs without val_loss improvement and roll back to the best weights
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

In [None]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# ANN Model: two hidden ReLU layers (256 and 128 units) with dropout after the first,
# followed by a 5-way softmax output
n_input_features = X_train_scaled.shape[1]

ann = Sequential()
ann.add(Dense(256, input_shape=(n_input_features,), activation='relu'))
ann.add(Dropout(0.4))
ann.add(Dense(128, activation='relu'))
ann.add(Dense(5, activation='softmax'))

ann.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# 20% of the training split is held back for validation, which early stopping monitors
history_ann = ann.fit(
    X_train_scaled,
    y_train,
    validation_split=0.2,
    epochs=30,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1,
)

# Evaluate on the test split: one-hot targets and softmax outputs are both
# collapsed to class indices before scoring
y_pred_ann = ann.predict(X_test_scaled)
ann_true_classes = y_test.argmax(axis=1)
ann_pred_classes = y_pred_ann.argmax(axis=1)
print("\n[ANN] Accuracy:", accuracy_score(ann_true_classes, ann_pred_classes))
print(classification_report(ann_true_classes, ann_pred_classes))

In [None]:
# ANN Training Plots: accuracy (left) and loss (right) per epoch, train vs. validation
ann_curves = history_ann.history
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

acc_ax.plot(ann_curves['accuracy'], label='Train Accuracy')
acc_ax.plot(ann_curves['val_accuracy'], label='Val Accuracy')
acc_ax.set_title('ANN Accuracy Over Epochs')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

loss_ax.plot(ann_curves['loss'], label='Train Loss')
loss_ax.plot(ann_curves['val_loss'], label='Val Loss')
loss_ax.set_title('ANN Loss Over Epochs')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
from tensorflow.keras.layers import LSTM

# LSTM Model
# Each sample is reshaped to (samples, 1, features): one time step that carries every feature.
# NOTE(review): with a single time step the recurrent layer has no sequence to unroll —
# confirm whether (samples, features, 1) was the intended layout.
n_train_rows, n_lstm_features = X_train_scaled.shape
n_test_rows = X_test_scaled.shape[0]
X_train_lstm = X_train_scaled.reshape((n_train_rows, 1, n_lstm_features))
X_test_lstm = X_test_scaled.reshape((n_test_rows, 1, X_test_scaled.shape[1]))

lstm = Sequential()
lstm.add(LSTM(64, input_shape=(1, n_lstm_features)))
lstm.add(Dropout(0.4))
lstm.add(Dense(5, activation='softmax'))

lstm.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Same training schedule as the ANN: 30 epochs max, 20% validation hold-out, early stopping
history_lstm = lstm.fit(
    X_train_lstm,
    y_train,
    validation_split=0.2,
    epochs=30,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1,
)

# Score on the test split using class indices recovered via argmax
y_pred_lstm = lstm.predict(X_test_lstm)
lstm_true_classes = y_test.argmax(axis=1)
lstm_pred_classes = y_pred_lstm.argmax(axis=1)
print("\n[LSTM] Accuracy:", accuracy_score(lstm_true_classes, lstm_pred_classes))
print(classification_report(lstm_true_classes, lstm_pred_classes))

In [None]:
# LSTM Training Plots: accuracy (left) and loss (right) per epoch, train vs. validation
lstm_curves = history_lstm.history
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))

acc_ax.plot(lstm_curves['accuracy'], label='Train Accuracy')
acc_ax.plot(lstm_curves['val_accuracy'], label='Val Accuracy')
acc_ax.set_title('LSTM Accuracy Over Epochs')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

loss_ax.plot(lstm_curves['loss'], label='Train Loss')
loss_ax.plot(lstm_curves['val_loss'], label='Val Loss')
loss_ax.set_title('LSTM Loss Over Epochs')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

fig.tight_layout()
plt.show()

In [None]:
import seaborn as sns
# ANN Confusion Matrix: rows are the actual classes, columns the predicted ones;
# tick labels use the original 1-5 class numbering
class_tick_labels = range(1, 6)
cm_ann = confusion_matrix(
    y_test.argmax(axis=1),
    y_pred_ann.argmax(axis=1),
)
plt.figure(figsize=(6, 5))
sns.heatmap(
    cm_ann,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_tick_labels,
    yticklabels=class_tick_labels,
)
plt.title("Confusion Matrix - ANN")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

In [None]:
# LSTM Confusion Matrix: rows are the actual classes, columns the predicted ones;
# tick labels use the original 1-5 class numbering
lstm_tick_labels = range(1, 6)
cm_lstm = confusion_matrix(
    y_test.argmax(axis=1),
    y_pred_lstm.argmax(axis=1),
)
plt.figure(figsize=(6, 5))
sns.heatmap(
    cm_lstm,
    annot=True,
    fmt="d",
    cmap="Greens",
    xticklabels=lstm_tick_labels,
    yticklabels=lstm_tick_labels,
)
plt.title("Confusion Matrix - LSTM")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

In [None]:
# Choose one sample from each group: seizure (y=1) and non-seizure (y!=1).
# A fixed random_state makes the figure reproducible; without it a different pair of
# rows was drawn on every execution.
sample_seizure = df[df['y'] == 1].sample(1, random_state=42)
sample_non_seizure = df[df['y'] != 1].sample(1, random_state=42)

# Extract only EEG features (X1 to X178)
eeg_columns = [col for col in df.columns if col.startswith("X")]
plt.figure(figsize=(12, 6))

# Plot seizure sample
plt.plot(eeg_columns,
         sample_seizure[eeg_columns].values.flatten(),
         label='Seizure (y=1)',
         color='red')

# Plot non-seizure sample (the legend shows which non-seizure class was drawn)
plt.plot(eeg_columns,
         sample_non_seizure[eeg_columns].values.flatten(),
         label=f'Non-Seizure (y={sample_non_seizure["y"].values[0]})',
         color='blue')

plt.title('EEG Signal of One Seizure and One Non-Seizure Sample')
# NOTE(review): per the UCI dataset description the X columns are consecutive time samples
# of a one-second recording, not separate channels — the label was changed accordingly; confirm.
plt.xlabel('Time Sample')
plt.ylabel('EEG Value')
plt.xticks(rotation=90)
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()