In [None]:
import numpy as np
!pip install PyWavelets
import pywt
from scipy.io import loadmat, savemat
from scipy.signal import butter, filtfilt
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest, mutual_info_classif
import os
from google.colab import drive

Collecting PyWavelets
  Downloading pywavelets-1.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.0 kB)
Downloading pywavelets-1.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.5 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/4.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/4.5 MB[0m [31m4.1 MB/s[0m eta [36m0:00:02[0m[2K   [91m━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/4.5 MB[0m [31m18.8 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m4.5/4.5 MB[0m [31m48.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.5/4.5 MB[0m [31m35.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyWavelets
Successfully installed PyWavelets-1.8.0


In [None]:
# Mount Google Drive
drive.mount('/content/drive')

# Band-pass filter function (0-40 Hz as per paper)
def bandpass_filter(signal, lowcut=0.5, highcut=40.0, fs=173.61, order=5):
    nyquist = 0.5 * fs
    low = lowcut / nyquist
    high = highcut / nyquist
    b, a = butter(order, [low, high], btype='band')
    return filtfilt(b, a, signal)


Mounted at /content/drive


In [None]:
# Function to extract wavelet features using Stationary Wavelet Transform (SWT)
def extract_wavelet_features(signal, wavelet='db4', level=4):
    if len(signal) % 2 != 0:  # Ensure even length
        signal = signal[:-1]

    coeffs = pywt.swt(signal, wavelet, level=level)
    features = []
    for cA, cD in coeffs:
        features.append(np.mean(np.abs(cA)))  # Mean absolute value of approximation coefficients
        features.append(np.std(cA))  # Standard deviation
        features.append(np.mean(np.abs(cD)))  # Mean absolute value of detail coefficients
        features.append(np.std(cD))  # Standard deviation of detail coefficients
    return np.array(features)


# Function to compute statistical features
def extract_statistical_features(signal):
    features = []
    features.append(np.mean(signal))  # Mean absolute value
    features.append(np.std(signal))  # Standard deviation
    features.append(np.mean((signal - np.mean(signal))**3) / (np.std(signal)**3))  # Skewness
    features.append(np.mean((signal - np.mean(signal))**4) / (np.std(signal)**4))  # Kurtosis
    features.append(np.sqrt(np.mean(signal**2)))  # RMS power
    return np.array(features)


In [None]:
# Define input and output folders
input_folder = "/content/drive/MyDrive/EEG Dataset"
output_folder = os.path.join(input_folder, "Processed_2")
os.makedirs(output_folder, exist_ok=True)

In [None]:
# Process EEG data
label_map = {'F': 0, 'N': 0, 'O': 1, 'Z': 1, 'S': 2}  # Class mapping

for folder in ['F', 'N', 'O', 'S', 'Z']:
    folder_path = os.path.join(input_folder, folder)
    if not os.path.exists(folder_path):
        print(f"Warning: {folder_path} not found. Skipping...")
        continue

    all_features = []
    all_labels = []

    for file in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file)
        signal = np.loadtxt(file_path)  # Load EEG signal

        # Apply band-pass filter
        filtered_signal = bandpass_filter(signal)

        # Normalize signal (-1 to 1 as per paper)
        scaler = MinMaxScaler(feature_range=(-1, 1))
        normalized_signal = scaler.fit_transform(filtered_signal.reshape(-1, 1)).flatten()

        # Extract wavelet features
        wavelet_features = extract_wavelet_features(normalized_signal)

        # Extract statistical features
        statistical_features = extract_statistical_features(normalized_signal)

        # Combine features
        combined_features = np.hstack([wavelet_features, statistical_features])
        all_features.append(combined_features)
        all_labels.append(label_map[folder])

    # Convert to numpy arrays
    all_features = np.array(all_features)
    all_labels = np.array(all_labels)

    # Select best features using Mutual Information & BDFA (Mockup Step)
    selector = SelectKBest(mutual_info_classif, k=19)  # Selecting top 19 features
    selected_features = selector.fit_transform(all_features, all_labels)

    # Save processed features and labels separately for each folder
    save_path = os.path.join(output_folder, f"processed_{folder}.mat")
    savemat(save_path, {"features": selected_features, "labels": all_labels})
    print(f"Processed and saved: {save_path}")

print("Preprocessing complete. Processed data saved.")


Processed and saved: /content/drive/MyDrive/EEG Dataset/Processed_2/processed_F.mat
Processed and saved: /content/drive/MyDrive/EEG Dataset/Processed_2/processed_N.mat
Processed and saved: /content/drive/MyDrive/EEG Dataset/Processed_2/processed_O.mat
Processed and saved: /content/drive/MyDrive/EEG Dataset/Processed_2/processed_S.mat
Processed and saved: /content/drive/MyDrive/EEG Dataset/Processed_2/processed_Z.mat
Preprocessing complete. Processed data saved.


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from scipy.io import loadmat
import os
from google.colab import drive
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# Define input folder where processed data is stored
processed_folder = "/content/drive/MyDrive/EEG Dataset/Processed_2"

def load_processed_data():
    all_features, all_labels = [], []

    for file in os.listdir(processed_folder):
        if file.endswith(".mat"):
            data = loadmat(os.path.join(processed_folder, file))
            all_features.append(data["features"])
            all_labels.append(data["labels"].flatten())

    # Stack all data
    X = np.vstack(all_features)
    y = np.concatenate(all_labels)
    return X, y


In [34]:
from imblearn.over_sampling import SMOTE
from sklearn.utils.class_weight import compute_class_weight

# Load dataset
X, y = load_processed_data()

# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Perform SMOTE to handle imbalance
smote = SMOTE(random_state=42)
X, y = smote.fit_resample(X, y)

# Compute class weights to handle imbalance
class_weights = compute_class_weight('balanced', classes=np.unique(y), y=y)
class_weight_dict = {i: class_weights[i] for i in range(len(class_weights))}

# Split into training (80%) and validation (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [None]:
# Define DNN model based on research paper
def create_dnn_model(input_shape, num_classes):
    model = Sequential([
        Input(shape=(input_shape,)),
        Dense(10, activation='sigmoid'),  # First hidden layer
        Dense(10, activation='sigmoid'),  # Second hidden layer
        Dense(10, activation='sigmoid'),  # Third hidden layer
        Dense(num_classes, activation='softmax')  # Output layer
    ])

    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss=SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

    return model
# Create and train the model
model = create_dnn_model(input_shape=X_train.shape[1], num_classes=len(np.unique(y)))
model.summary()

In [35]:
# Perform 3 experiments as per the research paper
experiments = {
    "Binary Classification (Exp 1: A vs E)": [0, 2],
    "Binary Classification (Exp 2: A+D vs E)": [0, 1, 2],
    "Multi-Class Classification (Exp 3: A vs D vs E)": [0, 1, 2]
}

for exp_name, selected_classes in experiments.items():
    print(f"\nRunning Experiment: {exp_name}")

    # Filter selected classes
    indices = np.isin(y, selected_classes)
    X_exp, y_exp = X[indices], y[indices]

    # Re-map labels to 0,1,... for compatibility with SparseCategoricalCrossentropy
    class_mapping = {c: i for i, c in enumerate(sorted(selected_classes))}
    y_exp = np.array([class_mapping[label] for label in y_exp])

    # Split into training/testing
    X_train, X_test, y_train, y_test = train_test_split(X_exp, y_exp, test_size=0.2, random_state=42, stratify=y_exp)

    # Train model
    model = create_dnn_model(input_shape=X_train.shape[1], num_classes=len(selected_classes))
    model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        epochs=100,
        batch_size=16,
        class_weight=class_weight_dict,
        verbose=1
    )

    # Evaluate model
    y_pred = np.argmax(model.predict(X_test), axis=1)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"\nExperiment: {exp_name} - Accuracy: {accuracy:.4f}")
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

    # Save trained model
    model.save(f"/content/drive/MyDrive/EEG Dataset/EEG_BDFA_DNN_{exp_name.replace(' ', '_')}.keras")
    print(f"Model for {exp_name} saved successfully!\n")



Running Experiment: Binary Classification (Exp 1: A vs E)
Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 22ms/step - accuracy: 0.5315 - loss: 0.6795 - val_accuracy: 0.6125 - val_loss: 0.6808
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5842 - loss: 0.6786 - val_accuracy: 0.6250 - val_loss: 0.6774
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6325 - loss: 0.6747 - val_accuracy: 0.6375 - val_loss: 0.6743
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6675 - loss: 0.6644 - val_accuracy: 0.6250 - val_loss: 0.6692
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6609 - loss: 0.6672 - val_accuracy: 0.6375 - val_loss: 0.6608
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7417 - loss: 0.6424 - val_accuracy