In [6]:
import pandas as pd

# Input (raw feature table) and output (condensed per-band table) locations.
RAW_CSV_PATH = "/content/drive/MyDrive/Data/brainwaves.csv"
CONDENSED_CSV_PATH = "condensed_brainwave_data.csv"


def _band_columns(columns, band, exclude=("coh",)):
    """Return the column names that belong to one wave type.

    A column matches when its lower-cased name contains `band` and contains
    none of the substrings in `exclude`.

    Raises:
        ValueError: if no column matches. Averaging an empty selection would
            otherwise silently produce an all-NaN feature.
    """
    matches = []
    for col in columns:
        lowered = col.lower()
        if band in lowered and not any(token in lowered for token in exclude):
            matches.append(col)
    if not matches:
        raise ValueError(f"No columns found for wave type {band!r}")
    return matches


# Load dataset
df = pd.read_csv(RAW_CSV_PATH)

# Identify columns for each wave type (the dataset has several other column
# families). Names containing "coh" are always skipped; "highbeta" columns are
# additionally kept out of the Beta group because "beta" is a substring of them.
delta_cols = _band_columns(df.columns, "delta")
theta_cols = _band_columns(df.columns, "theta")
alpha_cols = _band_columns(df.columns, "alpha")
beta_cols = _band_columns(df.columns, "beta", exclude=("coh", "highbeta"))
gamma_cols = _band_columns(df.columns, "gamma")

# Aggregate each wave type into one feature: the row-wise mean over its columns
band_to_columns = {
    "Delta": delta_cols,
    "Theta": theta_cols,
    "Alpha": alpha_cols,
    "Beta": beta_cols,
    "Gamma": gamma_cols,
}
for band_name, band_cols in band_to_columns.items():
    df[band_name] = df[band_cols].mean(axis=1)

# Keep only the label and the five aggregated wave-type features
df = df[["specific.disorder", *band_to_columns]]

print(df.head(3))

# Save cleaned dataset
df.to_csv(CONDENSED_CSV_PATH, index=False)

print("Condensed dataset saved successfully!")


         specific.disorder      Delta      Theta      Alpha       Beta  \
0  Social anxiety disorder  28.010640  19.579119  23.738571  13.549816   
1      Depressive disorder  26.331350  18.540736  23.990099   9.906452   
2      Depressive disorder  13.028539   8.629804  40.531323  16.568042   

      Gamma  
0  3.370627  
1  2.484837  
2  2.170563  
Condensed dataset saved successfully!


In [9]:
# Train/test split, feature standardization and oversampling of the training set

from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
df = pd.read_csv("condensed_brainwave_data.csv")

# Select features and target
X = df[["Delta", "Theta", "Alpha", "Beta", "Gamma"]]
y = df["specific.disorder"]

# Check the distribution of the labels
print(df["specific.disorder"].value_counts())

# Stratified train-test split so both parts keep the label proportions
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Standardize the features; the scaler is fitted on the training part only and
# then applied unchanged to the test part.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Oversample the training set only (the test set is left untouched).
# SMOTE interpolates between a sample and its k nearest same-class neighbours,
# so every class needs more than k_neighbors training samples. The default
# (5) is kept whenever the rarest class allows it and reduced otherwise.
smallest_class_size = int(y_train.value_counts().min())
if smallest_class_size < 2:
    raise ValueError(
        "SMOTE needs at least 2 training samples in every class; "
        f"the rarest class has {smallest_class_size}"
    )
smote = SMOTE(random_state=42, k_neighbors=min(5, smallest_class_size - 1))
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

specific.disorder
Depressive disorder        134
Social anxiety disorder     43
Adjustment disorder         33
Healthy control             27
Acute stress disorder        9
Name: count, dtype: int64


In [13]:
from sklearn.model_selection import GridSearchCV
# Define the MLP model
mlp = MLPClassifier(hidden_layer_sizes=(128, 64),  # Two hidden layers: 128 neurons → 64 neurons
                    activation='relu',           # ReLU activation for non-linearity
                    solver='adam',               # Adam optimizer for adaptive learning
                    alpha=0.0001,                 # L2 regularization to prevent overfitting
                    batch_size=32,               # Mini-batch size
                    max_iter=2000,                # Upper bound on training iterations
                    random_state=42)             # Fixed seed for reproducible training

# Train on the SMOTE-resampled training set (X_train_resampled / y_train_resampled)
mlp.fit(X_train_resampled, y_train_resampled)

# Candidate hyperparameters for the grid search below. The search is currently
# commented out, so this dict has no effect on the model fitted above.
param_grid = {
    'hidden_layer_sizes': [(128, 64), (256, 128, 64)],
    'alpha': [0.0001, 0.001],
    'max_iter': [2000, 3000]
}

# NOTE(review): if this search is re-enabled, cross-validating on the already
# resampled data lets synthetic points derived from one fold's samples appear
# in other folds; consider resampling inside each fold instead (e.g. an
# imblearn Pipeline) — confirm before trusting the CV scores.
#grid_search = GridSearchCV(mlp, param_grid, cv=5, scoring='accuracy')
#grid_search.fit(X_train_resampled, y_train_resampled)

# Print the best parameters found
#print(f"Best parameters: {grid_search.best_params_}")

# Predict labels for the held-out test features
y_pred = mlp.predict(X_test)

