In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import os

# Input CSV expected to have been written by an earlier step of this workflow
file_path = "data/eeg_data_with_mean.csv"

# Read the CSV when it is present; otherwise stop with a pointer to the earlier steps
if os.path.exists(file_path):
    df = pd.read_csv(file_path)
else:
    raise FileNotFoundError("The file 'eeg_data_with_mean.csv' was not found. Please run the previous steps.")

# Create mood labels based on Mean_EEG values
def classify_mood(mean_eeg, happy_threshold=0.7, neutral_threshold=0.3):
    """Map a mean EEG value to a mood label.

    Args:
        mean_eeg: Numeric mean EEG value for one row.
        happy_threshold: Values strictly greater than this are "Happy".
        neutral_threshold: Values greater than or equal to this (and not
            "Happy") are "Neutral".

    Returns:
        One of "Happy", "Neutral" or "Sad".

    Note:
        NaN compares False against both thresholds, so a missing value
        falls through to "Sad".
    """
    if mean_eeg > happy_threshold:
        return "Happy"
    if mean_eeg >= neutral_threshold:
        return "Neutral"
    return "Sad"

# Derive the "Mood" label column from each row's Mean_EEG value
mood_labels = df["Mean_EEG"].apply(classify_mood)
df["Mood"] = mood_labels

# Features: every column whose name contains "EEG_Signal"; target: the mood label
signal_columns = [name for name in df.columns if "EEG_Signal" in name]
X = df[signal_columns]
y = df["Mood"]

# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 3-nearest-neighbours classifier on the training portion
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X=X_train, y=y_train)

# Predict the held-out rows and report the fraction classified correctly
y_pred = knn.predict(X=X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy of Mood Classification: {accuracy * 100:.2f}%")

# Write the labelled frame to disk without the row index
df.to_csv("data/eeg_data_with_mood.csv", index=False)

# Show a heading followed by the first ten labelled rows
print("\nSample Data with Mood Labels:", df.head(10), sep="\n")


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import os

# Path of the CSV that carries the per-row Mean_EEG column
file_path = "data/eeg_data_with_mean.csv"

# Fail fast with a hint when the file is absent; otherwise parse it into a DataFrame
if os.path.exists(file_path):
    df = pd.read_csv(file_path)
else:
    raise FileNotFoundError("The file 'eeg_data_with_mean.csv' was not found. Please run the previous steps.")

# Create mood labels based on Mean_EEG values
def classify_mood(mean_eeg):
    """Return the mood label for a mean EEG value.

    Values above 0.7 are "Happy", values from 0.3 up to and including 0.7
    are "Neutral", and anything else is "Sad".
    """
    # Guard clauses, checked from the highest band downwards
    if mean_eeg > 0.7:
        return "Happy"
    if mean_eeg >= 0.3:
        return "Neutral"
    return "Sad"

# Attach a "Mood" label to every row, computed from its Mean_EEG value
df["Mood"] = df["Mean_EEG"].apply(classify_mood)

# Target is the mood label; features are the columns named like "EEG_Signal"
y = df["Mood"]
signal_columns = [name for name in df.columns if "EEG_Signal" in name]
X = df[signal_columns]

# Split data into training and testing sets BEFORE scaling, so the scaler's
# statistics are estimated from training rows only (no test-set leakage).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features: fit on the training rows, then apply the same
# transformation to the held-out rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Full feature matrix under the train-fitted scaling, for the cross-validation check
X_scaled = scaler.transform(X)

# Random forest baseline: 100 trees, seeded so repeated runs build the same forest
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
)

# Fit on the training portion
model.fit(X=X_train, y=y_train)

# Score the held-out rows
y_pred = model.predict(X=X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy of Mood Classification: {accuracy * 100:.2f}%")

# 5-fold cross-validation over the full scaled feature matrix as a stability check
cv_scores = cross_val_score(estimator=model, X=X_scaled, y=y, cv=5)
print(f"Cross-Validation Accuracy: {cv_scores.mean() * 100:.2f}%")

# Hyperparameter tuning using GridSearchCV (3-fold CV over 2 x 3 x 3 = 18 candidates)
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [10, 20, None],
    'min_samples_split': [2, 5, 10]
}
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)
print(f"Best Hyperparameters: {grid_search.best_params_}")

# Report how the tuned model performs; otherwise the search result is never used.
# With the default refit=True, grid_search predicts with the best candidate refit on X_train.
print(f"Best Cross-Validation Accuracy (grid search): {grid_search.best_score_ * 100:.2f}%")
tuned_accuracy = accuracy_score(y_test, grid_search.predict(X_test))
print(f"Tuned Model Test Accuracy: {tuned_accuracy * 100:.2f}%")

# Persist the frame, now including the "Mood" column, without the row index
output_path = "data/eeg_data_with_mood.csv"
df.to_csv(output_path, index=False)

# Preview the first ten labelled rows under a heading
preview_rows = df.head(10)
print("\nSample Data with Mood Labels:")
print(preview_rows)


In [None]:
pip install pandas scikit-learn xgboost imbalanced-learn numpy

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
import os

# Load the dataset
file_path = "data/eeg_dataset_mean.csv"
if not os.path.exists(file_path):
    # Build the message from file_path so it always names the file that was actually checked
    raise FileNotFoundError(
        f"The file '{os.path.basename(file_path)}' was not found. Please run the previous steps."
    )

# Read the dataset
df = pd.read_csv(file_path)

# Columns holding the EEG signal features (name contains "EEG_Signal")
eeg_columns = [name for name in df.columns if "EEG_Signal" in name]

# Min-max scale those columns, overwriting them in df
# NOTE(review): the scaler is fitted on every row before the train/test split further
# down, so the scaling uses statistics from the rows later held out for testing.
scaler = MinMaxScaler()
df[eeg_columns] = scaler.fit_transform(df[eeg_columns])

# Create mood labels based on Mean_EEG values
def classify_mood(mean_eeg, happy_threshold=0.7, neutral_threshold=0.45):
    """Map a mean EEG value to a mood label.

    Args:
        mean_eeg: Numeric mean EEG value for one row.
        happy_threshold: Values strictly greater than this are "Happy".
        neutral_threshold: Values greater than or equal to this (and not
            "Happy") are "Neutral".

    Returns:
        One of "Happy", "Neutral" or "Sad".

    Note:
        NaN compares False against both thresholds, so a missing value
        falls through to "Sad".
    """
    if mean_eeg > happy_threshold:
        return "Happy"
    if mean_eeg >= neutral_threshold:
        return "Neutral"
    return "Sad"

# Label every row from its Mean_EEG value
mood_labels = df["Mean_EEG"].apply(classify_mood)
df["Mood"] = mood_labels

# Show how many rows fall into each mood class
mood_counts = df["Mood"].value_counts()
print("Mood Class Distribution:", mood_counts, sep="\n")

# Top up any mood class that has fewer than 2 rows, so the stratified split later in
# this cell sees at least 2 rows of every class. Each synthetic row is a randomly
# chosen existing row, relabelled to the rare class, with small Gaussian noise added
# to its EEG signal columns.
# NOTE(review): a synthetic row keeps the donor row's other columns (e.g. Mean_EEG), so
# its label may disagree with its own Mean_EEG — confirm this is acceptable.
rng = np.random.default_rng(42)  # seeded so the augmentation is reproducible
moods = ["Happy", "Neutral", "Sad"]
synthetic_rows = []
for mood in moods:
    # Number of rows missing to reach 2 for this class (0 when it already has enough)
    new_samples = max(0, 2 - int((df["Mood"] == mood).sum()))
    for _ in range(new_samples):
        random_patient = df.sample(1, random_state=rng).copy()
        random_patient["Mood"] = mood
        random_patient[eeg_columns] = random_patient[eeg_columns] + rng.normal(0, 0.01, size=len(eeg_columns))
        synthetic_rows.append(random_patient)

# Append all synthetic rows in one concat instead of growing df inside the loop
if synthetic_rows:
    df = pd.concat([df, *synthetic_rows], ignore_index=True)

# Target is the mood label; features are the EEG signal columns
y = df["Mood"]
X = df[eeg_columns]

# 80/20 split that keeps the class proportions of y in both parts (seeded)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# 5-nearest-neighbours classifier using the Manhattan distance metric
knn = KNeighborsClassifier(metric='manhattan', n_neighbors=5)
knn.fit(X=X_train, y=y_train)

# Predict the held-out rows
y_pred = knn.predict(X=X_test)

# Fraction of held-out rows classified correctly
# NOTE(review): in the recorded class distribution "Neutral" is about 90% of the rows,
# so always predicting "Neutral" would score close to the reported accuracy; consider
# per-class metrics before reading much into this number.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"\n✅ Accuracy of Mood Classification: {accuracy * 100:.2f}%")

# Write the labelled frame to disk without the row index.
# Note: at this point df holds the min-max scaled signal columns and any rows
# appended by the class top-up step, and that is what gets saved.
output_path = "data/eeg_data_with_mood.csv"
df.to_csv(output_path, index=False)

# Heading plus the first ten labelled rows
print("\nSample Data with Mood Labels:", df.head(10), sep="\n")


Mood Class Distribution:
Mood
Neutral    905
Happy       57
Sad         38
Name: count, dtype: int64

✅ Accuracy of Mood Classification: 92.50%

Sample Data with Mood Labels:
   Patient_ID      Disease Health_Status     Mood  Mean_EEG  EEG_Signal_1  \
0           1      Healthy       Healthy  Neutral  0.513430      0.492250   
1           2    Parkinson     Unhealthy  Neutral  0.535865      0.367149   
2           3  Alzheimer's     Unhealthy  Neutral  0.486303      0.484502   
3           4       Stroke     Unhealthy  Neutral  0.632977      0.631354   
4           5     Migraine     Unhealthy  Neutral  0.539713      0.399607   
5           6  Brain Tumor     Unhealthy  Neutral  0.485634      0.393733   
6           7       Stroke     Unhealthy  Neutral  0.631635      0.651619   
7           8     Migraine     Unhealthy      Sad  0.448007      0.329229   
8           9     Epilepsy     Unhealthy  Neutral  0.696455      0.761734   
9          10  Brain Tumor     Unhealthy  Neutral  0.52