In [None]:
!pip install kaggle
from google.colab import files
files.upload()  # Upload kaggle.json
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d wanghaohan/confused-eeg
!unzip confused-eeg.zip

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import warnings
warnings.filterwarnings('ignore')

# Read both CSVs from the working directory: the EEG recordings that every
# later cell works on, and the demographics table (loaded for subject
# context only; nothing in this cell uses it further).
df = pd.read_csv('EEG_data.csv')
demo = pd.read_csv('demographic_info.csv')

# Quick look at the EEG frame: dimensions, a preview, and the column names.
print("EEG Data Shape:", df.shape)
print("\nFirst 5 rows:", df.head(), sep="\n")
print("\nColumns:", df.columns.tolist(), sep="\n")

In [None]:
# Pick the label column: the first column whose name contains 'predef',
# otherwise the first one whose name contains 'label', otherwise None.
label_col = next((name for name in df.columns if 'predef' in name.lower()), None)
if label_col is None:
    label_col = next((name for name in df.columns if 'label' in name.lower()), None)

if label_col is not None:
    print(f"Using label column: {label_col}")

    # Reshape to long form (one row per band reading) so seaborn can draw
    # one box per band, split by the label value.
    bands = ['Delta', 'Theta', 'Alpha1', 'Alpha2', 'Beta1', 'Beta2', 'Gamma1', 'Gamma2']
    band_df = df.melt(id_vars=label_col, value_vars=bands, var_name='Band', value_name='Power')

    fig, ax = plt.subplots(figsize=(10, 6))
    sns.boxplot(data=band_df, x='Band', y='Power', hue=label_col, ax=ax)
    ax.set_title('EEG Band Powers by Confusion State')
    plt.xticks(rotation=45)
    plt.show()
else:
    print("No label column found! Run Step 1 to check.")



In [None]:
# Candidate feature columns (update if your bands differ).
# NOTE(review): names here must match df.columns exactly; confirm the spelling
# of each one (e.g. 'Meditation') against the column list printed after loading.
features = ['Attention', 'Meditation', 'Raw', 'Delta', 'Theta', 'Alpha1', 'Alpha2', 'Beta1', 'Beta2', 'Gamma1', 'Gamma2']

# Auto-find label: prefer a column containing 'predef', else the first column
# containing 'label'.
label_col = next((col for col in df.columns if 'predef' in col.lower()), None)
if label_col is None:
    label_col = next((col for col in df.columns if 'label' in col.lower()), None)
if label_col is None:
    # A literal 'label' fallback can never work here: it would only be reached
    # when no column name contains 'label', so indexing with it always fails.
    # Fail with an explicit message instead.
    raise KeyError(
        "No label column found in df: expected a column whose name contains "
        "'predef' or 'label'."
    )

# Keep only the features that exist in the frame, and report the rest so a
# misspelled or renamed column does not silently vanish from the analysis.
avail_features = [f for f in features if f in df.columns]
missing_features = [f for f in features if f not in df.columns]
if missing_features:
    print(f"Skipping features not present in the data: {missing_features}")

# Pairwise correlations between the available features and the label
corr_data = df[avail_features + [label_col]].corr()

plt.figure(figsize=(10, 8))
sns.heatmap(corr_data, annot=True, cmap='coolwarm', center=0)
plt.title('Feature Correlations (Focus on Label Column)')
plt.show()

In [None]:
# This cell reuses `avail_features` and `label_col` from the correlation cell,
# so that cell must have been run first.
X = df[avail_features]
y = df[label_col]

# Hold out 20% of the rows, keeping the label proportions equal in both parts.
# NOTE(review): this is a row-level split; if rows from the same recording are
# correlated they can land on both sides of it. Confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Fit a 100-tree random forest with a fixed seed and score the held-out rows
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

# Confusion matrix as an annotated heatmap of integer counts
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_title('Confusion Matrix')
ax.set_ylabel('True')
ax.set_xlabel('Predicted')
plt.show()

# Feature importances from the fitted forest, largest first
importances = (
    pd.DataFrame({'Feature': avail_features, 'Importance': model.feature_importances_})
    .sort_values('Importance', ascending=False)
)
print(importances)

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(data=importances, x='Importance', y='Feature', ax=ax)
ax.set_title('Feature Importance for Confusion Prediction')
plt.show()