In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import joblib

# Load the dataset
df = pd.read_csv("data/eeg_dataset_mean.csv")

# Encode Mood labels
le = LabelEncoder()
df["Mood"] = le.fit_transform(df["Mood"])

# Create a Health Status column (Healthy = 0, Unhealthy = 1)
df["Health_Status"] = df["Mood"].apply(lambda x: 1 if x != 0 else 0)

# Features and target variable
X = df[["Mood", "Mean_EEG"]]
y = df["Health_Status"]

# Stratified sampling
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Initialize and train the Random Forest Classifier
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Predict on the test set
y_pred = rf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print("✅ Health Status Prediction Accuracy:", accuracy * 100, "%")
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Create the 'models' directory if it doesn't exist
os.makedirs("models", exist_ok=True)

# Save the trained model
joblib.dump(rf, "models/health_status_classifier.pkl")


✅ Health Status Prediction Accuracy: 100.0 %

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       1.00      1.00      1.00       189

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200



['models/health_status_classifier.csv']