In [1]:
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Step 1: Load the pre-trained health status classifier
with open('models/health_status_classifier.pkl', 'rb') as file:
    health_status_model = pickle.load(file)

# Load the dataset
file_path = "data/eeg_data.csv"
df = pd.read_csv(file_path)

# Step 2: Inspect the column names and data
print("Columns in the dataset:", df.columns)

# Step 3: Classify the mood of each patient based on EEG signals
def classify_mood(mean_eeg):
    if mean_eeg > 0.7:
        return "Happy"
    elif mean_eeg >= 0.3:
        return "Neutral"
    else:
        return "Sad"

# Check the column containing EEG signal values
eeg_columns = [col for col in df.columns if "EEG_Signal" in col]
print("EEG Signal Columns:", eeg_columns)

# Calculate the mean EEG signal for each patient (if no 'Mean_EEG' column exists)
df["Mean_EEG"] = df[eeg_columns].mean(axis=1)

# Apply mood classification to the dataset
df["Mood"] = df["Mean_EEG"].apply(classify_mood)

# Step 3: Predict health status (Healthy/Unhealthy)
# Update the following line with the correct column name for health status
# For example, if the column is named "Status", use df["Status"]
y_health = df["Health_Status"]  # <-- Replace "Health_Status" with the actual column name
X_health = df[eeg_columns]
health_status_predictions = health_status_model.predict(X_health)
df["Predicted_Health_Status"] = health_status_predictions

# Filter only the unhealthy individuals
unhealthy_df = df[df['Predicted_Health_Status'] == 'Unhealthy']

# Step 4: Prepare features for disease prediction
# Use both EEG signals and mood as features
X_disease = unhealthy_df[eeg_columns + ['Mood']]
y_disease = unhealthy_df['Disease_Type']

# Convert the 'Mood' feature to numerical values (Happy=0, Neutral=1, Sad=2)
X_disease['Mood'] = X_disease['Mood'].map({'Happy': 0, 'Neutral': 1, 'Sad': 2})

# Step 5: Train the disease prediction model (XGBoost)
X_train, X_test, y_train, y_test = train_test_split(X_disease, y_disease, test_size=0.2, random_state=42)

model = XGBClassifier(objective='multi:softmax', num_class=len(y_disease.unique()), random_state=42)
model.fit(X_train, y_train)

# Step 6: Evaluate the model
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy of Disease Prediction: {accuracy * 100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(10, 7))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=y_disease.unique(), yticklabels=y_disease.unique())
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Feature Importance
feature_importance = model.get_booster().get_score(importance_type='weight')
plt.figure(figsize=(12, 8))
plt.bar(range(len(feature_importance)), list(feature_importance.values()), align='center')
plt.xticks(range(len(feature_importance)), list(feature_importance.keys()), rotation=45)
plt.title('Feature Importance')
plt.show()


Columns in the dataset: Index(['Patient_ID', 'Disease', 'EEG_Signal_1', 'EEG_Signal_2', 'EEG_Signal_3',
       'EEG_Signal_4', 'EEG_Signal_5', 'EEG_Signal_6', 'EEG_Signal_7',
       'EEG_Signal_8', 'EEG_Signal_9', 'EEG_Signal_10', 'EEG_Signal_11',
       'EEG_Signal_12', 'EEG_Signal_13', 'EEG_Signal_14'],
      dtype='object')
EEG Signal Columns: ['EEG_Signal_1', 'EEG_Signal_2', 'EEG_Signal_3', 'EEG_Signal_4', 'EEG_Signal_5', 'EEG_Signal_6', 'EEG_Signal_7', 'EEG_Signal_8', 'EEG_Signal_9', 'EEG_Signal_10', 'EEG_Signal_11', 'EEG_Signal_12', 'EEG_Signal_13', 'EEG_Signal_14']


KeyError: 'Health_Status'