In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.multioutput import MultiOutputClassifier, MultiOutputRegressor
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, mean_absolute_error, r2_score
import shap

ModuleNotFoundError: No module named 'shap'

In [None]:
DATA_PATH = "dreamwave_synthetic_sleep1.csv"
OUTPUT_DIR = Path("neuropredict_outputs")
OUTPUT_DIR.mkdir(exist_ok=True)

In [3]:
df = pd.read_csv(DATA_PATH)

In [4]:
df.shape

(100000, 19)

In [5]:
df.head()

Unnamed: 0,user_id,night,minute,sleep_stage,eeg_theta_power,eeg_gamma_power,eeg_delta_power,heart_rate_bpm,hrv_ms,rem_bursts,chin_emg,respiration_rate,resp_irregularity,skin_conductance,valence,arousal,mood,activity,keywords
0,2,6,20,N1,91.9,14.8,80.5,80.8,41.0,1,12.7,12.3,0.041,2.35,negative,low,stressed,study,"friends, fear"
1,34,2,358,N2,124.6,12.8,170.4,74.1,53.9,0,18.6,12.6,0.036,2.89,negative,low,happy,social,"running, flight, school"
2,6,1,289,N2,139.1,11.5,164.1,75.7,70.1,1,13.1,12.3,0.074,1.97,neutral,high,calm,study,"exam, success, unknown"
3,28,1,191,N3,129.3,8.9,163.9,47.1,80.5,1,16.7,13.2,0.043,2.33,negative,medium,anxious,work,unknown
4,47,2,24,N1,82.7,17.2,100.7,78.8,37.8,1,23.2,12.4,0.055,1.38,neutral,low,calm,social,"school, exam, flight"


In [6]:
def generate_disease_risks_and_stages(df):
    df_risk = df.copy()
    numeric_cols = df_risk.select_dtypes(include=[np.number]).columns.tolist()
    def normalize_score(score, min_val=0, max_val=100):
        score_norm = (score - score.min()) / (score.max() - score.min())
        return score_norm * (max_val - min_val) + min_val
    def get_norm_feature(col, inverse=False):
        if col in numeric_cols:
            vals = df_risk[col].values
            norm = (vals - vals.min()) / (vals.max() - vals.min() + 1e-10)
            return (1 - norm) if inverse else norm
        return np.zeros(len(df_risk))
    alzheimer_components = [
        get_norm_feature('eeg_delta_power', inverse=True) * 25,  # Low delta = higher risk
        get_norm_feature('hrv_ms', inverse=True) * 20,           # Low HRV = higher risk
        get_norm_feature('rem_bursts', inverse=True) * 20,       # Poor REM = higher risk
        get_norm_feature('eeg_theta_power', inverse=True) * 15,  # Theta wave changes
        get_norm_feature('respiration_rate') * 10,               # Irregular breathing
        get_norm_feature('resp_irregularity') * 10               # Breathing irregularity
    ]
    df_risk['Alzheimer_Risk_Score'] = np.sum(alzheimer_components, axis=0)
    parkinson_components = [
        get_norm_feature('chin_emg') * 30,                      # High EMG during REM
        get_norm_feature('rem_bursts') * 25,                    # REM behavior disorder
        get_norm_feature('resp_irregularity') * 20,             # Autonomic dysfunction
        get_norm_feature('hrv_ms', inverse=True) * 15,          # Reduced HRV
        get_norm_feature('eeg_gamma_power', inverse=True) * 10  # Altered gamma activity
    ]
    df_risk['Parkinson_Risk_Score'] = np.sum(parkinson_components, axis=0)
    stress_components = [
        get_norm_feature('arousal') * 25,                       # High arousal
        get_norm_feature('heart_rate_bpm') * 20,                # Elevated heart rate
        get_norm_feature('hrv_ms', inverse=True) * 20,          # Low HRV
        get_norm_feature('skin_conductance') * 15,              # High skin conductance
        get_norm_feature('resp_irregularity') * 10,             # Irregular breathing
        get_norm_feature('valence', inverse=True) * 10          # Negative valence
    ]
    df_risk['Stress_Risk_Score'] = np.sum(stress_components, axis=0)
    disease_names = ['Alzheimer', 'Parkinson', 'Stress']

    for disease in disease_names:
        score_col = f'{disease}_Risk_Score'
        stage_col = f'{disease}_Risk_Stage'

        # Low: 0-33, Moderate: 33-66, High: 66-100
        scores = df_risk[score_col]
        stages = pd.cut(scores,
                       bins=[0, 33.33, 66.66, 100],
                       labels=[0, 1, 2],  # 0=Low, 1=Moderate, 2=High
                       include_lowest=True)
        df_risk[stage_col] = stages.astype(int)
    for disease in disease_names:
        score_col = f'{disease}_Risk_Score'
        stage_col = f'{disease}_Risk_Stage'

        print(f"\n{disease}:")
        print(f"  Score range: {df_risk[score_col].min():.1f} - {df_risk[score_col].max():.1f}")
        print(f"  Mean score: {df_risk[score_col].mean():.1f}")
        print(f"  Stage distribution:")
        stage_counts = df_risk[stage_col].value_counts().sort_index()
        for stage, count in stage_counts.items():
            stage_name = ['Low', 'Moderate', 'High'][stage]
            pct = (count / len(df_risk)) * 100
            print(f"    {stage_name}: {count} ({pct:.1f}%)")

    return df_risk

In [7]:
df = generate_disease_risks_and_stages(df)


Alzheimer:
  Score range: 27.2 - 88.2
  Mean score: 51.2
  Stage distribution:
    Low: 118 (0.1%)
    Moderate: 93425 (93.4%)
    High: 6457 (6.5%)

Parkinson:
  Score range: 18.5 - 67.0
  Mean score: 39.0
  Stage distribution:
    Low: 20610 (20.6%)
    Moderate: 79389 (79.4%)
    High: 1 (0.0%)

Stress:
  Score range: 7.4 - 52.7
  Mean score: 26.1
  Stage distribution:
    Low: 83540 (83.5%)
    Moderate: 16460 (16.5%)


In [8]:
score_cols = ['Alzheimer_Risk_Score', 'Parkinson_Risk_Score', 'Stress_Risk_Score']
stage_cols = ['Alzheimer_Risk_Stage', 'Parkinson_Risk_Stage', 'Stress_Risk_Stage']

In [10]:
feature_cols = [
    'sleep_stage', 'eeg_theta_power', 'eeg_gamma_power', 'eeg_delta_power',
    'heart_rate_bpm', 'hrv_ms', 'rem_bursts', 'chin_emg', 'respiration_rate',
    'resp_irregularity', 'skin_conductance', 'valence', 'arousal', 'mood', 'activity'
]

In [11]:
df_processed = df[feature_cols + score_cols + stage_cols].copy()

In [12]:
for col in feature_cols:
    if df_processed[col].dtype == "object":
        df_processed[col] = LabelEncoder().fit_transform(df_processed[col].astype(str))


In [13]:
scaler = StandardScaler()
X = pd.DataFrame(scaler.fit_transform(df_processed[feature_cols]), columns=feature_cols)
y_scores = df_processed[score_cols]
y_stages = df_processed[stage_cols]

In [14]:
X_train, X_test, y_scores_train, y_scores_test, y_stages_train, y_stages_test = train_test_split(
    X, y_scores, y_stages, test_size=0.2, random_state=42
)

In [15]:
base_rf_reg = RandomForestRegressor(
    n_estimators=200,
    max_depth=10,
    min_samples_split=4,
    random_state=42,
    n_jobs=-1
)
multi_reg = MultiOutputRegressor(base_rf_reg)
multi_reg.fit(X_train, y_scores_train)

In [16]:
base_rf_clf = RandomForestClassifier(
    n_estimators=200,
    max_depth=8,
    min_samples_split=4,
    random_state=42,
    n_jobs=-1
)
multi_clf = MultiOutputClassifier(base_rf_clf)
multi_clf.fit(X_train, y_stages_train)

In [17]:
y_scores_pred = multi_reg.predict(X_test)

In [18]:
mae_scores, r2_scores = [], []
for i, col in enumerate(score_cols):
    mae = mean_absolute_error(y_scores_test[col], y_scores_pred[:, i])
    r2 = r2_score(y_scores_test[col], y_scores_pred[:, i])
    mae_scores.append(mae)
    r2_scores.append(r2)
    print(f"{col:30s} | MAE: {mae:.2f} | R²: {r2:.3f}")

Alzheimer_Risk_Score           | MAE: 1.02 | R²: 0.977
Parkinson_Risk_Score           | MAE: 0.82 | R²: 0.971
Stress_Risk_Score              | MAE: 0.54 | R²: 0.991


In [19]:
score_metrics = pd.DataFrame({
    "Disease": [col.replace('_Risk_Score', '') for col in score_cols],
    "MAE": mae_scores,
    "R2_Score": r2_scores
})
score_metrics.to_csv(OUTPUT_DIR / "score_metrics.csv", index=False)

In [20]:
y_stages_pred = multi_clf.predict(X_test)

In [21]:
stage_names = ['Low', 'Moderate', 'High']
accs, f1s = [], []
for i, col in enumerate(stage_cols):
    acc = accuracy_score(y_stages_test[col], y_stages_pred[:, i])
    f1 = f1_score(y_stages_test[col], y_stages_pred[:, i], average='weighted', zero_division=0)
    accs.append(acc)
    f1s.append(f1)
    print(f"{col:30s} | Acc: {acc:.3f} | F1: {f1:.3f}")

stage_metrics = pd.DataFrame({
    "Disease": [col.replace('_Risk_Stage', '') for col in stage_cols],
    "Accuracy": accs,
    "F1_Score": f1s
})
stage_metrics.to_csv(OUTPUT_DIR / "stage_metrics.csv", index=False)

Alzheimer_Risk_Stage           | Acc: 0.973 | F1: 0.970
Parkinson_Risk_Stage           | Acc: 0.944 | F1: 0.941
Stress_Risk_Stage              | Acc: 0.987 | F1: 0.987


In [22]:
for i, col in enumerate(stage_cols):
    plt.figure(figsize=(7, 6))
    cm = confusion_matrix(y_stages_test[col], y_stages_pred[:, i])
    sns.heatmap(cm, annot=True, fmt="d", cmap="YlOrRd",
                xticklabels=stage_names, yticklabels=stage_names)
    disease_name = col.replace('_Risk_Stage', '')
    plt.title(f"Confusion Matrix: {disease_name} Risk Stages")
    plt.xlabel("Predicted Stage")
    plt.ylabel("True Stage")
    plt.tight_layout()
    cm_path = OUTPUT_DIR / f"cm_stages_{disease_name}.png"
    plt.savefig(cm_path, dpi=150)
    plt.close()
    print(f"  Saved: {cm_path}")

  Saved: neuropredict_outputs/cm_stages_Alzheimer.png
  Saved: neuropredict_outputs/cm_stages_Parkinson.png
  Saved: neuropredict_outputs/cm_stages_Stress.png


In [23]:
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
for i, col in enumerate(score_cols):
    disease_name = col.replace('_Risk_Score', '')
    axes[i].hist(y_scores_test[col], bins=30, alpha=0.5, label='True', color='blue')
    axes[i].hist(y_scores_pred[:, i], bins=30, alpha=0.5, label='Predicted', color='red')
    axes[i].axvline(33.33, color='green', linestyle='--', linewidth=1, label='Low/Mod')
    axes[i].axvline(66.66, color='orange', linestyle='--', linewidth=1, label='Mod/High')
    axes[i].set_xlabel('Risk Score')
    axes[i].set_ylabel('Frequency')
    axes[i].set_title(f'{disease_name} Risk Score Distribution')
    axes[i].legend()
plt.tight_layout()
plt.savefig(OUTPUT_DIR / "score_distributions.png", dpi=150)
plt.close()

In [24]:
feature_names = X.columns.tolist()
mean_abs_shap = pd.DataFrame(index=feature_names)

for i, est in enumerate(multi_clf.estimators_):
    out_name = stage_cols[i].replace('_Risk_Stage', '')
    print(f"  → Explaining: {out_name}")
    try:
        explainer = shap.TreeExplainer(est)
        shap_values = explainer.shap_values(X_train[:1000])

        if isinstance(shap_values, list):
            # Average across all classes
            sv = np.mean([np.abs(sv) for sv in shap_values], axis=(0, 1))
        else:
            sv = np.mean(np.abs(shap_values), axis=0)

        mean_abs_shap[out_name] = sv
    except Exception as e:
        print(f"  ⚠️ SHAP failed for {out_name}: {e}")
        mean_abs_shap[out_name] = np.zeros(len(feature_names))


  → Explaining: Alzheimer
  ⚠️ SHAP failed for Alzheimer: Expected a 1D array, got an array with shape (15, 3)
  → Explaining: Parkinson
  ⚠️ SHAP failed for Parkinson: Expected a 1D array, got an array with shape (15, 3)
  → Explaining: Stress
  ⚠️ SHAP failed for Stress: Expected a 1D array, got an array with shape (15, 2)


In [25]:
plt.figure(figsize=(14, 6))
sns.heatmap(mean_abs_shap.T, cmap="vlag", linewidths=0.5, cbar_kws={'label': 'Mean |SHAP value|'})
plt.title("Feature Importance for Disease Risk Stage Predictions")
plt.ylabel("Disease Risk")
plt.xlabel("Features")
plt.tight_layout()
heatmap_path = OUTPUT_DIR / "shap_heatmap_stages.png"
plt.savefig(heatmap_path, dpi=150)
plt.close()

In [26]:
topk = 8
for out in mean_abs_shap.columns:
    top_feats = mean_abs_shap[out].abs().sort_values(ascending=False).head(topk)
    plt.figure(figsize=(9, 5))
    colors = plt.cm.RdYlGn_r(top_feats.values / top_feats.values.max())
    plt.barh(range(len(top_feats)), top_feats.values, color=colors)
    plt.yticks(range(len(top_feats)), top_feats.index)
    plt.xlabel("Mean |SHAP value|")
    plt.title(f"Top {topk} Features for {out} Risk Stage Prediction")
    plt.gca().invert_yaxis()
    plt.tight_layout()
    p = OUTPUT_DIR / f"top_features_{out}.png"
    plt.savefig(p, dpi=150)
    plt.close()

  colors = plt.cm.RdYlGn_r(top_feats.values / top_feats.values.max())
  colors = plt.cm.RdYlGn_r(top_feats.values / top_feats.values.max())
  colors = plt.cm.RdYlGn_r(top_feats.values / top_feats.values.max())


In [27]:
sample_results = pd.DataFrame({
    'Sample_Index': range(len(y_scores_test)),
    'Alzheimer_Score_True': y_scores_test['Alzheimer_Risk_Score'].values,
    'Alzheimer_Score_Pred': y_scores_pred[:, 0],
    'Alzheimer_Stage_True': y_stages_test['Alzheimer_Risk_Stage'].values,
    'Alzheimer_Stage_Pred': y_stages_pred[:, 0],
    'Parkinson_Score_True': y_scores_test['Parkinson_Risk_Score'].values,
    'Parkinson_Score_Pred': y_scores_pred[:, 1],
    'Parkinson_Stage_True': y_stages_test['Parkinson_Risk_Stage'].values,
    'Parkinson_Stage_Pred': y_stages_pred[:, 1],
    'Stress_Score_True': y_scores_test['Stress_Risk_Score'].values,
    'Stress_Score_Pred': y_scores_pred[:, 2],
    'Stress_Stage_True': y_stages_test['Stress_Risk_Stage'].values,
    'Stress_Stage_Pred': y_stages_pred[:, 2],
}).head(100)

In [28]:
sample_results.to_csv(OUTPUT_DIR / "sample_predictions.csv", index=False)

In [29]:
import pickle

In [30]:
from pathlib import Path

In [31]:
import seaborn as sns
from matplotlib.backends.backend_pdf import PdfPages
from datetime import datetime

In [32]:
def save_models_and_scaler(multi_reg, multi_clf, scaler, feature_cols, output_dir):
    OUTPUT_DIR = Path(output_dir)
    with open(OUTPUT_DIR / 'score_model.pkl', 'wb') as f:
        pickle.dump(multi_reg, f)

    with open(OUTPUT_DIR / 'stage_model.pkl', 'wb') as f:
        pickle.dump(multi_clf, f)
    with open(OUTPUT_DIR / 'scaler.pkl', 'wb') as f:
        pickle.dump(scaler, f)
    with open(OUTPUT_DIR / 'feature_names.pkl', 'wb') as f:
        pickle.dump(feature_cols, f)

In [33]:
def load_models(model_dir="neuropredict_outputs"):
    MODEL_DIR = Path(model_dir)

    with open(MODEL_DIR / 'score_model.pkl', 'rb') as f:
        score_model = pickle.load(f)

    with open(MODEL_DIR / 'stage_model.pkl', 'rb') as f:
        stage_model = pickle.load(f)

    with open(MODEL_DIR / 'scaler.pkl', 'rb') as f:
        scaler = pickle.load(f)

    with open(MODEL_DIR / 'feature_names.pkl', 'rb') as f:
        feature_names = pickle.load(f)
    return score_model, stage_model, scaler, feature_names


In [34]:
def predict_disease_risk(input_data, score_model, stage_model, scaler, feature_names):
    if isinstance(input_data, dict):
        df_input = pd.DataFrame([input_data])
    else:
        df_input = input_data.copy()
    for col in feature_names:
        if col not in df_input.columns:
            raise ValueError(f"Missing required feature: {col}")
    X_input = df_input[feature_names].copy()
    for col in X_input.columns:
        if X_input[col].dtype == "object":
            X_input[col] = LabelEncoder().fit_transform(X_input[col].astype(str))
    X_scaled = scaler.transform(X_input)
    scores = score_model.predict(X_scaled)
    stages = stage_model.predict(X_scaled)
    disease_names = ['Alzheimer', 'Parkinson', 'Stress']
    stage_names = ['Low', 'Moderate', 'High']

    results = []
    for i in range(len(X_scaled)):
        patient_result = {
            'sample_id': i,
            'predictions': {}
        }

        for j, disease in enumerate(disease_names):
            patient_result['predictions'][disease] = {
                'risk_score': round(float(scores[i, j]), 2),
                'risk_stage_numeric': int(stages[i, j]),
                'risk_stage_label': stage_names[int(stages[i, j])],
                'interpretation': interpret_risk(float(scores[i, j]), int(stages[i, j]))
            }

        results.append(patient_result)

    return results if len(results) > 1 else results[0]


def interpret_risk(score, stage):
    if stage == 0:
        return f"Low risk (score: {score:.1f}/100). Continue regular monitoring."
    elif stage == 1:
        return f"Moderate risk (score: {score:.1f}/100). Consider lifestyle interventions."
    else:
        return f"High risk (score: {score:.1f}/100). Recommend medical consultation."



In [35]:
def example_single_prediction():
    score_model, stage_model, scaler, feature_names = load_models()
    patient_data = {
        'sleep_stage': 2,              # 0=Wake, 1=N1, 2=N2, 3=N3, 4=REM
        'eeg_theta_power': 45.5,
        'eeg_gamma_power': 12.3,
        'eeg_delta_power': 78.9,
        'heart_rate_bpm': 72,
        'hrv_ms': 45,
        'rem_bursts': 8,
        'chin_emg': 15.2,
        'respiration_rate': 16,
        'resp_irregularity': 2.1,
        'skin_conductance': 3.5,
        'valence': 0.6,
        'arousal': 0.4,
        'mood': 1,                     # Encoded: 0=negative, 1=neutral, 2=positive
        'activity': 0                  # Encoded: 0=low, 1=medium, 2=high
    }
    result = predict_disease_risk(patient_data, score_model, stage_model, scaler, feature_names)

    for disease, metrics in result['predictions'].items():
        print(f"\n{disease} Disease Risk:")
        print(f"  Risk Score: {metrics['risk_score']}/100")
        print(f"  Risk Stage: {metrics['risk_stage_label']} ({metrics['risk_stage_numeric']})")
        print(f"  {metrics['interpretation']}")

In [37]:
def example_risk_report():

    score_model, stage_model, scaler, feature_names = load_models()

    # Patient data
    patient_data = {
        'sleep_stage': 2, 'eeg_theta_power': 45.5, 'eeg_gamma_power': 12.3,
        'eeg_delta_power': 78.9, 'heart_rate_bpm': 72, 'hrv_ms': 45,
        'rem_bursts': 8, 'chin_emg': 15.2, 'respiration_rate': 16,
        'resp_irregularity': 2.1, 'skin_conductance': 3.5, 'valence': 0.6,
        'arousal': 0.4, 'mood': 1, 'activity': 0
    }

    result = predict_disease_risk(patient_data, score_model, stage_model, scaler, feature_names)


    print("NEUROLOGICAL RISK ASSESSMENT REPORT")
    print(f"Assessment Date: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Patient ID: DEMO-001")


    for disease, metrics in result['predictions'].items():
        stage_emoji = "🟢" if metrics['risk_stage_numeric'] == 0 else "🟡" if metrics['risk_stage_numeric'] == 1 else "🔴"

        print(f"\n{stage_emoji} {disease.upper()} DISEASE RISK")
        print("-" * 70)
        print(f"Risk Score:  {metrics['risk_score']:.1f}/100")
        print(f"Risk Stage:  {metrics['risk_stage_label']}")
        print(f"Assessment:  {metrics['interpretation']}")

        # Risk visualization
        score = int(metrics['risk_score'])
        bar_length = 50
        filled = int((score / 100) * bar_length)
        bar = "█" * filled + "░" * (bar_length - filled)
        print(f"Visual:      [{bar}] {score}%")


    print("⚕️  Recommendations:")

    high_risks = [d for d, m in result['predictions'].items() if m['risk_stage_numeric'] == 2]
    mod_risks = [d for d, m in result['predictions'].items() if m['risk_stage_numeric'] == 1]

    if high_risks:
        print(f"🔴 HIGH PRIORITY: Schedule consultation for {', '.join(high_risks)}")
    if mod_risks:
        print(f"🟡 MODERATE: Monitor and consider preventive measures for {', '.join(mod_risks)}")
    if not high_risks and not mod_risks:
        print(f"🟢 Continue regular health monitoring and maintain healthy lifestyle")



In [38]:
def generate_pdf_report(patient_data, patient_info, output_path="neuropredict_outputs/risk_report.pdf"):

    score_model, stage_model, scaler, feature_names = load_models()

    result = predict_disease_risk(patient_data, score_model, stage_model, scaler, feature_names)

    pdf_path = Path(output_path)
    pdf_path.parent.mkdir(exist_ok=True)

    with PdfPages(pdf_path) as pdf:


        fig = plt.figure(figsize=(8.5, 11))
        fig.patch.set_facecolor('white')
        ax = fig.add_subplot(111)
        ax.axis('off')

        # Title
        ax.text(0.5, 0.85, 'NEUROLOGICAL RISK', ha='center', fontsize=28,
                fontweight='bold', color='#2c3e50')
        ax.text(0.5, 0.80, 'ASSESSMENT REPORT', ha='center', fontsize=28,
                fontweight='bold', color='#2c3e50')

        ax.plot([0.2, 0.8], [0.75, 0.75], 'k-', linewidth=2)

        # Patient Information
        y_pos = 0.65
        info_items = [
            ('Patient Name:', patient_info.get('name', 'N/A')),
            ('Patient ID:', patient_info.get('patient_id', 'N/A')),
            ('Age:', str(patient_info.get('age', 'N/A'))),
            ('Gender:', patient_info.get('gender', 'N/A')),
            ('Assessment Date:', patient_info.get('date', datetime.now().strftime('%Y-%m-%d')))
        ]

        for label, value in info_items:
            ax.text(0.3, y_pos, label, fontsize=12, fontweight='bold', ha='right')
            ax.text(0.32, y_pos, value, fontsize=12, ha='left')
            y_pos -= 0.05

        # Risk Summary Box
        ax.add_patch(plt.Rectangle((0.15, 0.25), 0.7, 0.15,
                                   fill=True, facecolor='#ecf0f1',
                                   edgecolor='#34495e', linewidth=2))
        ax.text(0.5, 0.36, 'RISK SUMMARY', ha='center', fontsize=14,
                fontweight='bold', color='#2c3e50')

        y_pos = 0.31
        for disease, metrics in result['predictions'].items():
            color = '#27ae60' if metrics['risk_stage_numeric'] == 0 else \
                   '#f39c12' if metrics['risk_stage_numeric'] == 1 else '#e74c3c'
            ax.text(0.25, y_pos, f"{disease}:", fontsize=11, ha='right', fontweight='bold')
            ax.text(0.27, y_pos, f"{metrics['risk_stage_label']} ({metrics['risk_score']:.1f})",
                   fontsize=11, ha='left', color=color, fontweight='bold')
            y_pos -= 0.04

        # Footer
        ax.text(0.5, 0.08, 'Generated by NeuroPredict AI System',
               ha='center', fontsize=9, style='italic', color='gray')
        ax.text(0.5, 0.05, f"Report Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
               ha='center', fontsize=8, color='gray')

        pdf.savefig(fig, bbox_inches='tight')
        plt.close()


        fig, axes = plt.subplots(2, 2, figsize=(8.5, 11))
        fig.suptitle('Disease Risk Assessment Details', fontsize=16, fontweight='bold', y=0.98)

        diseases = list(result['predictions'].keys())
        colors = ['#3498db', '#e74c3c', '#f39c12']

        # Individual risk gauges
        for idx, disease in enumerate(diseases):
            ax = axes[idx // 2, idx % 2]
            metrics = result['predictions'][disease]
            score = metrics['risk_score']

            # gauge chart
            theta = np.linspace(0, np.pi, 100)
            r = np.ones_like(theta)

            # Background
            ax.plot(theta, r, 'lightgray', linewidth=20, solid_capstyle='round')

            # Risk regions
            ax.plot(theta[:33], r[:33], '#27ae60', linewidth=20, solid_capstyle='round', label='Low')
            ax.plot(theta[33:67], r[33:67], '#f39c12', linewidth=20, solid_capstyle='round', label='Moderate')
            ax.plot(theta[67:], r[67:], '#e74c3c', linewidth=20, solid_capstyle='round', label='High')

            # Score indicator
            score_theta = (score / 100) * np.pi
            ax.plot([0, np.cos(score_theta)], [0, np.sin(score_theta)],
                   'k-', linewidth=3, marker='o', markersize=10)

            ax.set_ylim(0, 1.2)
            ax.set_xlim(-0.2, np.pi + 0.2)
            ax.axis('off')

            # Labels
            ax.text(0.5, -0.15, f"{disease} Risk", ha='center', fontsize=13,
                   fontweight='bold', transform=ax.transAxes)
            ax.text(0.5, -0.25, f"Score: {score:.1f}/100", ha='center', fontsize=11,
                   transform=ax.transAxes)
            ax.text(0.5, -0.35, f"Stage: {metrics['risk_stage_label']}", ha='center',
                   fontsize=10, color=colors[metrics['risk_stage_numeric']],
                   fontweight='bold', transform=ax.transAxes)

        # Hide unused subplot
        axes[1, 1].axis('off')

        # Add legend in unused subplot
        legend_ax = axes[1, 1]
        legend_elements = [
            plt.Line2D([0], [0], color='#27ae60', linewidth=10, label='Low Risk (0-33)'),
            plt.Line2D([0], [0], color='#f39c12', linewidth=10, label='Moderate Risk (33-66)'),
            plt.Line2D([0], [0], color='#e74c3c', linewidth=10, label='High Risk (66-100)')
        ]
        legend_ax.legend(handles=legend_elements, loc='center', fontsize=11, frameon=False)

        plt.tight_layout()
        pdf.savefig(fig, bbox_inches='tight')
        plt.close()


        fig = plt.figure(figsize=(8.5, 11))

        # Bar chart comparison
        ax1 = plt.subplot(3, 1, 1)
        diseases_list = list(result['predictions'].keys())
        scores = [result['predictions'][d]['risk_score'] for d in diseases_list]
        bar_colors = [colors[result['predictions'][d]['risk_stage_numeric']] for d in diseases_list]

        bars = ax1.barh(diseases_list, scores, color=bar_colors, edgecolor='black', linewidth=1.5)
        ax1.axvline(33.33, color='gray', linestyle='--', linewidth=1, alpha=0.5)
        ax1.axvline(66.66, color='gray', linestyle='--', linewidth=1, alpha=0.5)
        ax1.set_xlabel('Risk Score', fontsize=11)
        ax1.set_title('Comparative Risk Scores', fontsize=13, fontweight='bold')
        ax1.set_xlim(0, 100)

        # Add score labels on bars
        for i, (bar, score) in enumerate(zip(bars, scores)):
            ax1.text(score + 2, i, f'{score:.1f}', va='center', fontsize=10, fontweight='bold')

        # Risk distribution radar chart
        ax2 = plt.subplot(3, 1, 2, projection='polar')
        angles = np.linspace(0, 2 * np.pi, len(diseases_list), endpoint=False).tolist()
        scores_norm = [(s / 100) for s in scores]
        scores_norm += scores_norm[:1]  # Complete the circle
        angles += angles[:1]

        ax2.plot(angles, scores_norm, 'o-', linewidth=2, color='#3498db')
        ax2.fill(angles, scores_norm, alpha=0.25, color='#3498db')
        ax2.set_xticks(angles[:-1])
        ax2.set_xticklabels(diseases_list, fontsize=10)
        ax2.set_ylim(0, 1)
        ax2.set_title('Risk Profile', fontsize=13, fontweight='bold', pad=20)
        ax2.grid(True)

        # Recommendations text
        ax3 = plt.subplot(3, 1, 3)
        ax3.axis('off')

        y_pos = 0.9
        ax3.text(0.5, y_pos, 'Clinical Recommendations', fontsize=13,
                fontweight='bold', ha='center', transform=ax3.transAxes)
        y_pos -= 0.15

        for disease, metrics in result['predictions'].items():
            stage = metrics['risk_stage_numeric']
            color_map = {0: '#27ae60', 1: '#f39c12', 2: '#e74c3c'}
            symbol_map = {0: '✓', 1: '⚠', 2: '⚠'}

            ax3.text(0.05, y_pos, f"{symbol_map[stage]} {disease}:",
                    fontsize=11, fontweight='bold', color=color_map[stage],
                    transform=ax3.transAxes)

            y_pos -= 0.08

            # Wrap interpretation text
            interp = metrics['interpretation']
            words = interp.split()
            line = ""
            for word in words:
                if len(line + word) < 70:
                    line += word + " "
                else:
                    ax3.text(0.08, y_pos, line.strip(), fontsize=9,
                            transform=ax3.transAxes)
                    y_pos -= 0.06
                    line = word + " "
            if line:
                ax3.text(0.08, y_pos, line.strip(), fontsize=9,
                        transform=ax3.transAxes)

            y_pos -= 0.10

        plt.tight_layout()
        pdf.savefig(fig, bbox_inches='tight')
        plt.close()


        fig = plt.figure(figsize=(8.5, 11))
        fig.suptitle('Physiological Measurements', fontsize=16, fontweight='bold')


        if isinstance(patient_data, dict):
            data_df = pd.DataFrame([patient_data])
        else:
            data_df = patient_data.copy()


        ax = plt.subplot(1, 1, 1)
        ax.axis('off')

        measurements = []
        feature_labels = {
            'eeg_theta_power': 'EEG Theta Power',
            'eeg_gamma_power': 'EEG Gamma Power',
            'eeg_delta_power': 'EEG Delta Power',
            'heart_rate_bpm': 'Heart Rate (BPM)',
            'hrv_ms': 'Heart Rate Variability (ms)',
            'rem_bursts': 'REM Bursts',
            'chin_emg': 'Chin EMG',
            'respiration_rate': 'Respiration Rate',
            'resp_irregularity': 'Respiratory Irregularity',
            'skin_conductance': 'Skin Conductance',
            'valence': 'Valence',
            'arousal': 'Arousal'
        }

        for feature in feature_names:
            if feature in data_df.columns:
                label = feature_labels.get(feature, feature.replace('_', ' ').title())
                value = data_df[feature].iloc[0]
                measurements.append([label, f"{value:.2f}" if isinstance(value, (int, float)) else str(value)])

        table = ax.table(cellText=measurements,
                        colLabels=['Measurement', 'Value'],
                        cellLoc='left',
                        loc='center',
                        colWidths=[0.6, 0.3])

        table.auto_set_font_size(False)
        table.set_fontsize(10)
        table.scale(1, 2)

        # Style header
        for i in range(2):
            table[(0, i)].set_facecolor('#3498db')
            table[(0, i)].set_text_props(weight='bold', color='white')

        # Alternate row colors
        for i in range(1, len(measurements) + 1):
            for j in range(2):
                if i % 2 == 0:
                    table[(i, j)].set_facecolor('#ecf0f1')

        # Footer note
        plt.text(0.5, 0.02, 'Note: This report is generated by an AI system and should be reviewed by a medical professional.',
                ha='center', fontsize=8, style='italic', color='gray', transform=fig.transFigure)

        pdf.savefig(fig, bbox_inches='tight')
        plt.close()

        # Set PDF metadata
        d = pdf.infodict()
        d['Title'] = 'Neurological Risk Assessment Report'
        d['Author'] = 'NeuroPredict AI System'
        d['Subject'] = f"Risk Assessment for Patient {patient_info.get('patient_id', 'N/A')}"
        d['Keywords'] = 'Neurology, Risk Assessment, AI, Sleep Analysis'
        d['CreationDate'] = datetime.now()

    return pdf_path


In [39]:
def example_generate_pdf():

    # Patient information
    patient_info = {
        'name': 'Mishika Sardana',
        'patient_id': 'P-12345',
        'age': 20,
        'gender': 'Female',
        'date': '2025-10-19'
    }

    # Patient physiological data
    patient_data = {
        'sleep_stage': 2,
        'eeg_theta_power': 45.5,
        'eeg_gamma_power': 12.3,
        'eeg_delta_power': 78.9,
        'heart_rate_bpm': 72,
        'hrv_ms': 45,
        'rem_bursts': 8,
        'chin_emg': 15.2,
        'respiration_rate': 16,
        'resp_irregularity': 2.1,
        'skin_conductance': 3.5,
        'valence': 0.6,
        'arousal': 0.4,
        'mood': 1,
        'activity': 0
    }

    # Generate PDF report
    pdf_path = generate_pdf_report(patient_data, patient_info,
                                   output_path="neuropredict_outputs/patient_report.pdf")

    print(f"Location: {pdf_path}")


In [49]:
if __name__ == "__main__":
    try:
        example_single_prediction()
        example_risk_report()
        example_generate_pdf()
    except FileNotFoundError:
      print("error")






Alzheimer Disease Risk:
  Risk Score: 57.33/100
  Risk Stage: Moderate (1)
  Moderate risk (score: 57.3/100). Consider lifestyle interventions.

Parkinson Disease Risk:
  Risk Score: 61.33/100
  Risk Stage: Moderate (1)
  Moderate risk (score: 61.3/100). Consider lifestyle interventions.

Stress Disease Risk:
  Risk Score: 39.42/100
  Risk Stage: Moderate (1)
  Moderate risk (score: 39.4/100). Consider lifestyle interventions.




NEUROLOGICAL RISK ASSESSMENT REPORT
Assessment Date: 2025-10-19 13:33:31
Patient ID: DEMO-001

🟡 ALZHEIMER DISEASE RISK
----------------------------------------------------------------------
Risk Score:  57.3/100
Risk Stage:  Moderate
Assessment:  Moderate risk (score: 57.3/100). Consider lifestyle interventions.
Visual:      [████████████████████████████░░░░░░░░░░░░░░░░░░░░░░] 57%

🟡 PARKINSON DISEASE RISK
----------------------------------------------------------------------
Risk Score:  61.3/100
Risk Stage:  Moderate
Assessment:  Moderate risk (score: 61.3/100). Consider lifestyle interventions.
Visual:      [██████████████████████████████░░░░░░░░░░░░░░░░░░░░] 61%

🟡 STRESS DISEASE RISK
----------------------------------------------------------------------
Risk Score:  39.4/100
Risk Stage:  Moderate
Assessment:  Moderate risk (score: 39.4/100). Consider lifestyle interventions.
Visual:      [███████████████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░] 39%
⚕️  Recommendations:
🟡 MODERATE: Mon



Location: neuropredict_outputs/patient_report.pdf


In [50]:
import pickle
with open(OUTPUT_DIR / 'score_model.pkl', 'wb') as f:
    pickle.dump(multi_reg, f)
with open(OUTPUT_DIR / 'stage_model.pkl', 'wb') as f:
    pickle.dump(multi_clf, f)
with open(OUTPUT_DIR / 'scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)
with open(OUTPUT_DIR / 'feature_names.pkl', 'wb') as f:
    pickle.dump(feature_cols, f)