# Parkinson's Multi-Agent System - Training Notebook

This notebook trains all agents in the unified Parkinson's disease multi-agent system.

## What This Does:
1️⃣ Sets up environment and dependencies
2️⃣ Mounts Google Drive for data access
3️⃣ Trains Motor, Biomarker, and Non-Motor agents
4️⃣ Saves trained models to Google Drive
5️⃣ Visualizes training results

## Before Running:
- Upload PPMI data to `/content/drive/MyDrive/parkinson_data/`
- Required files:
  - `motor_merged.csv` (motor assessments)
  - `datscan.csv` (DaTSCAN imaging)
  - `non_motor_merged.csv` (non-motor assessments)

## 1. Setup Environment

In [None]:
# Check if running in Colab
import sys
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    print("‚úì Running in Google Colab")
    
    # Mount Google Drive
    from google.colab import drive
    drive.mount('/content/drive')
    
    # Clone repository (if not already present)
    import os
    if not os.path.exists('/content/parkinson'):
        print("Cloning repository...")
        !git clone https://github.com/your-repo/parkinson.git /content/parkinson
    
    # Change to project directory
    %cd /content/parkinson
else:
    print("‚úì Running locally")

In [None]:
# Install dependencies
!pip install -q pandas numpy scikit-learn lightgbm xgboost shap matplotlib seaborn tqdm joblib

print("‚úì Dependencies installed")

## 2. Import Libraries

In [None]:
import warnings
warnings.filterwarnings('ignore')

from config import Config
from training.pipeline import TrainingPipeline
from agents.motor_agent import MotorAgent
from agents.biomarker_agent import BiomarkerAgent
from agents.non_motor_agent import NonMotorAgent

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm

# Set plotting style: seaborn's 'whitegrid' theme and a 10x6 default figure size
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (10, 6)

print("✓ Libraries imported")

## 3. Configure Paths

In [None]:
# Build the project configuration object and run its setup step
cfg = Config()
cfg.setup()

# Report the configured directories, then the detected runtime
print(f"Data directory: {cfg.paths.data_dir}")
print(f"Models directory: {cfg.paths.models_dir}")
environment_label = 'Google Colab' if cfg.is_colab else 'Local'
print(f"Environment: {environment_label}")

## 4. Verify Data Files

In [None]:
import os
from pathlib import Path

# Display label -> CSV filename expected inside cfg.paths.raw_data_dir
data_files = {
    'Motor': 'motor_merged.csv',
    'DaTSCAN': 'datscan.csv',
    'Non-Motor': 'non_motor_merged.csv'
}

print("Checking data files...\n")
missing_files = []
for name, filename in data_files.items():
    filepath = cfg.paths.raw_data_dir / filename
    exists = filepath.exists()
    if not exists:
        missing_files.append(filename)
    status = "✓" if exists else "✗"
    print(f"{status} {name}: {filename} {'(found)' if exists else '(NOT FOUND)'}")

# Show the upload hint only when at least one file is actually missing
if missing_files:
    print("\n💡 Note: Upload missing files to your Google Drive parkinson_data folder")
else:
    print("\n✓ All required data files found")

## 5. Train All Agents

In [None]:
# Build the training pipeline from the shared configuration
pipeline = TrainingPipeline(config=cfg)

# Names of the agents to train in this run
selected_agents = ['motor', 'biomarker', 'non_motor_cognitive']
agents = pipeline.train_all(agents_to_train=selected_agents, model_type='lightgbm')

# Closing banner
banner = "=" * 60
print(f"\n{banner}")
print("Training Complete!")
print(banner)

## 6. Visualize Training Results

In [None]:
# Extract training metrics: one row per agent whose result carries a 'metrics' entry.
# A metric that is absent becomes NaN rather than 0, so a missing value cannot be
# mistaken for a perfect error score (MAE/RMSE of 0) in the chart or the table.
missing_metric = float('nan')
results = [
    {
        'Agent': agent_name,
        'MAE': result['metrics'].get('mae', missing_metric),
        'R²': result['metrics'].get('r2', missing_metric),
        'RMSE': result['metrics'].get('rmse', missing_metric),
    }
    for agent_name, result in pipeline.training_results.items()
    if 'metrics' in result
]

if results:
    results_df = pd.DataFrame(results)

    # (column / y-label, panel title, bar styling) for the three side-by-side panels
    panels = [
        ('MAE', 'Mean Absolute Error (MAE)', {'color': 'steelblue'}),
        ('R²', 'R² Score', {'color': 'green', 'alpha': 0.7}),
        ('RMSE', 'Root Mean Squared Error (RMSE)', {'color': 'coral'}),
    ]

    # Plot metrics
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    for ax, (column, title, bar_style) in zip(axes, panels):
        ax.bar(results_df['Agent'], results_df[column], **bar_style)
        ax.set_title(title, fontsize=12, fontweight='bold')
        ax.set_ylabel(column)
        ax.tick_params(axis='x', rotation=45)

    # R² is capped at 1 but is negative for a model worse than predicting the mean.
    # Extend the lower limit when needed so such bars are not clipped out of view.
    # (The comparison is False for NaN, which falls back to 0.)
    lowest_r2 = results_df['R²'].min()
    axes[1].set_ylim([lowest_r2 if lowest_r2 < 0 else 0, 1])

    plt.tight_layout()
    plt.show()

    print("\nTraining Metrics Summary:")
    print(results_df.to_string(index=False))
else:
    print("No training metrics available (agents may use rule-based assessment)")

## 7. Test Predictions

In [None]:
# Example inputs, one profile per agent
motor_profile = {
    'NUPDRS3_BL': 20.0,
    'months_since_bl': 0.0,
}
bio_profile = {
    'putamen_mean_sbr': 1.8,
    'striatal_asym': 0.4,
    'low_dat_risk': 1,
}
nm_profile = {
    'updrs_nonmotor_cognitive_BL': 22.0,
    'months_since_bl': 0.0,
}

print("Making test predictions...\n")


def _analyze_and_print(agent_key, heading, profile, blank_line_after):
    """Run one trained agent on ``profile`` and print its payload and narrative.

    Returns the payload object produced by the agent's ``analyze`` call.
    """
    payload = agents[agent_key].analyze(patient_profile=profile)
    print(f"{heading}: {payload}")
    narrative = f"  Narrative: {payload.clinical_narrative}"
    print(f"{narrative}\n" if blank_line_after else narrative)
    return payload


# Each agent is optional: only query the ones present in `agents`
if 'motor' in agents:
    motor_payload = _analyze_and_print('motor', "Motor Agent", motor_profile, True)

if 'biomarker' in agents:
    bio_payload = _analyze_and_print('biomarker', "Biomarker Agent", bio_profile, True)

if 'non_motor_cognitive' in agents:
    nm_payload = _analyze_and_print('non_motor_cognitive', "Non-Motor Agent", nm_profile, False)

## 8. Multi-Agent Orchestration Test

In [None]:
from orchestrator import ClinicalOrchestrator

# Create orchestrator
orch = ClinicalOrchestrator(config=cfg)

# Collect one payload per available agent, pairing each agent key with the
# example profile defined in the prediction-test cell.
agent_inputs = [
    ('motor', motor_profile),
    ('biomarker', bio_profile),
    ('non_motor_cognitive', nm_profile),
]
payloads = [
    agents[agent_key].analyze(patient_profile=profile)
    for agent_key, profile in agent_inputs
    if agent_key in agents
]

if not payloads:
    # Nothing to fuse: avoid handing the orchestrator an empty list
    print("No trained agents available; skipping multi-agent fusion.")
else:
    # Fuse predictions
    fusion_result = orch.uncertainty_aware_fusion(payloads)
    ci = orch.calculate_confidence_interval(payloads)
    report = orch.generate_report(fusion_result)

    print("="*60)
    print("MULTI-AGENT CLINICAL ASSESSMENT")
    print("="*60)
    print(f"\nGlobal Risk Score: {fusion_result['global_risk_score']:.2f} ± {ci:.2f}")
    print(f"\n{report}")
    print("\n" + "="*60)

## 9. Save Models to Google Drive

In [None]:
# Models are automatically saved during training to cfg.paths.models_dir
print(f"Models saved to: {cfg.paths.models_dir}")

# List saved models
import os
models_path = cfg.paths.models_dir
if models_path.exists():
    print("\nSaved models:")
    # Sort entries so the listing is stable between runs (iterdir order is arbitrary)
    for agent_dir in sorted(models_path.iterdir()):
        if agent_dir.is_dir():
            print(f"   {agent_dir.name}/")
            for file in sorted(agent_dir.iterdir()):
                print(f"    - {file.name}")

    # Report success only when the models directory exists, and name the
    # destination according to the runtime detected by the configuration.
    destination = "Google Drive" if cfg.is_colab else "the local models directory"
    print(f"\n✓ Training complete! Models saved to {destination}.")
else:
    print("No models directory found")

##  Training Complete!

Your trained models are now saved in Google Drive and ready for use.

### Next Steps:
1.  Run `inference_demo.ipynb` to test predictions
2.  Use the trained agents in your research
3.  Share models with collaborators via Drive

### Notes:
- Models are saved in pickle format for easy loading; because unpickling can execute arbitrary code, only load model files from sources you trust
- Metadata includes training metrics and timestamps
- SHAP explainers are saved with models for interpretability