# AMERS — Bulletproof Setup & Run (Google Colab)

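**Every cell is self-contained.** You can re-run any cell after a runtime restart — just run Cell 1 (Mount Drive) first, then jump to wherever you left off. If you were given a fresh runtime (disconnect, reset, or new session), also re-run Cell 2: the cloned repo in `/content/amers` and the installed packages live on the Colab VM, not on Drive.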

- Cells **skip automatically** if their output already exists on Drive
- All progress is saved to Google Drive — nothing is lost on restart
- A **Status Dashboard** (Cell 3) shows what's done and what's pending

**Requirements:** Colab Pro (GPU runtime), DEAP + IEMOCAP datasets on Google Drive.

## 1. Mount Google Drive & Check GPU (ALWAYS RUN THIS FIRST)

In [None]:
# ╔══════════════════════════════════════════════╗
# ║  RUN THIS CELL FIRST after every restart    ║
# ╚══════════════════════════════════════════════╝
import os
from pathlib import Path

# Mount Drive (idempotent — safe to re-run)
from google.colab import drive
drive.mount('/content/drive')

# Verify GPU
import torch
print(f'PyTorch {torch.__version__}')
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')
    !nvidia-smi | head -12
else:
    print('WARNING: No GPU detected! Go to Runtime → Change runtime type → GPU')

# Quick sanity check
DRIVE_BASE = Path('/content/drive/MyDrive/AMERS')
if DRIVE_BASE.exists():
    print(f'\n Drive base found: {DRIVE_BASE}')
else:
    print(f'\n Drive base NOT found — will be created in Cell 2')

## 2. Clone Repo + Install Dependencies + Setup Dirs

In [None]:
# ── Self-contained: no dependency on previous cell variables ──
import os, subprocess
from pathlib import Path

GITHUB_USER = 'RAVINDRA8008'
REPO_NAME   = 'MAJORDRAFT'
REPO_DIR    = '/content/amers'
DRIVE_BASE  = Path('/content/drive/MyDrive/AMERS')

# If repo is PRIVATE, paste your token here:
GITHUB_TOKEN = ''  # e.g. 'ghp_xxxxxxxxxxxxxxxxxxxx'

# ── Clone or pull ──
if GITHUB_TOKEN:
    url = f'https://{GITHUB_USER}:{GITHUB_TOKEN}@github.com/{GITHUB_USER}/{REPO_NAME}.git'
else:
    url = f'https://github.com/{GITHUB_USER}/{REPO_NAME}.git'

if not os.path.exists(REPO_DIR):
    r = subprocess.run(['git', 'clone', url, REPO_DIR], capture_output=True, text=True)
    if r.returncode != 0:
        print(f'Clone failed: {r.stderr}')
        print('If private repo: set GITHUB_TOKEN above, or make repo public')
        raise RuntimeError('git clone failed')
    print('Repository cloned.')
else:
    subprocess.run(['git', '-C', REPO_DIR, 'pull', '--ff-only'], capture_output=True)
    print('Repository updated (git pull).')

os.chdir(REPO_DIR)

# ── Install deps (pip caches — fast on re-run) ──
!pip install -q -r requirements.txt 2>&1 | tail -3
print('Dependencies ready.')

# ── Create ALL Drive directories ──
for sub in [
    'data/deap/raw', 'data/deap/processed',
    'data/iemocap/raw', 'data/iemocap/processed',
    'outputs/checkpoints/eeg', 'outputs/checkpoints/gan',
    'outputs/checkpoints/speech', 'outputs/checkpoints/fusion',
    'outputs/checkpoints/rl',
    'outputs', 'logs',
]:
    (DRIVE_BASE / sub).mkdir(parents=True, exist_ok=True)
print('Drive directory structure ready.')
print(f'\nAll good! Working dir: {os.getcwd()}')

## 3. STATUS DASHBOARD — Run anytime to see progress

In [None]:
# ╔══════════════════════════════════════════════════════════╗
# ║  STATUS DASHBOARD — shows what's done & what's pending  ║
# ╚══════════════════════════════════════════════════════════╝
# Read-only: inspects sentinel files/folders on Drive and prints one line per stage.
from pathlib import Path

DB = Path('/content/drive/MyDrive/AMERS')
CKPT = DB / 'outputs' / 'checkpoints'

# Same completion thresholds the extraction cells use (Cell 4: >= 32 .dat files,
# Cell 5: >= 5 Session dirs), so a partial extraction is not reported as DONE.
DEAP_MIN_FILES = 32
IEMOCAP_MIN_SESSIONS = 5

# checks[i] is handled by notebook cell FIRST_STAGE_CELL + i.
FIRST_STAGE_CELL = 4


def has_at_least(folder, pattern, minimum):
    """Return True when `folder` contains at least `minimum` entries matching `pattern`.

    A folder that does not exist counts as containing nothing.
    """
    return sum(1 for _ in Path(folder).glob(pattern)) >= minimum


checks = [
    ('DEAP extracted',        has_at_least(DB/'data/deap/raw', '*.dat', DEAP_MIN_FILES)),
    ('IEMOCAP extracted',     has_at_least(DB/'data/iemocap/raw', 'Session*', IEMOCAP_MIN_SESSIONS)),
    ('DEAP preprocessed',     (DB/'data/deap/processed/s01_features.npy').exists()),
    ('IEMOCAP preprocessed',  (DB/'data/iemocap/processed/session1_features.npy').exists()),
    ('EEG encoder trained',   (CKPT/'eeg/eeg_encoder_final.pt').exists()),
    ('GAN trained',           (CKPT/'gan/gan_final.pt').exists()),
    ('Speech encoder trained',(CKPT/'speech/speech_encoder_final.pt').exists()),
    ('Fusion trained',        (CKPT/'fusion/best_fusion_baseline.pt').exists()),
    ('RL agent trained',      (CKPT/'rl/ppo_agent_final.pt').exists()),
    ('Evaluation done',       (DB/'outputs/report.md').exists()),
]

print('='*52)
print('  AMERS Pipeline Status')
print('='*52)
for name, done in checks:
    icon = ' DONE' if done else ' PENDING'
    print(f'  {icon}  {name}')
print('='*52)

all_done = all(done for _, done in checks)
if all_done:
    print('  ALL STAGES COMPLETE! Jump to Cell 14 to view results.')
else:
    # Point the user at the first stage that is still pending.
    idx, pending_name = next((i, name) for i, (name, done) in enumerate(checks) if not done)
    print(f'\n  Next step: run Cell {FIRST_STAGE_CELL + idx} ({pending_name})')

## 4. Extract DEAP Dataset (skips if already done)

In [None]:
# ── Self-contained: DEAP extraction ──
import zipfile, shutil
from pathlib import Path

DRIVE_BASE = Path('/content/drive/MyDrive/AMERS')
DEAP_RAW = DRIVE_BASE / 'data' / 'deap' / 'raw'
DEAP_RAW.mkdir(parents=True, exist_ok=True)

DEAP_FILE_ID = '1Gl-itg2kqDYW1MH5K3CTlzJUFT69ZrU5'

dat_files = list(DEAP_RAW.glob('*.dat'))
if len(dat_files) >= 32:
    print(f'SKIP: DEAP already extracted ({len(dat_files)} .dat files)')
else:
    deap_zip = DRIVE_BASE / 'data' / 'deap' / 'deap_dataset.zip'
    if not deap_zip.exists():
        print('Downloading DEAP zip from Drive...')
        !gdown --id {DEAP_FILE_ID} -O {str(deap_zip)}

    print('Extracting DEAP .dat files...')
    with zipfile.ZipFile(deap_zip, 'r') as z:
        dat_entries = [n for n in z.namelist() if n.endswith('.dat')]
        print(f'  Found {len(dat_entries)} .dat files inside zip')
        for entry in dat_entries:
            z.extract(entry, DEAP_RAW)

    # Flatten any subdirectories
    for f in list(DEAP_RAW.rglob('*.dat')):
        dest = DEAP_RAW / f.name
        if f != dest:
            shutil.move(str(f), str(dest))
    for d in sorted(DEAP_RAW.rglob('*'), reverse=True):
        if d.is_dir():
            try: d.rmdir()
            except OSError: pass

    final = list(DEAP_RAW.glob('*.dat'))
    print(f'DONE: {len(final)} .dat files in {DEAP_RAW}')

## 5. Extract IEMOCAP Dataset (skips if already done)
Extracts to **local SSD** first (fast), then copies to Drive. Takes ~5-10 min on first run.

In [None]:
# ── Self-contained: IEMOCAP extraction ──
import zipfile, shutil
from pathlib import Path

DRIVE_BASE = Path('/content/drive/MyDrive/AMERS')
IEMOCAP_RAW = DRIVE_BASE / 'data' / 'iemocap' / 'raw'
IEMOCAP_RAW.mkdir(parents=True, exist_ok=True)
LOCAL_TMP = Path('/content/iemocap_extract_tmp')

IEMOCAP_FILE_ID = '1lIzIlkQxwiWS4zeld-kQf87zbvPpS9gd'

sessions = list(IEMOCAP_RAW.glob('Session*'))
if len(sessions) >= 5:
    print(f'SKIP: IEMOCAP already extracted ({len(sessions)} sessions)')
else:
    iemocap_zip = DRIVE_BASE / 'data' / 'iemocap' / 'iemocap_dataset.zip'
    if not iemocap_zip.exists():
        print('Downloading IEMOCAP zip from Drive...')
        !gdown --id {IEMOCAP_FILE_ID} -O {str(iemocap_zip)}

    # Step 1: Unzip to local SSD (fast)
    LOCAL_TMP.mkdir(parents=True, exist_ok=True)
    print('Step 1/3: Extracting outer zip to local SSD...')
    with zipfile.ZipFile(iemocap_zip, 'r') as z:
        z.extractall(LOCAL_TMP)
    print('  Done.')

    # Step 2: Extract inner tar.gz (if any)
    tar_files = list(LOCAL_TMP.rglob('*.tar.gz'))
    for tgz in tar_files:
        print(f'Step 2/3: Extracting {tgz.name} (~5 min)...')
        !cd {str(LOCAL_TMP)} && tar xzf {str(tgz)}
        tgz.unlink()
        print('  Done.')

    # Find Session dirs
    session_dirs = sorted([d for d in LOCAL_TMP.rglob('Session*') if d.is_dir() and d.name.startswith('Session')])
    print(f'  Found {len(session_dirs)} Session dirs locally')

    # Step 3: Copy to Drive
    print('Step 3/3: Copying to Drive...')
    for sd in session_dirs:
        dest = IEMOCAP_RAW / sd.name
        if not dest.exists():
            print(f'  {sd.name}...', end=' ', flush=True)
            shutil.copytree(str(sd), str(dest))
            print('done')
        else:
            print(f'  {sd.name} already on Drive')

    # Cleanup
    shutil.rmtree(LOCAL_TMP, ignore_errors=True)

    final = list(IEMOCAP_RAW.glob('Session*'))
    print(f'\nDONE: {len(final)} sessions in {IEMOCAP_RAW}')

## 6. Preprocess DEAP (EEG → Differential Entropy features)

In [None]:
# ── Self-contained: Preprocess DEAP ──
import os
from pathlib import Path

os.chdir('/content/amers')
DB = Path('/content/drive/MyDrive/AMERS')
sentinel = DB / 'data' / 'deap' / 'processed' / 's01_features.npy'

if sentinel.exists():
    n = len(list((DB / 'data/deap/processed').glob('*_features.npy')))
    print(f'SKIP: DEAP already preprocessed ({n} subject feature files)')
else:
    print('Preprocessing DEAP...')
    !python scripts/preprocess_deap.py --config config/default.yaml
    if sentinel.exists():
        print('DONE: DEAP preprocessing complete')
    else:
        print('WARNING: Preprocessing ran but sentinel file not found — check output above')

## 7. Preprocess IEMOCAP (Speech → MFCC features)

In [None]:
# ── Self-contained: Preprocess IEMOCAP ──
import os
from pathlib import Path

os.chdir('/content/amers')
DB = Path('/content/drive/MyDrive/AMERS')
sentinel = DB / 'data' / 'iemocap' / 'processed' / 'session1_features.npy'

if sentinel.exists():
    n = len(list((DB / 'data/iemocap/processed').glob('*_features.npy')))
    print(f'SKIP: IEMOCAP already preprocessed ({n} session feature files)')
else:
    print('Preprocessing IEMOCAP (this may take several minutes)...')
    !python scripts/preprocess_iemocap.py --config config/default.yaml
    if sentinel.exists():
        print('DONE: IEMOCAP preprocessing complete')
    else:
        print('WARNING: Preprocessing ran but sentinel file not found — check output above')

## 8. Train EEG Encoder (pre-train on DEAP)

In [None]:
# ── Self-contained: Train EEG Encoder ──
import os
from pathlib import Path

os.chdir('/content/amers')
CKPT = Path('/content/drive/MyDrive/AMERS/outputs/checkpoints')
sentinel = CKPT / 'eeg' / 'eeg_encoder_final.pt'

if sentinel.exists():
    sz = sentinel.stat().st_size / 1024
    print(f'SKIP: EEG encoder already trained ({sz:.0f} KB checkpoint)')
else:
    print('Training EEG encoder...')
    !python scripts/train_eeg.py --config config/default.yaml
    if sentinel.exists():
        print('DONE: EEG encoder training complete')
    else:
        print('ERROR: Training finished but checkpoint not found — check output above')

## 9. Train GAN (Conditional GAN on EEG features)

In [None]:
# ── Self-contained: Train GAN ──
import os
from pathlib import Path

os.chdir('/content/amers')
CKPT = Path('/content/drive/MyDrive/AMERS/outputs/checkpoints')
sentinel = CKPT / 'gan' / 'gan_final.pt'

if sentinel.exists():
    sz = sentinel.stat().st_size / 1024
    print(f'SKIP: GAN already trained ({sz:.0f} KB checkpoint)')
else:
    print('Training Conditional GAN (100 epochs)...')
    !python scripts/train_gan.py --config config/default.yaml
    if sentinel.exists():
        print('DONE: GAN training complete')
    else:
        print('ERROR: Training finished but checkpoint not found — check output above')

## 10. Train Speech Encoder (CNN-LSTM on IEMOCAP)

In [None]:
# ── Self-contained: Train Speech Encoder ──
import os
from pathlib import Path

os.chdir('/content/amers')
CKPT = Path('/content/drive/MyDrive/AMERS/outputs/checkpoints')
sentinel = CKPT / 'speech' / 'speech_encoder_final.pt'

if sentinel.exists():
    sz = sentinel.stat().st_size / 1024
    print(f'SKIP: Speech encoder already trained ({sz:.0f} KB checkpoint)')
else:
    print('Training Speech Encoder (30 epochs)...')
    !python scripts/train_speech.py --config config/default.yaml
    if sentinel.exists():
        print('DONE: Speech encoder training complete')
    else:
        print('ERROR: Training finished but checkpoint not found — check output above')

## 11. Train Fusion Classifier (baseline, no RL)

In [None]:
# ── Self-contained: Train Fusion Classifier ──
import os
from pathlib import Path

os.chdir('/content/amers')
CKPT = Path('/content/drive/MyDrive/AMERS/outputs/checkpoints')
sentinel = CKPT / 'fusion' / 'best_fusion_baseline.pt'

if sentinel.exists():
    sz = sentinel.stat().st_size / 1024
    print(f'SKIP: Fusion classifier already trained ({sz:.0f} KB checkpoint)')
else:
    # Check prerequisites
    missing = []
    if not (CKPT / 'eeg' / 'eeg_encoder_final.pt').exists():
        missing.append('EEG encoder (Cell 8)')
    if not (CKPT / 'speech' / 'speech_encoder_final.pt').exists():
        missing.append('Speech encoder (Cell 10)')
    if missing:
        print(f'BLOCKED: Need to train first: {", ".join(missing)}')
    else:
        print('Training Fusion Classifier (50 epochs)...')
        !python scripts/train_fusion.py --config config/default.yaml
        if sentinel.exists():
            print('DONE: Fusion training complete')
        else:
            print('ERROR: Training finished but checkpoint not found — check output above')

## 12. Train RL Agent (PPO augmentation control)

In [None]:
# ── Self-contained: Train RL Agent ──
import os
from pathlib import Path

os.chdir('/content/amers')
CKPT = Path('/content/drive/MyDrive/AMERS/outputs/checkpoints')
sentinel = CKPT / 'rl' / 'ppo_agent_final.pt'

if sentinel.exists():
    sz = sentinel.stat().st_size / 1024
    print(f'SKIP: RL agent already trained ({sz:.0f} KB checkpoint)')
else:
    # Check all prerequisites
    missing = []
    if not (CKPT / 'gan' / 'gan_final.pt').exists():
        missing.append('GAN (Cell 9)')
    if not (CKPT / 'eeg' / 'eeg_encoder_final.pt').exists():
        missing.append('EEG encoder (Cell 8)')
    if not (CKPT / 'speech' / 'speech_encoder_final.pt').exists():
        missing.append('Speech encoder (Cell 10)')
    if missing:
        print(f'BLOCKED: Need to train first: {", ".join(missing)}')
    else:
        print('Training RL Agent (PPO)...')
        !python scripts/train_rl.py --config config/default.yaml
        if sentinel.exists():
            print('DONE: RL training complete')
        else:
            print('ERROR: Training finished but checkpoint not found — check output above')

## 13. Evaluate Model

In [None]:
# ── Self-contained: Evaluate ──
import os
from pathlib import Path

os.chdir('/content/amers')
OUT = Path('/content/drive/MyDrive/AMERS/outputs')
CKPT = OUT / 'checkpoints'
sentinel = OUT / 'report.md'

if sentinel.exists():
    print(f'SKIP: Evaluation already done (report at {sentinel})')
    print('Delete the report file and re-run to regenerate.')
else:
    # Check prerequisites
    missing = []
    if not (CKPT / 'eeg' / 'eeg_encoder_final.pt').exists():
        missing.append('EEG encoder (Cell 8)')
    if not (CKPT / 'speech' / 'speech_encoder_final.pt').exists():
        missing.append('Speech encoder (Cell 10)')
    has_rl = (CKPT / 'rl' / 'best_fusion.pt').exists()
    has_bl = (CKPT / 'fusion' / 'best_fusion_baseline.pt').exists()
    if not has_rl and not has_bl:
        missing.append('Fusion or RL (Cell 11 or 12)')
    if missing:
        print(f'BLOCKED: Need: {", ".join(missing)}')
    else:
        tag = 'RL-tuned' if has_rl else 'baseline'
        print(f'Evaluating {tag} model...')
        !python scripts/evaluate.py --config config/default.yaml
        if sentinel.exists():
            print('DONE: Evaluation complete — see Cell 14 for results')
        else:
            print('WARNING: Evaluation ran but report not generated — check output above')

## 14. View Results

In [None]:
# ── Self-contained: View results ──
# Displays whichever evaluation artifacts are present in the Drive outputs folder.
from pathlib import Path
from IPython.display import Image, display, Markdown

OUT = Path('/content/drive/MyDrive/AMERS/outputs')


def _show_image(path, heading):
    """Print `heading`, then render the image at `path` 500 px wide."""
    print(heading)
    display(Image(str(path), width=500))


# Confusion matrix
cm = OUT / 'confusion_matrix.png'
if not cm.exists():
    print('No confusion matrix yet (run Cell 13 first)')
else:
    _show_image(cm, '=== Confusion Matrix ===')

# t-SNE (shown only when present; no message otherwise)
tsne = OUT / 'tsne_embeddings.png'
if tsne.exists():
    _show_image(tsne, '\n=== t-SNE Embeddings ===')

# Report
report = OUT / 'report.md'
if not report.exists():
    print('No report yet (run Cell 13 first)')
else:
    print('\n=== Evaluation Report ===')
    display(Markdown(report.read_text()))

# Loss/accuracy plots
plot_names = (
    'eeg_loss.png', 'gan_loss.png', 'speech_loss.png', 'speech_acc.png',
    'fusion_loss.png', 'fusion_acc.png', 'rl_aug_ratios.png',
)
for name in plot_names:
    p = OUT / name
    if p.exists():
        _show_image(p, f'\n=== {name} ===')

## 15. Push Updates to GitHub (optional)

In [None]:
# Uncomment to push changes from Colab back to GitHub
# import os; os.chdir('/content/amers')
# !git add -A && git commit -m 'Update from Colab' && git push