# AMERS — Setup & Run (Google Colab)

This notebook sets up the full environment on Colab Pro, mounts Drive,
clones the repo, installs dependencies, and walks through the training pipeline.

**Requirements:** Colab Pro (GPU runtime), datasets on Google Drive.

## 1. Mount Google Drive & Check GPU

In [1]:
from google.colab import drive
drive.mount('/content/drive')

!nvidia-smi
import torch
print(f'PyTorch {torch.__version__}, CUDA available: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'GPU: {torch.cuda.get_device_name(0)}')

Mounted at /content/drive
/bin/bash: line 1: nvidia-smi: command not found
PyTorch 2.10.0+cpu, CUDA available: False


## 2. Clone Repository & Install Dependencies

In [23]:
import os, subprocess

# ── Configuration ──
GITHUB_USER = 'RAVINDRA8008'
REPO_NAME   = 'MAJORDRAFT'
REPO_DIR    = '/content/amers'

# If your repo is PRIVATE, generate a token at:
#   https://github.com/settings/tokens → "Generate new token (classic)" → check "repo" scope
# Then paste it below (remove the quotes placeholder):
GITHUB_TOKEN = ''  # e.g. 'ghp_xxxxxxxxxxxxxxxxxxxx'

# Build URL (with or without token)
if GITHUB_TOKEN:
    REPO_URL = f'https://{GITHUB_USER}:{GITHUB_TOKEN}@github.com/{GITHUB_USER}/{REPO_NAME}.git'
else:
    REPO_URL = f'https://github.com/{GITHUB_USER}/{REPO_NAME}.git'

# Clone or pull
if not os.path.exists(REPO_DIR):
    result = subprocess.run(['git', 'clone', REPO_URL, REPO_DIR], capture_output=True, text=True)
    if result.returncode != 0:
        print(f'Clone failed: {result.stderr}')
        print('\n⚠️  If the repo is private, either:')
        print('   1. Make it public: GitHub → Repo Settings → Danger Zone → Change visibility')
        print('   2. Set GITHUB_TOKEN above with a personal access token')
        raise RuntimeError('Git clone failed')
    print('✓ Repository cloned.')
else:
    subprocess.run(['git', '-C', REPO_DIR, 'pull'], check=True)
    print('✓ Repository updated.')

os.chdir(REPO_DIR)
!pip install -q -r requirements.txt
print('\n✓ Dependencies installed.')

✓ Repository updated.

✓ Dependencies installed.


## 3. Setup Drive Directory Structure

In [6]:
from pathlib import Path

# Root folder on Drive that holds all AMERS data and artefacts.
DRIVE_BASE = Path('/content/drive/MyDrive/AMERS')

# Every dataset gets a raw/ and processed/ folder; artefact folders sit at the root.
dirs = [
    DRIVE_BASE / 'data' / dataset / stage
    for dataset in ('deap', 'iemocap')
    for stage in ('raw', 'processed')
]
dirs += [DRIVE_BASE / name for name in ('checkpoints', 'outputs', 'logs')]

# mkdir is idempotent here, so re-running the cell is harmless.
for folder in dirs:
    folder.mkdir(parents=True, exist_ok=True)
    print(f'  ✓ {folder}')

print('\n✓ Drive structure ready.')

  ✓ /content/drive/MyDrive/AMERS/data/deap/raw
  ✓ /content/drive/MyDrive/AMERS/data/deap/processed
  ✓ /content/drive/MyDrive/AMERS/data/iemocap/raw
  ✓ /content/drive/MyDrive/AMERS/data/iemocap/processed
  ✓ /content/drive/MyDrive/AMERS/checkpoints
  ✓ /content/drive/MyDrive/AMERS/outputs
  ✓ /content/drive/MyDrive/AMERS/logs

✓ Drive structure ready.


In [24]:
!rm -rf /content/drive/MyDrive/AMERS/data/iemocap/raw/*

## 4. Extract Datasets from Drive

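The cell below expects the archives on your Drive at `AMERS/data/deap/deap_dataset.zip` and `AMERS/data/iemocap/iemocap_dataset.zip`. If either file is missing, it is downloaded with `gdown` using the file IDs listed here.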

- **DEAP:** File ID `1Gl-itg2kqDYW1MH5K3CTlzJUFT69ZrU5`
- **IEMOCAP:** File ID `1lIzIlkQxwiWS4zeld-kQf87zbvPpS9gd`

In [None]:
import zipfile, tarfile, shutil

# ── DEAP ──
DEAP_FILE_ID = '1Gl-itg2kqDYW1MH5K3CTlzJUFT69ZrU5'
DEAP_RAW = DRIVE_BASE / 'data' / 'deap' / 'raw'

if not list(DEAP_RAW.glob('*.dat')):
    deap_zip = DRIVE_BASE / 'data' / 'deap' / 'deap_dataset.zip'
    if not deap_zip.exists():
        !gdown --id {DEAP_FILE_ID} -O {str(deap_zip)}

    print('Extracting DEAP .dat files ...')
    with zipfile.ZipFile(deap_zip, 'r') as z:
        dat_entries = [n for n in z.namelist() if n.endswith('.dat')]
        print(f'  Found {len(dat_entries)} .dat files inside zip')
        for entry in dat_entries:
            z.extract(entry, DEAP_RAW)

    for f in list(DEAP_RAW.rglob('*.dat')):
        dest = DEAP_RAW / f.name
        if f != dest:
            shutil.move(str(f), str(dest))
    for d in sorted(DEAP_RAW.rglob('*'), reverse=True):
        if d.is_dir():
            try: d.rmdir()
            except OSError: pass

    print(f'  ✓ DEAP: {len(list(DEAP_RAW.glob("*.dat")))} .dat files ready')
else:
    print(f'  ✓ DEAP already extracted: {len(list(DEAP_RAW.glob("*.dat")))} files')

# ── IEMOCAP ──
IEMOCAP_FILE_ID = '1lIzIlkQxwiWS4zeld-kQf87zbvPpS9gd'
IEMOCAP_RAW = DRIVE_BASE / 'data' / 'iemocap' / 'raw'

if not list(IEMOCAP_RAW.glob('Session*')):
    iemocap_zip = DRIVE_BASE / 'data' / 'iemocap' / 'iemocap_dataset.zip'
    if not iemocap_zip.exists():
        !gdown --id {IEMOCAP_FILE_ID} -O {str(iemocap_zip)}

    # Step 1: Extract the outer zip
    print('Extracting IEMOCAP zip ...')
    with zipfile.ZipFile(iemocap_zip, 'r') as z:
        z.extractall(IEMOCAP_RAW)

    # Step 2: If it contains a .tar.gz, extract that too
    tar_files = list(IEMOCAP_RAW.rglob('*.tar.gz'))
    for tgz in tar_files:
        print(f'  Found inner archive: {tgz.name} — extracting (this takes a few minutes)...')
        with tarfile.open(tgz, 'r:gz') as t:
            t.extractall(IEMOCAP_RAW)
        tgz.unlink()  # remove tar.gz after extraction
        print(f'  ✓ Inner archive extracted')

    # Step 3: Flatten — move Session dirs to top level if nested
    for nested in sorted(IEMOCAP_RAW.rglob('Session*')):
        if nested.is_dir() and nested.parent != IEMOCAP_RAW:
            dest = IEMOCAP_RAW / nested.name
            if not dest.exists():
                shutil.move(str(nested), str(dest))

    # Clean empty dirs
    for d in sorted(IEMOCAP_RAW.rglob('*'), reverse=True):
        if d.is_dir() and not any(d.iterdir()):
            try: d.rmdir()
            except OSError: pass

    final_sessions = list(IEMOCAP_RAW.glob('Session*'))
    print(f'  ✓ IEMOCAP: {len(final_sessions)} sessions ready')
else:
    print(f'  ✓ IEMOCAP already extracted: {len(list(IEMOCAP_RAW.glob("Session*")))} sessions')

# ── Show what's inside both datasets ──
print('\n' + '='*60)
print('DEAP contents:')
for f in sorted(DEAP_RAW.iterdir())[:10]:
    print(f'  {f.name}  ({f.stat().st_size // 1024} KB)')
dat_count = len(list(DEAP_RAW.glob('*.dat')))
if dat_count > 10:
    print(f'  ... {dat_count} .dat files total')

print('\nIEMOCAP contents:')
for item in sorted(IEMOCAP_RAW.iterdir())[:15]:
    tag = '[DIR]' if item.is_dir() else f'{item.stat().st_size // 1024} KB'
    print(f'  {item.name}  ({tag})')
    if item.is_dir():
        for sub in sorted(item.iterdir())[:5]:
            stag = '[DIR]' if sub.is_dir() else f'{sub.stat().st_size // 1024} KB'
            print(f'    {sub.name}  ({stag})')
        remaining = len(list(item.iterdir())) - 5
        if remaining > 0:
            print(f'    ... +{remaining} more')

Extracting DEAP .dat files ...
  Found 32 .dat files inside zip
  ✓ DEAP: 32 .dat files ready
Extracting IEMOCAP ...
  Zip has 1 entries
    IEMOCAP_full_release_withoutVideos.tar.gz
  .wav files: 0, .txt files: 0
  Top-level items in raw/: ['IEMOCAP_full_release_withoutVideos.tar.gz']
  Session* dirs found (recursive): 0
  ⚠️  No Session* directories found. Checking .wav locations...
  Found 0 .wav files on disk
  ✓ IEMOCAP: 0 sessions ready


## 5. Verify Setup

In [10]:
# Environment check. The script path is relative, so this relies on the working
# directory being the cloned repo (the clone cell calls os.chdir(REPO_DIR)).
!python scripts/verify_setup.py

AMERS Environment Verification

[1] Python version
Python 3.12.12

[2] GPU

[3] Libraries
  torch                2.10.0+cpu
  numpy                1.26.4
  scipy                1.16.3
  sklearn              1.6.1
  mne                  1.11.0
  librosa              0.11.0
  gymnasium            1.2.3
  omegaconf            2.3.0
  tqdm                 4.67.3
  tensorboard          2.19.0
  seaborn              0.13.2
  matplotlib           3.10.0

[4] Google Drive
Drive mounted at /content/drive/MyDrive

[5] Datasets
  DEAP: 1 items in /content/drive/MyDrive/AMERS/data/deap/raw
  IEMOCAP: 1 items in /content/drive/MyDrive/AMERS/data/iemocap/raw

✓ Verification complete.


## 6. Pre-process Datasets

In [22]:
# Pre-process DEAP (EEG → Differential Entropy features)
# Run the extraction cell first: the script prints "SKIP sNN.dat (not found)" for
# each subject file it cannot locate.
# NOTE(review): input/output locations presumably come from config/default.yaml — confirm.
!python scripts/preprocess_deap.py --config config/default.yaml

  SKIP s01.dat (not found)
  SKIP s02.dat (not found)
  SKIP s03.dat (not found)
  SKIP s04.dat (not found)
  SKIP s05.dat (not found)
  SKIP s06.dat (not found)
  SKIP s07.dat (not found)
  SKIP s08.dat (not found)
  SKIP s09.dat (not found)
  SKIP s10.dat (not found)
  SKIP s11.dat (not found)
  SKIP s12.dat (not found)
  SKIP s13.dat (not found)
  SKIP s14.dat (not found)
  SKIP s15.dat (not found)
  SKIP s16.dat (not found)
  SKIP s17.dat (not found)
  SKIP s18.dat (not found)
  SKIP s19.dat (not found)
  SKIP s20.dat (not found)
  SKIP s21.dat (not found)
  SKIP s22.dat (not found)
  SKIP s23.dat (not found)
  SKIP s24.dat (not found)
  SKIP s25.dat (not found)
  SKIP s26.dat (not found)
  SKIP s27.dat (not found)
  SKIP s28.dat (not found)
  SKIP s29.dat (not found)
  SKIP s30.dat (not found)
  SKIP s31.dat (not found)
  SKIP s32.dat (not found)
DEAP pre-processing complete.


In [None]:
# Pre-process IEMOCAP (Speech → MFCC features)
# Run the extraction cell first so the IEMOCAP files are in place.
# NOTE(review): input/output locations presumably come from config/default.yaml — confirm.
!python scripts/preprocess_iemocap.py --config config/default.yaml

## 7. Train — Step by Step

You can either run the full pipeline at once, or step through each stage.

In [None]:
# Option A: Full pipeline (all stages) — to use it, uncomment the next line and
# comment out the Step 1 command at the bottom of this cell.
# !python scripts/train_full_pipeline.py --config config/default.yaml

# Option B: Step by step — Step 1 runs in this cell; Steps 2–4 are the cells that follow.

# Step 1: Train GAN
!python scripts/train_gan.py --config config/default.yaml

In [None]:
# Step 2: Pre-train speech encoder
# Relative paths: run from the repo root (the clone cell chdir's there).
!python scripts/train_speech.py --config config/default.yaml

In [None]:
# Step 3: Train fusion classifier (baseline, no RL)
# NOTE(review): presumably builds on the artefacts from Steps 1–2 — confirm before running out of order.
!python scripts/train_fusion.py --config config/default.yaml

In [None]:
# Step 4: Train RL agent (PPO augmentation control)
# NOTE(review): presumably requires the models trained in the earlier steps — confirm.
!python scripts/train_rl.py --config config/default.yaml

## 8. Evaluate

In [None]:
# Run the repo's evaluation script with the same config file used by the training cells.
# Relative paths: requires the working directory to be the cloned repo.
!python scripts/evaluate.py --config config/default.yaml

## 9. View Results

In [None]:
from IPython.display import Image, display, Markdown

# Result artefacts are read from the outputs folder on Drive.
outputs = DRIVE_BASE / 'outputs'

# Confusion matrix
cm_path = outputs / 'confusion_matrix.png'
if cm_path.exists():
    display(Image(str(cm_path)))
else:
    # Say why nothing is shown instead of leaving the cell output empty.
    print(f'No confusion matrix found at {cm_path} — nothing to display yet.')

# Report
report_path = outputs / 'report.md'
if report_path.exists():
    # Decode explicitly as UTF-8 so rendering does not depend on the locale default.
    display(Markdown(report_path.read_text(encoding='utf-8')))
else:
    print(f'No report found at {report_path} — nothing to display yet.')

## 10. Push Updates to GitHub

In [None]:
# Uncomment and run after making changes.
# NOTE(review): committing and pushing presumably need git user.name/user.email and
# push credentials configured in this runtime — confirm before relying on it.
# !cd /content/amers && git add -A && git commit -m 'Update from Colab' && git push