# Kaggle Training Entry Point

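This notebook clones the repository into `/kaggle/working`, prepares the workspace, installs dependencies, rewrites the config with Kaggle paths, runs a dry run, and launches training. Cloning the repository and installing packages require network access. Attach the datasets that host: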

- the repository snapshot (read-only under `/kaggle/input/...`)
- pretrained encoders (`artifacts/audio-encoder`, `artifacts/roberta-text-encoder`)
- IEMOCAP raw data (`IEMOCAP_full_release`) and `iemocap_manifest.jsonl`

Update the path constants in the workspace-setup cell (`ARTIFACTS_DATASET`, `IEMOCAP_ROOT`, `MANIFEST_PATH`) if your dataset names or versions differ.

In [None]:
# Start from a clean checkout: delete any clone left over from a previous run
# so that `git clone` does not fail on an existing, non-empty directory.
!rm -rf /kaggle/working/ser-conformer-gat-xai
# Clone the repository into the Kaggle working directory. No branch, tag, or
# commit is specified, so this fetches whatever the default branch currently
# points to. Requires network access.
!git clone https://github.com/SpeedyLabX/ser-conformer-gat-xai.git /kaggle/working/ser-conformer-gat-xai

In [None]:
from pathlib import Path
import shutil

# Location of the cloned repository and of the attached Kaggle datasets.
# Edit these if your dataset names or versions differ.
WORK_DIR = Path("/kaggle/working/ser-conformer-gat-xai")
ARTIFACTS_DATASET = Path("/kaggle/input/text-audio-encoders/pytorch/default/1/artifacts")
IEMOCAP_ROOT = Path("/kaggle/input/iemocapfullrelease/IEMOCAP_full_release")
MANIFEST_PATH = Path("/kaggle/input/iemocap-manifest-jsonl/iemocap_manifest.jsonl")

# Fail fast if any required location is missing. The checks run in the order
# listed, so the first missing path determines the error message shown.
_required_locations = (
    (WORK_DIR, "Repository clone missing"),
    (ARTIFACTS_DATASET, "Encoder dataset path incorrect"),
    (IEMOCAP_ROOT, "IEMOCAP dataset path incorrect"),
    (MANIFEST_PATH, "Manifest dataset path incorrect"),
)
for _location, _failure_message in _required_locations:
    assert _location.exists(), _failure_message

# Copy the encoder artifacts into the clone's `artifacts` directory.
# dirs_exist_ok=True allows the copy to proceed when the target already exists.
_artifacts_target = WORK_DIR / "artifacts"
shutil.copytree(ARTIFACTS_DATASET, _artifacts_target, dirs_exist_ok=True)
print("Workspace ready at", WORK_DIR)

In [None]:
!pip install --quiet torch==2.2.0 torchaudio==2.2.0 --index-url https://download.pytorch.org/whl/cu118
!pip install --quiet transformers soundfile scikit-learn pyyaml tqdm matplotlib networkx

In [None]:
import yaml
from pathlib import Path

# Rewrite the repository's base config with Kaggle-specific paths and save it
# as a separate file, leaving the original config untouched.
# IEMOCAP_ROOT and MANIFEST_PATH come from the workspace-setup cell, which must
# have been run first.

WORK_DIR = Path("/kaggle/working/ser-conformer-gat-xai")
cfg_path = WORK_DIR / "configs" / "iemocap.yaml"

# yaml.safe_load returns None for an empty document; treat that as an empty
# mapping instead of failing later with an AttributeError on None.
cfg = yaml.safe_load(cfg_path.read_text(encoding="utf-8")) or {}
if not isinstance(cfg, dict):
    raise TypeError(
        f"Expected a mapping at the top level of {cfg_path}, got {type(cfg).__name__}"
    )


def _section(parent, key):
    """Return ``parent[key]``, replacing a missing or null entry with ``{}``.

    ``dict.setdefault`` alone is not enough here: a YAML entry such as
    ``data:`` with no value loads as ``None``, which ``setdefault`` keeps.
    """
    if parent.get(key) is None:
        parent[key] = {}
    return parent[key]


# Dataset locations and loader settings.
data_cfg = _section(cfg, "data")
data_cfg["root"] = str(IEMOCAP_ROOT)
data_cfg["manifest"] = str(MANIFEST_PATH)
data_cfg["num_workers"] = 0  # safer on Kaggle

# Point both encoders at the artifacts copied into the working directory.
model_cfg = _section(cfg, "model")
artifacts_dir = WORK_DIR / "artifacts"
_section(model_cfg, "audio")["checkpoint"] = str(
    artifacts_dir / "audio-encoder" / "conformer_encoder.pkl"
)
_section(model_cfg, "text")["checkpoint"] = str(artifacts_dir / "roberta-text-encoder")

# Keep a batch size already present in the config; otherwise default to 8.
_section(cfg, "trainer").setdefault("batch_size", 8)
cfg["artifacts_dir"] = str(artifacts_dir)

# sort_keys=False preserves the key order of the loaded config in the output.
resolved_cfg = WORK_DIR / "configs" / "iemocap_kaggle.yaml"
resolved_cfg.write_text(yaml.safe_dump(cfg, sort_keys=False), encoding="utf-8")
print("Resolved config written to", resolved_cfg)

In [None]:
import os
import subprocess
import sys

# Run the training CLI with --dry-run against the resolved Kaggle config
# before starting the real training run.

WORK_DIR = "/kaggle/working/ser-conformer-gat-xai"
# The module path and config path below are relative, so run from the repo root.
os.chdir(WORK_DIR)
# This only changes the kernel's import path; the child process started below
# does not inherit changes made to sys.path.
if "src" not in sys.path:
    sys.path.append("src")

# Launch with sys.executable rather than a bare "python" so the child uses the
# same interpreter as this kernel instead of whatever "python" is first on PATH.
# check=True raises CalledProcessError if the command exits with a non-zero status.
result = subprocess.run(
    [
        sys.executable,
        "-m",
        "src.cli.train",
        "--config",
        "configs/iemocap_kaggle.yaml",
        "--dry-run",
    ],
    check=True,
)
print("Dry run return code:", result.returncode)

In [None]:
import os
import subprocess
import sys

# Launch the full training run with the resolved Kaggle config.

WORK_DIR = "/kaggle/working/ser-conformer-gat-xai"
# The module path and config path below are relative, so run from the repo root.
os.chdir(WORK_DIR)
# This only changes the kernel's import path; the child process started below
# does not inherit changes made to sys.path.
if "src" not in sys.path:
    sys.path.append("src")

# Launch with sys.executable rather than a bare "python" so the child uses the
# same interpreter as this kernel instead of whatever "python" is first on PATH.
# check=True raises CalledProcessError if training exits with a non-zero status.
subprocess.run(
    [
        sys.executable,
        "-m",
        "src.cli.train",
        "--config",
        "configs/iemocap_kaggle.yaml",
    ],
    check=True,
)