In [1]:
# Mount Google Drive at /content/drive (Colab-only helper).  The cells below
# read the dataset and write logs under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pathlib, os, subprocess, json, datetime, shutil

# --- remote sources -------------------------------------------------
REPO_URL = "https://github.com/SzymonSmagowski/DeepLearningCourse.git"
BRANCH = "main"
DATA_IN_DRIVE = "data/speech_commands_v0.01"   # dataset path relative to MyDrive

# --- filesystem layout ----------------------------------------------
ROOT_DRIVE = pathlib.Path("/content/drive/MyDrive")
DATA_SRC = ROOT_DRIVE.joinpath(DATA_IN_DRIVE)
DATA_DST = pathlib.Path("/content/data")       # where the script expects it
# The checkout folder is named after the last URL component minus ".git".
REPO_DIR = pathlib.Path("/content", pathlib.Path(REPO_URL).stem)
LOGS_DRIVE = ROOT_DRIVE.joinpath("speech_runs")  # all logs live here

# Echo the resolved locations so a wrong path is spotted before anything runs.
for _label, _location in (
    ("Repo dir", REPO_DIR),
    ("Data src", DATA_SRC),
    ("Data dst", DATA_DST),
    ("Logs dir", LOGS_DRIVE),
):
    print(f"{_label} :", _location)

Repo dir : /content/DeepLearningCourse
Data src : /content/drive/MyDrive/data/speech_commands_v0.01
Data dst : /content/data
Logs dir : /content/drive/MyDrive/speech_runs


In [3]:
if not REPO_DIR.exists():
    !git clone -b "$BRANCH" "$REPO_URL" "$REPO_DIR"
else:
    %cd $REPO_DIR
    !git pull origin "$BRANCH"
    %cd -

/content/DeepLearningCourse
From https://github.com/SzymonSmagowski/DeepLearningCourse
 * branch            main       -> FETCH_HEAD
Already up to date.
/content


In [5]:
# -----------------------------------------------------------
# 📦  Get Speech-Commands into /content/data   (idempotent)
#
#   1. If it’s already in /content            →  nothing to do
#   2. Else if Drive has an extracted folder  →  symlink (or copy)
#   3. Else if Drive only has *.tar.gz        →  copy tar, untar locally,
#                                                always delete the local tar
# -----------------------------------------------------------
import tarfile, time, shutil, os
from pathlib import Path

# --- paths --------------------------------------------------
# NOTE: ROOT_DRIVE / DATA_SRC / DATA_DST are re-bound here; from this cell on
# ROOT_DRIVE points at the "data" folder inside MyDrive.
ROOT_DRIVE  = Path("/content/drive/MyDrive/data")           # 🔁 adjust if needed
DATA_NAME   = "speech_commands_v0.01"                  # folder inside tar
DATA_DST    = Path("/content/data")                    # where scripts look
DATA_SRC    = ROOT_DRIVE / DATA_NAME                   # extracted in Drive
DATA_TAR    = ROOT_DRIVE / f"{DATA_NAME}.tar.gz"       # compressed in Drive
LOCAL_TAR   = Path("/content") / DATA_TAR.name         # temp copy

DATA_DST.mkdir(parents=True, exist_ok=True)            # ensures /content/data

# full path once extracted in Colab
LOCAL_DATA = DATA_DST / DATA_NAME

# A dangling symlink (target vanished) reports exists() == False but would make
# both symlink_to() and copytree() fail below, so clear it first.
if LOCAL_DATA.is_symlink() and not LOCAL_DATA.exists():
    LOCAL_DATA.unlink()

# --- logic --------------------------------------------------
if LOCAL_DATA.exists():
    print(f"✓ dataset already present at {LOCAL_DATA}")

elif DATA_SRC.exists():                                # extracted on Drive
    try:
        LOCAL_DATA.symlink_to(DATA_SRC, target_is_directory=True)
        print(f"🔗  Symlinked {DATA_SRC} → {LOCAL_DATA}")
    except OSError as e:
        # Only filesystem-level failures should trigger the copy fallback;
        # anything else is a programming error and must surface.
        print(f"Symlink failed ({e.__class__.__name__}); copying …")
        t0 = time.time()
        shutil.copytree(DATA_SRC, LOCAL_DATA, dirs_exist_ok=True)
        print(f"✓ copied in {time.time()-t0:.1f}s")

elif DATA_TAR.exists():                                # only tar on Drive
    print("📦  Found tarball in Drive — copying locally …")
    # Re-copy when a previous, interrupted run left a truncated file behind.
    if (not LOCAL_TAR.exists()
            or LOCAL_TAR.stat().st_size != DATA_TAR.stat().st_size):
        shutil.copy2(DATA_TAR, LOCAL_TAR)
        sz = LOCAL_TAR.stat().st_size / 1_048_576
        print(f"   → {sz:.1f} MB copied")

    print("🗜️   Extracting …")
    t0 = time.time()
    try:
        with tarfile.open(LOCAL_TAR, "r:gz") as tf:
            if hasattr(tarfile, "data_filter"):
                # Explicit filter: rejects members that would escape DATA_DST
                # and avoids the deprecation warning on newer interpreters.
                tf.extractall(path=DATA_DST, filter="data")
            else:
                tf.extractall(path=DATA_DST)
    finally:
        # Remove the local tarball even when extraction fails, so a corrupt
        # copy is never reused by the next run.
        LOCAL_TAR.unlink(missing_ok=True)
    elapsed = time.time() - t0

    if LOCAL_DATA.exists():
        print(f"✓ extracted in {elapsed:.1f}s → {LOCAL_DATA}")
    else:
        # The archive did not contain a top-level DATA_NAME folder; say so
        # instead of reporting a path that does not exist.
        print(f"✓ extracted in {elapsed:.1f}s → {DATA_DST}")
        print(f"⚠️  {LOCAL_DATA} was not created by the archive; "
              "this cell will re-extract on every run until the paths match")

else:
    raise FileNotFoundError(
        "Dataset not found!\n"
        f"Looked for either:\n  • {DATA_SRC}\n  • {DATA_TAR}"
    )

📦  Found tarball in Drive — copying locally …
   → 1420.1 MB copied
🗜️   Extracting …
✓ extracted in 33.9s → /content/data/speech_commands_v0.01


In [6]:
!pip install torch torchaudio librosa soundfile scikit-learn tqdm matplotlib pandas seaborn



In [8]:
# ████████████████████████████████████████████████████████████
#  Orchestrator for Speech-Commands experiments (Python version)
# ████████████████████████████████████████████████████████████
import os, sys, time, tarfile, subprocess, textwrap, shutil
from pathlib import Path

# ───────────────────────────── paths ─────────────────────────────
DRIVE_ROOT      = Path("/content/drive/MyDrive").expanduser()
REPO_DIR        = Path("/content/DeepLearningCourse")        # cloned repo
PROJECT_DIR     = REPO_DIR / "project_2"                     # *.py files
DATA_ARCHIVE    = DRIVE_ROOT / "data/speech_commands_v0.01.tar.gz"
DATA_DIR        = Path("/content/data")  # will be created
RESULTS_ROOT    = DRIVE_ROOT / "speech_results"              # logs + json

# ─────────────────── mount drive (if not mounted yet) ───────────────────
if not DRIVE_ROOT.exists():
    from google.colab import drive
    # Drive is mounted at the *parent* folder; "MyDrive" then appears beneath
    # the mount point.  Mounting at DRIVE_ROOT itself would nest the files
    # under .../MyDrive/MyDrive and break every path above.
    drive.mount(str(DRIVE_ROOT.parent))

# ─────────────────── unpack dataset once ───────────────────
# Extract into DATA_DIR itself: that is the folder checked by the guard,
# reported in the message and handed to the scripts via --data_path.
# An empty DATA_DIR (e.g. created by an earlier failed attempt) counts as
# "not unpacked yet".
# NOTE(review): assumes the archive layout matches what the earlier dataset
# cell unpacks into /content/data — confirm against the tarball.
if not DATA_DIR.exists() or not any(DATA_DIR.iterdir()):
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    print("📦  Extracting Speech-Commands archive … (1–2 min)")
    with tarfile.open(DATA_ARCHIVE) as tf:
        if hasattr(tarfile, "data_filter"):
            # Explicit filter: refuses members that would escape DATA_DIR.
            tf.extractall(path=DATA_DIR, filter="data")
        else:
            tf.extractall(path=DATA_DIR)
    print("✅  Extraction done:", DATA_DIR)

# ─────────────────── ensure results / log folders ───────────────────
(RESULTS_ROOT / "logs").mkdir(parents=True, exist_ok=True)

# ─────────────────── models / tasks to run ───────────────────
# Script file names are resolved relative to PROJECT_DIR by the main loop.
MODELS = [
    "Conformer.py", "DPT.py", "GRU.py",
    "HTFP.py", "LSTM.py", "RNN.py", "transformer.py"
]
TASKS  = [1, 2, 3, 4]

# ─────────────────── helper: run one command ───────────────────
def run_and_tee(cmd, log_file, cwd, extra_env=None):
    """
    Run *cmd* inside *cwd*, relaying its combined stdout+stderr to the screen
    line by line while appending the same text to *log_file*.

    Parameters
    ----------
    cmd : str | sequence
        Command to execute.  A string is tokenised with shell-like quoting
        rules (so quoted arguments stay intact); sequence items are converted
        with ``str`` so ``Path`` objects are accepted.
    log_file : str | os.PathLike
        File that receives a header, the child's output and a footer with the
        exit code.  Opened in append mode.
    cwd : str | os.PathLike
        Working directory for the child process.
    extra_env : dict | None
        Variables added on top of a copy of the current environment.

    Returns
    -------
    int
        Exit code of the child process.
    """
    import shlex  # local import: only this helper needs it

    env = os.environ.copy()
    if extra_env:
        env.update(extra_env)

    if isinstance(cmd, str):
        # str.split() would tear quoted arguments apart
        cmd = shlex.split(cmd)
    else:
        cmd = [str(part) for part in cmd]

    shown = " ".join(cmd)
    print("💻", shown)
    with open(log_file, "a", buffering=1, encoding="utf-8") as lf:   # line-buffered
        lf.write(f"\n{'='*80}\nCMD: {shown}\nSTART: {time.ctime()}\n")
        proc = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
            cwd=str(cwd), env=env, text=True,
            # a stray non-UTF-8 byte from the child must not abort the relay
            encoding="utf-8", errors="replace",
        )
        try:
            for line in proc.stdout:       # real-time relay
                print(line, end="")
                lf.write(line)
            proc.wait()
        except BaseException:
            # Interrupted (e.g. cell stopped) or relay failed: do not leave
            # the child running in the background.
            proc.kill()
            proc.wait()
            raise
        finally:
            proc.stdout.close()
        lf.write(f"\nEND  : {time.ctime()}  (ret={proc.returncode})\n")
    return proc.returncode

# ─────────────────── sweep: every model on every task ───────────────────
failed = []      # (model, task) pairs whose process exited non-zero

for task in TASKS:
    for model in MODELS:
        script = PROJECT_DIR / model
        stem   = script.stem                   # file name without ".py"
        print(f"\n▶️  Running {model} on task{task}")

        # Each (model, task) pair gets its own results folder and log file.
        out_dir = RESULTS_ROOT / "results" / stem / f"task{task}"
        out_dir.mkdir(parents=True, exist_ok=True)
        log_path = RESULTS_ROOT / "logs" / f"{stem}_task{task}.log"

        # Fixed options first, then the per-run arguments in the same order
        # the scripts have always received them.
        argv = [sys.executable, str(script)]
        argv += ["--task", str(task)]
        argv += ["--data_path", str(DATA_DIR)]
        argv += ["--results_dir", str(out_dir)]
        argv += ["--batch_size", "256"]
        argv += ["--seed", "0"]
        argv += ["--search"]

        # TASK tells the helper which of the four dataloader functions to call.
        exit_code = run_and_tee(
            argv, log_path, cwd=PROJECT_DIR, extra_env={"TASK": str(task)}
        )
        if exit_code != 0:
            print(f"❌  {model} / task{task} FAILED (exit {exit_code}) — log saved to {log_path}")
            failed.append((model, task))
        else:
            print(f"✅  {model} / task{task} finished OK")

# ─────────────────── summary ───────────────────
print(f"\n{'═'*35} SUMMARY {'═'*35}")
if not failed:
    print("🎉  All runs completed successfully!")
else:
    print("The following runs failed:")
    for bad_model, bad_task in failed:
        print(f"  • {bad_model}:task{bad_task}")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
loading train:   9%|▉         | 1726/18538 [00:09<02:13, 125.77it/s]
loading train:   9%|▉         | 1745/18538 [00:09<02:00, 138.99it/s]
loading train:  10%|▉         | 1762/18538 [00:09<02:01, 138.61it/s]
loading train:  10%|▉         | 1780/18538 [00:09<01:56, 143.53it/s]
loading train:  10%|▉         | 1796/18538 [00:09<02:06, 132.62it/s]
loading train:  10%|▉         | 1811/18538 [00:09<02:06, 132.46it/s]
loading train:  10%|▉         | 1825/18538 [00:10<02:47, 99.99it/s] 
loading train:  10%|▉         | 1837/18538 [00:10<03:26, 80.78it/s]
loading train:  10%|▉         | 1847/18538 [00:10<04:19, 64.28it/s]
loading train:  10%|█         | 1855/18538 [00:10<04:54, 56.68it/s]
loading train:  10%|█         | 1873/18538 [00:11<03:34, 77.76it/s]
loading train:  10%|█         | 1892/18538 [00:11<02:48, 98.54it/s]
loading train:  10%|█         | 1911/18538 [00:11<02:20, 118.24it/s]
loading train:  10%|█         | 1932/18538 