In [1]:
# Mount Google Drive into the Colab VM; later cells read the dataset from
# /content/drive/MyDrive and define an outputs directory there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [19]:
import pathlib, os, subprocess, json, datetime, shutil

# --- Source repository ------------------------------------------------------
REPO_URL = "https://github.com/SzymonSmagowski/DeepLearningCourse.git"
BRANCH = "main"

# --- Dataset location, relative to the Drive root ---------------------------
DATA_IN_DRIVE = "data/processed_cats.zip"

ROOT_DRIVE = pathlib.Path("/content/drive/MyDrive")
DATA_SRC = ROOT_DRIVE.joinpath(DATA_IN_DRIVE)

# --- Paths inside the Colab VM ----------------------------------------------
DATA_DST = pathlib.Path("/content/data")
# The checkout folder is named after the repository (URL basename minus ".git").
REPO_DIR = pathlib.Path("/content", pathlib.Path(REPO_URL).stem)
OUTPUTS_DIR = ROOT_DRIVE.joinpath("project3_outputs")
PROJECT_DIR = REPO_DIR.joinpath("project_3")

# Echo the resolved locations so a wrong path is visible before anything runs.
for _label, _location in (
    ("Repo dir:", REPO_DIR),
    ("Data src:", DATA_SRC),
    ("Data dst:", DATA_DST),
    ("Outputs dir:", OUTPUTS_DIR),
):
    print(_label, _location)

Repo dir : /content/DeepLearningCourse
Data src : /content/drive/MyDrive/data/processed_cats.zip
Data dst : /content/data
Outputs dir : /content/drive/MyDrive/project3_outputs


In [11]:
if not REPO_DIR.exists():
    !git clone -b "$BRANCH" "$REPO_URL" "$REPO_DIR"
else:
    %cd $REPO_DIR
    !git pull origin "$BRANCH"
    %cd -

/content/DeepLearningCourse
From https://github.com/SzymonSmagowski/DeepLearningCourse
 * branch            main       -> FETCH_HEAD
Already up to date.
/content


In [13]:
import tarfile, time, shutil, os
from pathlib import Path

DATA_NAME   = "processed_cats.zip"
DATA_DST.mkdir(parents=True, exist_ok=True)            # ensures /content/data

# Where the archive is expected inside the Colab VM.
LOCAL_DATA = DATA_DST / DATA_NAME
# DATA_SRC may already be the archive itself or a folder that contains it;
# appending DATA_NAME unconditionally produced ".../processed_cats.zip/processed_cats.zip".
DATA_ZIP = DATA_SRC if DATA_SRC.name == DATA_NAME else DATA_SRC / DATA_NAME

# --- logic --------------------------------------------------
if LOCAL_DATA.exists():
    print(f"✓ dataset already present at {LOCAL_DATA}")
elif DATA_SRC.exists():
    # Prefer the archive when it is there; otherwise fall back to linking
    # whatever DATA_SRC is (e.g. a folder on Drive).
    data_source = DATA_ZIP if DATA_ZIP.is_file() else DATA_SRC
    try:
        LOCAL_DATA.symlink_to(data_source, target_is_directory=data_source.is_dir())
        print(f"🔗  Symlinked {data_source} → {LOCAL_DATA}")
    except (OSError, NotImplementedError) as e:
        print(f"Symlink failed ({e.__class__.__name__}); copying …")
        t0 = time.time()
        if data_source.is_dir():
            shutil.copytree(data_source, LOCAL_DATA, dirs_exist_ok=True)
        else:
            # copytree raises NotADirectoryError on a plain file such as the zip.
            shutil.copy2(data_source, LOCAL_DATA)
        print(f"✓ copied in {time.time()-t0:.1f}s")
else:
    # List each candidate once (DATA_SRC and DATA_ZIP can be the same path).
    looked_for = "".join(f"\n  • {p}" for p in dict.fromkeys((DATA_SRC, DATA_ZIP)))
    raise FileNotFoundError(
        "Dataset not found!\n"
        f"Looked for either:{looked_for}"
    )

✓ dataset already present at /content/data/processed_cats.zip


In [18]:
!unzip -q $LOCAL_DATA -d $DATA_DST

In [16]:
!pip install -q -r DeepLearningCourse/project_3/requirements.txt

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.5/207.5 MB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.1/21.1 MB[0m [31m41.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m962.5/962.5 kB[0m [31m58.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m51.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [21]:
import os, sys, time, tarfile, subprocess, textwrap, shutil
from pathlib import Path

# (model family, config file name) pairs to run, in order.
# Every active entry ends with a comma so the commented-out runs below can be
# enabled without turning two adjacent tuples into a call expression.
CONFIGS = [
    ('DIFFUSION', 'diffusion_fast_test.yaml'),
    ('GAN', 'gan_fast_test.yaml'),
#     ('DIFFUSION', 'diffusion_128.yaml'),
#     ('DIFFUSION', 'diffusion_256.yaml'),
#     ('GAN', 'gan_128.yaml'),
#     ('GAN', 'gan_256.yaml'),
]

def run_and_tee(cmd, cwd, extra_env=None):
    """
    Run *cmd* inside *cwd*, echoing its output line by line as it arrives.

    Parameters
    ----------
    cmd : str or sequence
        Command to execute. A string is tokenised with shell-style quoting
        rules (no shell is spawned); sequence items are converted with
        ``str`` so path objects can be passed directly.
    cwd : str or path-like
        Working directory for the child process.
    extra_env : mapping, optional
        Variables layered on top of a copy of the current environment.

    Returns
    -------
    int
        Exit code of the child process.
    """
    import shlex  # local import: only needed for tokenising string commands

    env = os.environ.copy()
    if extra_env:
        env.update(extra_env)

    if isinstance(cmd, str):
        # Plain str.split() would tear quoted arguments apart.
        cmd = shlex.split(cmd)
    else:
        cmd = [str(part) for part in cmd]

    print("💻", " ".join(cmd), flush=True)
    # stderr is merged into stdout so both streams appear in order. The
    # context manager closes the pipe and waits for the child on exit.
    with subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        cwd=str(cwd), env=env, text=True
    ) as proc:
        for line in proc.stdout:
            print(line, end="", flush=True)
    return proc.returncode

# ─────────────────── main loop ───────────────────
# Launch one training process per CONFIGS entry and remember which ones
# exited with a non-zero status.
failed = []

for model, config_name in CONFIGS:
    model_path = PROJECT_DIR / model
    mname = model_path.stem
    print(f"\n▶️  Running {model} on config {config_name}")
    config_path = PROJECT_DIR / 'configs' / config_name

    # Anything that is not DIFFUSION is handled by the GAN training script.
    if model == 'DIFFUSION':
        script_name = 'train_diffusion.py'
    else:
        script_name = 'train_gan.py'
    executable = PROJECT_DIR / script_name
    run_results = OUTPUTS_DIR / mname

    # NOTE(review): config_path and run_results are computed above but are not
    # part of the command line, so every run starts the script with no
    # arguments — confirm whether the training scripts expect them.
    cmd = [sys.executable, str(executable)]
    rc = run_and_tee(cmd, cwd=PROJECT_DIR)

    if rc != 0:
        print(f"❌  {model} / {config_name} FAILED (exit {rc})")
        failed.append((model, config_name))
    else:
        print(f"✅  {model} / {config_name} finished OK")

# ─────────────────── summary ───────────────────
rule = "═" * 35
print("\n" + rule + " SUMMARY " + rule)
if not failed:
    print("🎉  All runs completed successfully!")
else:
    print("The following runs failed:")
    for m, c in failed:
        print(f"  • {m}: {c}")


▶️  Running DIFFUSION on config diffusion_fast_test.yaml
💻 /usr/bin/python3 /content/DeepLearningCourse/project_3/train_diffusion.py
2025-06-07 20:18:35.572685: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749327515.597555    6512 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749327515.604242    6512 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-06-07 20:18:35.626953: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compile