In [None]:
# Mount Google Drive into the Colab filesystem; the export step at the end of
# the notebook writes its output underneath this mount point.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

In [None]:
import os, sys

REPO_DIR = '/content/cs-148'
PROJECT_DIR = os.path.join(REPO_DIR, 'project2')
SRC_DIR = os.path.join(PROJECT_DIR, 'src')

if not os.path.exists(REPO_DIR):
    !git clone https://github.com/ArjunS07/cs-148.git {REPO_DIR}
else:
    !git -C {REPO_DIR} pull

# Both PROJECT_DIR and SRC_DIR must be on sys.path:
# - PROJECT_DIR so `from src.train import train` resolves the src/ package
# - SRC_DIR so src/train.py's own `from model import build_model` resolves
for p in [PROJECT_DIR, SRC_DIR]:
    if p not in sys.path:
        sys.path.insert(0, p)

os.chdir(PROJECT_DIR)
print(f'Working directory: {os.getcwd()}')
print(f'sys.path includes src/: {SRC_DIR in sys.path}')

In [None]:
DATA_DIR = os.path.join(PROJECT_DIR, 'data', 'dataset')

if os.path.exists(DATA_DIR):
    print(f'Dataset already exists at {DATA_DIR}')
else:
    from google.colab import files
    uploaded = files.upload() 
    zip_name = list(uploaded.keys())[0]
    os.makedirs('data', exist_ok=True)
    !unzip -q {zip_name} -d data/
    print(f'Unzipped to data/')

!ls {DATA_DIR} | head -20

In [None]:
import torch
# Report whether the runtime exposes a CUDA device and, if so, name device 0.
cuda_ok = torch.cuda.is_available()
print(f'CUDA available: {cuda_ok}')
if cuda_ok:
    gpu_name = torch.cuda.get_device_name(0)
    print(f'GPU: {gpu_name}')

In [None]:
import argparse
from src.train import train

# Settings handed to `train`, packaged as an argparse Namespace so the notebook
# can call it directly without going through a command line.
# NOTE(review): the field names presumably mirror the options defined in
# src/train.py — confirm there before renaming or removing any of them.
args = argparse.Namespace(
    # Relative paths: resolved against the current working directory.
    data_dir='data/dataset',
    save_dir='checkpoints/colab_run',
    model='resnet18',
    epochs=200,
    batch_size=64,
    lr=1e-3,
    weight_decay=1e-4,
    warmup_epochs=5,
    label_smoothing=0.05,
    drop_path_rate=0.0,
    drop_rate=0.2,
    mixup_alpha=0.2,
    mix_prob=0.3,
    img_size=128,
    synthetic_n=9000,
    val_fraction=0.15,
    patience=50,
    num_workers=2,
    seed=42,
    no_augment=False,
    base_width=64,
)

# Seed every global RNG the run may draw from. Python's built-in `random` is
# seeded alongside NumPy and PyTorch so that any shuffling or augmentation
# built on it is repeatable as well.
import random
import numpy as np

random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

# Run training with the settings above and keep whatever `train` returns.
model = train(args)

## 5. Export pipeline

In [None]:
from src.pipeline import load_pipeline

# Load the checkpoint saved by the training run and wrap it as a pipeline.
checkpoint_path = 'checkpoints/colab_run/best_model.pt'
pipeline = load_pipeline(checkpoint_path, model_name='resnet18')

# Write the pipeline to the local (ephemeral) Colab disk first.
output_path = 'pipeline-cnn.pt'
pipeline.save_pipeline_local(output_path)
print(f'Saved {output_path}')

# Copy to Google Drive
import shutil

drive_root = '/content/drive/MyDrive'
drive_dest = '/content/drive/MyDrive/cs148/pipeline-cnn.pt'

# Without this check, os.makedirs below would happily create the directory on
# the local disk when Drive is not mounted, and the "copy to Drive" would be
# lost with the runtime. The local file at `output_path` is already saved at
# this point, so failing here loses nothing.
if not os.path.isdir(drive_root):
    raise RuntimeError(
        f'Google Drive does not appear to be mounted ({drive_root} is missing); '
        'run the drive.mount cell first.'
    )

os.makedirs(os.path.dirname(drive_dest), exist_ok=True)
# shutil.copy raises on failure, unlike `!cp`, so the success message below is
# only printed when the file was actually copied.
shutil.copy(output_path, drive_dest)
print(f'Copied to {drive_dest}')