## 1. Setup

In [None]:
!pip install -q gdown scikit-learn

In [None]:
from google.colab import drive

# Location inside the Colab VM where Google Drive is attached.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

## 2. Clone repo and add to path

In [None]:
import os

REPO_DIR = '/content/cs-148'
PROJECT_DIR = os.path.join(REPO_DIR, 'project2')

if not os.path.exists(REPO_DIR):
    !git clone "https://github.com/ArjunS07/cs-148.git" {REPO_DIR}
else:
    !cd {REPO_DIR} && git pull

import sys
if PROJECT_DIR not in sys.path:
    sys.path.insert(0, PROJECT_DIR)

os.chdir(PROJECT_DIR)
print(f'Working directory: {os.getcwd()}')

## 3. Upload / download dataset

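Option A: Upload `dataset.zip` manually and unzip it (this is what the cell below does).  
Option B: Download from Google Drive with gdown (fill in your file ID) and unzip it so that `data/dataset` exists; the cell below then skips the upload.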

In [None]:
DATA_DIR = os.path.join(PROJECT_DIR, 'data', 'dataset')

if os.path.exists(DATA_DIR):
    print(f'Dataset already exists at {DATA_DIR}')
else:
    from google.colab import files
    uploaded = files.upload() 
    zip_name = list(uploaded.keys())[0]
    os.makedirs('data', exist_ok=True)
    !unzip -q {zip_name} -d data/
    print(f'Unzipped to data/')

!ls {DATA_DIR} | head -20

## 4. Train

In [None]:
import torch

# Report whether a GPU runtime is attached before starting a long training run.
cuda_ready = torch.cuda.is_available()
print(f'CUDA available: {cuda_ready}')
if cuda_ready:
    gpu_name = torch.cuda.get_device_name(0)
    print(f'GPU: {gpu_name}')

In [None]:
import argparse
import random

import numpy as np
import torch

from train import train

# Run configuration handed to the project's `train` entry point as an
# argparse-style namespace.
# NOTE(review): data_dir and save_dir are relative paths, so they resolve
# against the current working directory (not the mounted Drive) — confirm that
# checkpoints do not need to outlive the Colab VM.
args = argparse.Namespace(
    data_dir='data/dataset',
    save_dir='checkpoints/colab_run',
    model='resnet18',
    epochs=200,
    batch_size=64,
    lr=1e-3,
    weight_decay=1e-4,
    warmup_epochs=5,
    label_smoothing=0.05,
    drop_path_rate=0.0,
    drop_rate=0.2,
    mixup_alpha=0.2,
    mix_prob=0.3,
    img_size=128,
    synthetic_n=6000,
    val_fraction=0.15,
    patience=50,
    num_workers=2,
    seed=42,
    no_augment=False,
    base_width=64,
)

# Seed every RNG the run may draw from — Python's `random`, NumPy, and
# PyTorch — so repeated runs with the same `seed` start from the same state.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

model = train(args)