# ELEC 475 Lab 4: CLIP Training on Kaggle

**Simple, clean training notebook - no inline code modifications!**

Trains baseline CLIP model for 10 epochs (~3-4 hours)

---

## ⚠️ Before Running:

1. **Add datasets**: `jeffaudi/coco-2014-dataset-for-yolov3` + `jcube05/elec-475-lab4`
2. **Enable GPU**: T4 or P100
3. **Enable Internet**: ON
4. **Click "Run All"** and close your laptop! 💤

---

## 1. Environment Check

In [None]:
import os
import torch

# Report whether the Kaggle kernel environment variable is present and which
# CUDA device, if any, PyTorch can see.
rule = "=" * 80
on_kaggle = 'KAGGLE_KERNEL_RUN_TYPE' in os.environ
has_cuda = torch.cuda.is_available()

print(rule)
print("ENVIRONMENT CHECK")
print(rule)
print(f"Kaggle: {on_kaggle}")
print(f"CUDA: {has_cuda}")
if has_cuda:
    # Device 0 is the only device queried.
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {gpu_props.total_memory / 1e9:.2f} GB")
print(rule)

## 2. Install Dependencies

In [None]:
%%time
!pip install -q transformers torch torchvision tqdm pillow matplotlib
print("‚úì Dependencies installed")

## 3. Clone Repository & Setup

In [None]:
%%time
# Clone repo
!git clone https://github.com/Jcub05/475_ML-CV_Labs.git
os.chdir('475_ML-CV_Labs/Lab4')
print(f"‚úì Repository cloned\nDirectory: {os.getcwd()}")

In [None]:
# Use Kaggle-compatible dataset loader
import os
import shutil

# Back up the original loader only once. After the first run dataset.py
# already holds the Kaggle version, so copying it again would overwrite the
# real backup with the replacement.
if not os.path.exists('dataset_original.py'):
    shutil.copy('dataset.py', 'dataset_original.py')

# Put the Kaggle version in place under the name dataset.py.
shutil.copy('dataset_kaggle.py', 'dataset.py')

print("✓ Using Kaggle-compatible dataset loader (dataset_kaggle.py)")

## 4. Configure for Kaggle

In [None]:
# Create Kaggle config
from pathlib import Path

# Remove any existing config.py first so the file written below is a fresh one.
config_file = Path('config.py')
if config_file.exists():
    config_file.unlink()

# Source text of the config.py module written below. It defines a Config
# dataclass whose dataset paths point under /kaggle/input and whose checkpoint
# and results paths point under /kaggle/working, plus a get_config() factory
# that also creates the output directories.
config_code = '''
import os
from dataclasses import dataclass
from pathlib import Path
import torch

@dataclass
class Config:
    is_kaggle: bool = True
    data_root: str = "/kaggle/input/coco-2014-dataset-for-yolov3/coco2014"
    text_embeddings_path: str = "/kaggle/input/elec-475-lab4"
    train_images_dir: str = "images/train2014"
    val_images_dir: str = "images/val2014"
    train_captions_file: str = "annotations/instances_train2014.json"
    val_captions_file: str = "annotations/instances_val2014.json"
    checkpoint_dir: str = "checkpoints"
    results_dir: str = "results"
    embed_dim: int = 512
    image_size: int = 224
    pretrained_resnet: bool = True
    clip_mean: tuple = (0.48145466, 0.4578275, 0.40821073)
    clip_std: tuple = (0.26862954, 0.26130258, 0.27577711)
    clip_model_name: str = "openai/clip-vit-base-patch32"
    batch_size: int = 64
    num_epochs: int = 10
    learning_rate: float = 1e-4
    weight_decay: float = 1e-4
    temperature: float = 0.07
    use_scheduler: bool = True
    scheduler_type: str = "cosine"
    optimizer_type: str = "adamw"
    beta1: float = 0.9
    beta2: float = 0.999
    eps: float = 1e-8
    max_grad_norm: float = 1.0
    device: str = "cuda" if torch.cuda.is_available() else "cpu"
    num_workers: int = 2
    pin_memory: bool = True
    use_amp: bool = True
    use_subset: bool = False
    subset_size: int = 10000
    use_cached_embeddings: bool = True
    eval_every_n_epochs: int = 1
    save_best_only: bool = False
    recall_k_values: list = None
    num_visualization_samples: int = 10
    save_visualizations: bool = True
    log_interval: int = 100
    verbose: bool = True
    
    def __post_init__(self):
        if self.recall_k_values is None:
            self.recall_k_values = [1, 5, 10]
        self.data_root = Path(self.data_root)
        self.train_images_path = self.data_root / self.train_images_dir
        self.val_images_path = self.data_root / self.val_images_dir
        self.train_captions_path = self.data_root / self.train_captions_file
        self.val_captions_path = self.data_root / self.val_captions_file
        self.cache_path = Path(self.text_embeddings_path)
        self.checkpoint_path = Path("/kaggle/working") / self.checkpoint_dir
        self.results_path = Path("/kaggle/working") / self.results_dir
    
    def create_directories(self):
        os.makedirs(self.checkpoint_path, exist_ok=True)
        os.makedirs(self.results_path, exist_ok=True)
    
    def validate_paths(self):
        required = [
            self.train_images_path,
            self.val_images_path,
            self.cache_path / "text_embeddings_train.pt",
            self.cache_path / "text_embeddings_val.pt"
        ]
        missing = [str(p) for p in required if not p.exists()]
        if missing:
            raise FileNotFoundError(f"Missing: {missing}")
        return True
    
    def __repr__(self):
        return f"Config(Kaggle | {self.device} | Batch:{self.batch_size} | Epochs:{self.num_epochs})"

def get_config(**kwargs):
    config = Config(**kwargs)
    config.create_directories()
    return config
'''

# Explicit UTF-8 so the written module does not depend on the platform's
# default text encoding.
config_file.write_text(config_code, encoding='utf-8')

print("✓ Kaggle config created")

In [None]:
# Validate setup
import importlib
import sys

# config.py is generated while the kernel is running. Clear the import
# system's finder caches so the new file is noticed, and drop any previously
# imported copy so a re-run of the notebook picks up the current contents.
importlib.invalidate_caches()
sys.modules.pop('config', None)
from config import get_config

# get_config() also creates the checkpoint and results directories.
config = get_config()
print("\n" + "=" * 80)
print("CONFIGURATION")
print("=" * 80)
print(config)
print(f"\nCheckpoints: {config.checkpoint_path}")
print(f"Results: {config.results_path}")
print(f"Text embeddings: {config.cache_path}")

print("\nValidating paths...")
# Raises FileNotFoundError listing every required input path that is missing.
config.validate_paths()
print("✓ All paths valid!")
print("=" * 80)

## 5. Train Model

Training 10 epochs with batch size 64.

Checkpoints saved to `/kaggle/working/checkpoints/` every epoch.

In [None]:
%%time
print("\n" + "=" * 80)
print("STARTING TRAINING")
print("=" * 80)

!python train.py --num_epochs 10 --batch_size 64

print("\n" + "=" * 80)
print("TRAINING COMPLETE!")
print("=" * 80)

## 6. Results Summary

In [None]:
print("\n" + "=" * 80)
print("OUTPUT FILES")
print("=" * 80)
!ls -lhR /kaggle/working/

print("\n" + "=" * 80)
print("DOWNLOAD INSTRUCTIONS")
print("=" * 80)
print("""
1. Click 'Output' tab at top
2. Download all files
3. Extract on your computer

Key files:
  - checkpoints/best_model.pth
  - checkpoints/training.log
  - results/training_curves.png
""")
print("=" * 80)

---

## ✅ Done!

Download outputs and use for your lab report.

---