# CLIP Model Evaluation on Google Colab (FAST VERSION)

**Evaluates ALL models with Recall@K metrics**

✅ **Copies images to local storage (100x faster!)**
✅ **No zipping needed - just upload the folder**
✅ **Progress bars & saves to Drive**

---

## 📋 Upload to Google Drive:

```
My Drive/elec475_lab4/
  models/
    *.pth files
  data/
    text_embeddings_val.pt
    coco_val/              ← Just the folder!
      COCO_val2014_*.jpg
```

**No zipping needed!** Just upload the folder as-is.

---

## 1. Setup & Mount Drive

In [None]:
# Mount Google Drive so the models/ and data/ folders are reachable.
from google.colab import drive
drive.mount('/content/drive')

# Work from /content so that later relative paths resolve on the Colab VM.
import os
os.chdir('/content')

import torch

# Report whether a CUDA device is visible and, if so, its name and memory.
rule = "=" * 80
cuda_ok = torch.cuda.is_available()
print(rule)
print("GPU CHECK")
print(rule)
print(f"CUDA: {cuda_ok}")
if cuda_ok:
    gpu_name = torch.cuda.get_device_name(0)
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {gpu_name}")
    print(f"Memory: {gpu_props.total_memory / 1e9:.1f} GB")
print(rule)

## 2. Install Dependencies

In [None]:
# Install the third-party packages this notebook imports; -q keeps pip quiet.
!pip install -q transformers torch torchvision tqdm pillow matplotlib pandas
# NOTE: this message is printed unconditionally, whether or not pip succeeded.
print("‚úì Dependencies installed")

## 3. Clone Repository

In [None]:
import shutil
if os.path.exists('475_ML-CV_Labs'):
    shutil.rmtree('475_ML-CV_Labs')

!git clone https://github.com/Jcub05/475_ML-CV_Labs.git
os.chdir('475_ML-CV_Labs/Lab4')
print(f"‚úì Directory: {os.getcwd()}")

## 4. Configure Paths

In [None]:
from pathlib import Path

# --- Google Drive locations (slow to read from) ---
DRIVE_ROOT = Path("/content/drive/MyDrive/elec475_lab4")
MODELS_DIR = DRIVE_ROOT.joinpath("models")
DATA_DIR = DRIVE_ROOT.joinpath("data")
VAL_EMBEDDINGS_DRIVE = DATA_DIR.joinpath("text_embeddings_val.pt")
VAL_IMAGES_DRIVE = DATA_DIR.joinpath("coco_val")  # image folder as stored in Drive

# --- Local Colab disk (fast) ---
LOCAL_DATA = Path("/content/data")
LOCAL_DATA.mkdir(exist_ok=True)
VAL_IMAGES_LOCAL = LOCAL_DATA.joinpath("coco_val")  # destination of the image copy
VAL_EMBEDDINGS_LOCAL = LOCAL_DATA.joinpath("text_embeddings_val.pt")

# --- Results are written back to Drive ---
RESULTS_DIR = DRIVE_ROOT.joinpath("results")
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

# Echo the configuration, one item per line.
print(
    "=" * 80,
    "PATH CONFIGURATION",
    "=" * 80,
    f"Models (Drive): {MODELS_DIR}",
    f"Images (Drive): {VAL_IMAGES_DRIVE}",
    f"Images (Local): {VAL_IMAGES_LOCAL} ‚Üê FAST!",
    f"Results (Drive): {RESULTS_DIR}",
    "=" * 80,
    sep="\n",
)

## 5. Copy Images to Local Storage (FAST!)

In [None]:
%%time

if not VAL_IMAGES_DRIVE.exists():
    print(f"‚ùå ERROR: {VAL_IMAGES_DRIVE} not found!")
    print("Please upload coco_val/ folder to your Drive data/ folder")
else:
    img_count_drive = len(list(VAL_IMAGES_DRIVE.glob("*.jpg")))
    print(f"üìÅ Found in Drive: {img_count_drive} images")
    
    if VAL_IMAGES_LOCAL.exists():
        img_count_local = len(list(VAL_IMAGES_LOCAL.glob("*.jpg")))
        if img_count_local == img_count_drive:
            print(f"‚úì Already in local storage ({img_count_local} images)")
        else:
            print(f"‚ö† Partial copy detected, removing...")
            shutil.rmtree(VAL_IMAGES_LOCAL)
    
    if not VAL_IMAGES_LOCAL.exists():
        print(f"\n‚è≥ Copying images to local storage...")
        print(f"   From: {VAL_IMAGES_DRIVE}")
        print(f"   To: {VAL_IMAGES_LOCAL}")
        print(f"   This takes ~5-8 minutes for ~40K images...")
        
        # Use rsync for faster copying with progress
        !rsync -ah --progress "{VAL_IMAGES_DRIVE}/" "{VAL_IMAGES_LOCAL}/"
        
        img_count_local = len(list(VAL_IMAGES_LOCAL.glob("*.jpg")))
        print(f"\n‚úì Copied {img_count_local} images to local storage")
    
    print(f"\nüöÄ Reading from local = 100x faster than Drive!")

## 6. Copy Embeddings to Local Storage

In [None]:
# Copy the text-embedding cache to local disk unless a local copy exists.
if VAL_EMBEDDINGS_LOCAL.exists():
    print("‚úì Embeddings already local")
else:
    print("Copying embeddings...")
    shutil.copy(VAL_EMBEDDINGS_DRIVE, VAL_EMBEDDINGS_LOCAL)
    print("‚úì Copied")

# Report the size of the local file in megabytes.
embeddings_mb = VAL_EMBEDDINGS_LOCAL.stat().st_size / 1e6
print(f"Size: {embeddings_mb:.1f} MB")

## 7. Find All Models

In [None]:
# Collect every *.pth checkpoint in the Drive models folder, sorted by path.
model_files = sorted(MODELS_DIR.glob("*.pth"))

rule = "=" * 80
print("\n" + rule)
print(f"FOUND {len(model_files)} MODEL(S)")
print(rule)
# Numbered listing with each checkpoint's size in megabytes.
for number, checkpoint in enumerate(model_files, start=1):
    checkpoint_mb = checkpoint.stat().st_size / 1e6
    print(f"{number}. {checkpoint.name} ({checkpoint_mb:.1f} MB)")
print(rule)

## 8. Load Model & Data

In [None]:
from model import CLIPFineTuneModel
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from tqdm.auto import tqdm
import torch.nn.functional as F
import numpy as np

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def load_model(checkpoint_path):
    """Build a CLIPFineTuneModel and restore its weights from a checkpoint.

    Args:
        checkpoint_path: Path object (its ``.name`` is printed) pointing at a
            checkpoint file. The file may hold either a mapping with a
            ``'model_state_dict'`` entry or a bare state dict.

    Returns:
        The model, moved to the global ``device`` and switched to eval mode.
    """
    print(f"\nLoading: {checkpoint_path.name}")

    net = CLIPFineTuneModel(
        embed_dim=512,
        pretrained_resnet=True,
        clip_model_name="openai/clip-vit-base-patch32",
        freeze_text_encoder=True,
    )
    net = net.to(device)

    loaded = torch.load(checkpoint_path, map_location=device)
    # Accept either a wrapper holding 'model_state_dict' or a bare state dict.
    weights = loaded['model_state_dict'] if 'model_state_dict' in loaded else loaded
    net.load_state_dict(weights)

    net.eval()
    print("‚úì Loaded")
    return net

class ValidationDataset(Dataset):
    """Pairs validation images on disk with one cached text embedding each.

    The embeddings file is expected to hold a mapping whose keys look like
    ``"<image_id>_<caption_idx>"``. Only entries with caption index ``0`` are
    used, and only those whose image file
    ``COCO_val2014_<image_id zero-padded to 12 digits>.jpg`` exists in
    ``images_dir``.

    Args:
        images_dir: Directory containing the validation JPEGs.
        embeddings_file: Path to the saved embeddings mapping.
        transform: Optional callable applied to each RGB PIL image.

    Raises:
        RuntimeError: If no cached embedding has a matching image on disk.
    """

    def __init__(self, images_dir, embeddings_file, transform=None):
        self.images_dir = Path(images_dir)
        self.transform = transform

        print(f"Loading embeddings...")
        # Force the cache onto the CPU: if it was saved from GPU tensors it
        # would otherwise be restored as CUDA tensors, which DataLoader worker
        # processes cannot safely index.
        embeddings_cache = torch.load(embeddings_file, map_location="cpu")

        # Keep a single (first) caption per image.
        candidates = []
        for key, embedding in embeddings_cache.items():
            image_id_str, caption_idx = key.rsplit('_', 1)
            if caption_idx == '0':
                candidates.append((int(image_id_str), embedding))

        valid_embeddings = []
        valid_paths = []
        valid_ids = []

        print("Building dataset...")
        # Drop entries whose image file is not present in images_dir.
        for img_id, emb in tqdm(candidates, total=len(candidates), desc="Checking"):
            img_path = self.images_dir / f"COCO_val2014_{img_id:012d}.jpg"
            if img_path.exists():
                valid_embeddings.append(emb)
                valid_paths.append(img_path)
                valid_ids.append(img_id)

        if not valid_embeddings:
            # torch.stack([]) would fail with an opaque message; say what is wrong.
            raise RuntimeError(
                f"No images in {self.images_dir} match the embeddings in "
                f"{embeddings_file}; check that the image copy completed."
            )

        self.embeddings = torch.stack(valid_embeddings)
        self.image_paths = valid_paths
        self.image_ids = valid_ids
        print(f"‚úì {len(self)} samples")

    def __len__(self):
        """Return the number of image/embedding pairs."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Return a dict with 'image', 'text_embedding' and 'image_id' for ``idx``."""
        # Close the file handle as soon as the pixel data has been converted.
        with Image.open(self.image_paths[idx]) as raw:
            image = raw.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return {
            'image': image,
            'text_embedding': self.embeddings[idx],
            'image_id': self.image_ids[idx]
        }

print("‚úì Functions ready")

In [None]:
# Preprocessing: resize to 224x224, convert to a tensor, then normalise each
# channel with the mean/std constants below.
normalize = transforms.Normalize(
    mean=(0.48145466, 0.4578275, 0.40821073),
    std=(0.26862954, 0.26130258, 0.27577711),
)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    normalize,
])

# Images are read from the local copy, not from Drive.
val_dataset = ValidationDataset(
    VAL_IMAGES_LOCAL,
    VAL_EMBEDDINGS_LOCAL,
    transform=transform,
)

# Fixed iteration order (no shuffling), 128 samples per batch, 2 workers.
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=128,
    shuffle=False,
    num_workers=2,
    pin_memory=True,
)

print(f"\n‚úì Dataloader ready ({len(val_dataset)} samples)")

## 9. Metrics & Evaluation

In [None]:
def compute_recall_at_k_gpu(similarity_matrix, k_values=(1, 5, 10)):
    """Compute Recall@K in both retrieval directions from a similarity matrix.

    Row ``i`` and column ``i`` are treated as the matching pair: a query counts
    as a hit at K when its own index is among its K highest-scoring candidates.
    Rows are the queries for the ``img2txt_*`` metrics and columns for the
    ``txt2img_*`` metrics.

    Args:
        similarity_matrix: Square 2-D tensor of pairwise scores.
        k_values: Cut-offs K to report. Values larger than the number of items
            are evaluated against all items instead of raising.

    Returns:
        dict mapping ``img2txt_r{k}`` / ``txt2img_r{k}`` to percentages, plus
        ``avg_img2txt``, ``avg_txt2img`` and ``avg_recall`` (plain floats).

    Raises:
        ValueError: If the matrix is not a non-empty square 2-D tensor, or if
            ``k_values`` is empty.
    """
    if similarity_matrix.dim() != 2 or similarity_matrix.shape[0] != similarity_matrix.shape[1]:
        raise ValueError(
            f"similarity_matrix must be square, got shape {tuple(similarity_matrix.shape)}"
        )
    n = similarity_matrix.shape[0]
    if n == 0:
        raise ValueError("similarity_matrix must not be empty")

    k_values = list(k_values)
    if not k_values:
        raise ValueError("k_values must not be empty")

    # torch.topk raises when k exceeds the dimension size, so clamp to n.
    max_k = min(max(k_values), n)
    # Column vector of the correct index for every query, for broadcasting.
    correct_indices = torch.arange(n, device=similarity_matrix.device).unsqueeze(1)

    metrics = {}
    directions = (("img2txt", similarity_matrix), ("txt2img", similarity_matrix.T))
    for prefix, scores in directions:
        _, top_k_indices = torch.topk(scores, k=max_k, dim=1)
        hits = top_k_indices == correct_indices
        for k in k_values:
            # Slicing past max_k simply keeps every available column.
            metrics[f'{prefix}_r{k}'] = hits[:, :k].any(dim=1).float().mean().item() * 100

    avg_img2txt = sum(metrics[f'img2txt_r{k}'] for k in k_values) / len(k_values)
    avg_txt2img = sum(metrics[f'txt2img_r{k}'] for k in k_values) / len(k_values)
    metrics['avg_img2txt'] = avg_img2txt
    metrics['avg_txt2img'] = avg_txt2img
    metrics['avg_recall'] = (avg_img2txt + avg_txt2img) / 2

    return metrics

def evaluate_model(model, dataloader, model_name="Model"):
    """Embed every batch, score all image/text pairs and print Recall@K.

    Args:
        model: Callable as ``model(images, text_embeddings=...)`` returning an
            ``(image_embeds, text_embeds)`` pair.
        dataloader: Yields dicts with ``'image'`` and ``'text_embedding'``.
        model_name: Label used in the printed banners.

    Returns:
        The metrics dict produced by ``compute_recall_at_k_gpu`` for
        K = 1, 5 and 10.
    """
    rule = '=' * 80
    print(f"\n{rule}")
    print(f"EVALUATING: {model_name}")
    print(rule)

    model.eval()
    image_chunks = []
    text_chunks = []

    # Inference only: gradients are disabled while collecting embeddings.
    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Extracting"):
            images = batch['image'].to(device)
            text_embeddings = batch['text_embedding'].to(device)
            img_out, txt_out = model(images, text_embeddings=text_embeddings)
            image_chunks.append(img_out)
            text_chunks.append(txt_out)

    image_matrix = torch.cat(image_chunks, dim=0)
    text_matrix = torch.cat(text_chunks, dim=0)

    # Entry [i, j] scores image i against text j.
    similarity = image_matrix @ text_matrix.T
    metrics = compute_recall_at_k_gpu(similarity, k_values=[1, 5, 10])

    print(f"\n{rule}")
    print(f"RESULTS: {model_name}")
    print(rule)
    print(f"\nüìä Image ‚Üí Text:")
    print(f"  R@1:  {metrics['img2txt_r1']:.2f}%")
    print(f"  R@5:  {metrics['img2txt_r5']:.2f}%")
    print(f"  R@10: {metrics['img2txt_r10']:.2f}%")
    print(f"\nüìä Text ‚Üí Image:")
    print(f"  R@1:  {metrics['txt2img_r1']:.2f}%")
    print(f"  R@5:  {metrics['txt2img_r5']:.2f}%")
    print(f"  R@10: {metrics['txt2img_r10']:.2f}%")
    print(f"\nüéØ Avg: {metrics['avg_recall']:.2f}%")
    print(f"{rule}\n")

    return metrics

print("‚úì Ready")

## 10. Evaluate ALL Models

In [None]:
%%time

import json

all_results = {}

print(f"\n{'='*80}")
print(f"EVALUATING {len(model_files)} MODEL(S)")
print(f"{'='*80}\n")

for idx, model_file in enumerate(tqdm(model_files, desc="Overall"), 1):
    model_name = model_file.stem
    print(f"\n[{idx}/{len(model_files)}] {model_name}")
    print("-" * 80)
    
    model = load_model(model_file)
    metrics = evaluate_model(model, val_loader, model_name=model_name)
    all_results[model_name] = metrics
    
    temp_file = RESULTS_DIR / f"{model_name}_metrics.json"
    with open(temp_file, 'w') as f:
        json.dump(metrics, f, indent=2)
    print(f"üíæ {temp_file.name}")
    
    del model
    torch.cuda.empty_cache()

print("\n" + "="*80)
print(f"‚úÖ DONE")
print("="*80)

## 11. Summary

In [None]:
import pandas as pd

# Table column -> metrics key, in display order.
column_keys = (
    ('I2T R@1', 'img2txt_r1'),
    ('I2T R@5', 'img2txt_r5'),
    ('I2T R@10', 'img2txt_r10'),
    ('T2I R@1', 'txt2img_r1'),
    ('T2I R@5', 'txt2img_r5'),
    ('T2I R@10', 'txt2img_r10'),
    ('Avg', 'avg_recall'),
)

# One row per model; every recall value is rendered as an 'xx.xx%' string.
summary_data = [
    {'Model': name, **{column: f"{scores[key]:.2f}%" for column, key in column_keys}}
    for name, scores in all_results.items()
]

summary_df = pd.DataFrame(summary_data)
rule = "=" * 80
print("\n" + rule)
print("SUMMARY")
print(rule)
print(summary_df.to_string(index=False))
print(rule)

# Save the formatted table as CSV and the raw metrics as JSON.
csv_path = RESULTS_DIR / "evaluation_results.csv"
summary_df.to_csv(csv_path, index=False)
json_path = RESULTS_DIR / "detailed_metrics.json"
with json_path.open('w') as out:
    json.dump(all_results, out, indent=2)

print(f"\nüíæ Saved to Drive: {RESULTS_DIR}")
print(rule)