<a href="https://colab.research.google.com/github/ElinaYXLin/RaphiApp/blob/main/Raphi_App.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# install dependencies
!pip install torch
!pip install scipy
!pip install lightning
!pip install -q git+https://github.com/openai/CLIP.git
!pip install wandb -qU

Collecting lightning
  Downloading lightning-2.5.6-py3-none-any.whl.metadata (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.4/42.4 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities<2.0,>=0.10.0 (from lightning)
  Downloading lightning_utilities-0.15.2-py3-none-any.whl.metadata (5.7 kB)
Collecting torchmetrics<3.0,>0.7.0 (from lightning)
  Downloading torchmetrics-1.8.2-py3-none-any.whl.metadata (22 kB)
Collecting pytorch-lightning (from lightning)
  Downloading pytorch_lightning-2.5.6-py3-none-any.whl.metadata (20 kB)
Downloading lightning-2.5.6-py3-none-any.whl (827 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m827.9/827.9 kB[0m [31m25.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.15.2-py3-none-any.whl (29 kB)
Downloading torchmetrics-1.8.2-py3-none-any.whl (983 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m983.2/983.2 kB[0m [31m53.0 MB/s[0m eta [36m0:00:00[0

In [3]:
# mount drive and login to wandb
from google.colab import drive
drive.mount('/content/drive')
!rm "/content/caltech101/101_ObjectCategories/cougar_body/image_0031.jpg" # this image is corrupted!

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
rm: cannot remove '/content/caltech101/101_ObjectCategories/cougar_body/image_0031.jpg': No such file or directory


In [22]:
# setup
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.datasets import ImageFolder
import torch.nn as nn
import clip
import torch
import torch, clip, os
from torchvision import transforms
import lightning as L
from lightning.pytorch.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.metrics import accuracy_score
import wandb
from lightning.pytorch.loggers import WandbLogger

# Pick the compute device once; later cells read the global `device`.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Report the choice, including the GPU model name when CUDA is in use.
print(f"Using: {device} ({torch.cuda.get_device_name(0) if device=='cuda' else 'CPU'})")

Using: cuda (Tesla T4)


In [None]:
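# Skip the cells from here down to the "start running HERE" marker on later runs:
# they clean the dataset, extract CLIP features and save them to Drive, which only has to happen once.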

In [20]:
# remove corrupted file
# Delete the one image known to be corrupted; do nothing (and print nothing)
# when it is not on disk.
corrupted = "/content/caltech101/101_ObjectCategories/cougar_body/image_0031.jpg"
corrupted_on_disk = os.path.exists(corrupted)
if corrupted_on_disk:
    os.remove(corrupted)
    print("Removed corrupted cougar_body/image_0031.jpg")

In [15]:
# read data
# Load the CLIP ViT-B/32 model in eval mode. The preprocessing object that
# clip.load also returns is discarded; the pipeline is rebuilt by hand below.
model, _ = clip.load("ViT-B/32", device=device)
model.eval()

# Per-channel statistics handed to Normalize.
norm_mean = [0.48145466, 0.4578275, 0.40821073]
norm_std = [0.26862954, 0.26130258, 0.27577711]

# Resize (bicubic) -> centre crop to 224x224 -> tensor -> normalise.
preprocess_steps = [
    transforms.Resize(224, interpolation=transforms.InterpolationMode.BICUBIC),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
]
transform = transforms.Compose(preprocess_steps)

# One sub-folder per class; ImageFolder derives the labels from folder names.
dataset = ImageFolder('/content/caltech101/101_ObjectCategories', transform=transform)

# shuffle=False keeps the iteration order fixed while features are extracted.
loader_options = dict(
    batch_size=256,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
    prefetch_factor=4,
    persistent_workers=True,
)
dataloader = DataLoader(dataset, **loader_options)

print(f"Dataset size after fix: {len(dataset)} images")

Using: cuda (Tesla T4)
Dataset: 2033 images, 22 classes


In [19]:
# embed features with clip
@torch.no_grad()
def extract(loader=None, encoder=None, target_device=None, log_every=10):
    """Encode every batch of images and return L2-normalised features with labels.

    Args:
        loader: Iterable of ``(images, labels)`` batches that supports ``len()``.
            Defaults to the notebook-level ``dataloader``.
        encoder: Object exposing ``encode_image(images)``. Defaults to the
            notebook-level ``model``. Pass the CLIP model explicitly if ``model``
            has since been rebound (the training cell reuses that name).
        target_device: Device the images are moved to before encoding.
            Defaults to the notebook-level ``device``.
        log_every: Print a progress line every this many batches; a falsy value
            disables progress output.

    Returns:
        Tuple ``(features, labels)``: the float32, unit-norm features of all
        batches concatenated on CPU, and the concatenated labels.
    """
    # Resolve defaults at call time so a bare extract() behaves as it always did.
    loader = dataloader if loader is None else loader
    encoder = model if encoder is None else encoder
    target_device = device if target_device is None else target_device

    feats, lbls = [], []
    n_batches = len(loader)
    for i, (x, y) in enumerate(loader, start=1):
        x = x.to(target_device, non_blocking=True)
        f = encoder.encode_image(x).float()
        # Scale each row to unit L2 norm.
        f = f / f.norm(dim=-1, keepdim=True)
        # Move results off the accelerator right away so only one batch lives there.
        feats.append(f.cpu())
        lbls.append(y)
        if log_every and i % log_every == 0:
            print(f"  batch {i}/{n_batches}")
    return torch.cat(feats), torch.cat(lbls)

# Run the extraction over the whole dataloader; both results are CPU tensors
# concatenated across batches (features first, labels second).
img_features, labels = extract()

Dataset size after fix: 2032 images
Extracting CLIP features...
DONE → torch.Size([2032, 512])
Saved clean features to Drive


In [None]:
# save embedded features
# Bundle the features, their labels and the class-name list into one file on
# Drive so later runs can load it instead of re-encoding the images.
clean_features_payload = {
    'feats': img_features,
    'labels': labels,
    'classes': dataset.classes,
}
torch.save(clean_features_payload, '/content/drive/MyDrive/caltech101_clip_vitb32_clean.pt')

In [33]:
# start running HERE AFTER initial run!

In [17]:
# build dataset
# 1. Load your saved CLIP features
# weights_only=True limits unpickling to tensors and plain containers, which is
# all this file holds; map_location keeps everything on CPU, as the pinned-memory
# DataLoaders below require.
data = torch.load('/content/drive/MyDrive/caltech101_clip_vitb32_clean.pt',
                  map_location='cpu', weights_only=True)
feats = data['feats']          # [N, 512] unit-norm CLIP image features
labels = data['labels']        # [N] integer class indices
class_names = data['classes']  # class names as stored by ImageFolder
num_classes = len(class_names)

print(f"Loaded {feats.shape[0]} features → {num_classes} classes")

# 2. Proper Dataset class
class ClipFeatureDataset(Dataset):
    """Map-style dataset over pre-computed feature vectors and their labels.

    Args:
        features: Tensor whose first dimension indexes samples. It is stored
            as-is (assumed to already be float32 on CPU — not converted here).
        labels: Tensor of class indices, one per sample; cast to int64.

    Raises:
        ValueError: If ``features`` and ``labels`` disagree on sample count.
    """

    def __init__(self, features, labels):
        # Fail fast: a length mismatch would otherwise surface later as an
        # IndexError in a worker, or silently pair features with wrong labels.
        if len(features) != len(labels):
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {len(features)} and {len(labels)}"
            )
        self.features = features
        # CrossEntropyLoss expects int64 class-index targets.
        self.labels = labels.long()

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self, idx):
        """Return the ``(feature, label)`` pair at position ``idx``."""
        return self.features[idx], self.labels[idx]

full_dataset = ClipFeatureDataset(feats, labels)

# 80/20 train/validation split; the validation set takes the rounding remainder.
train_portion = 0.8
train_size = int(train_portion * len(full_dataset))
val_size   = len(full_dataset) - train_size
# Seed the split so the same samples land in train/val on every run; otherwise
# validation metrics are not comparable between runs.
split_generator = torch.Generator().manual_seed(42)
train_ds, val_ds = random_split(full_dataset, [train_size, val_size],
                                generator=split_generator)

train_loader = DataLoader(train_ds, batch_size=512, shuffle=True,  num_workers=2, pin_memory=True)
val_loader   = DataLoader(val_ds,   batch_size=1024, shuffle=False, num_workers=2, pin_memory=True)


Loaded 2032 features → 22 classes


In [18]:
# create classifier

class ClipClassifier(L.LightningModule):
    """Two-layer MLP classification head trained on pre-computed feature vectors.

    Args:
        input_dim: Size of each input feature vector.
        num_classes: Number of output classes (defaults to the notebook-level
            ``num_classes`` at class-definition time).
        hidden_dim: Width of the hidden layer.
        dropout: Dropout probability applied after the hidden activation.
        lr: AdamW learning rate.
        weight_decay: AdamW weight decay.
        t_max: ``T_max`` of the cosine-annealing schedule.
            NOTE(review): the training cell runs 30 epochs, so with the default
            of 50 the schedule never reaches its minimum; pass ``t_max=30`` if
            a full anneal is intended.
    """

    def __init__(self, input_dim=512, num_classes=num_classes, hidden_dim=1024,
                 dropout=0.3, lr=1e-3, weight_decay=1e-2, t_max=50):
        super().__init__()
        # Records all constructor arguments under self.hparams (and in checkpoints).
        self.save_hyperparameters()
        self.net = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, num_classes)
        )
        self.criterion = nn.CrossEntropyLoss()

    def forward(self, x):
        """Return class logits for a batch of feature vectors."""
        return self.net(x)

    def _loss_and_accuracy(self, batch):
        """Compute cross-entropy loss and top-1 accuracy for one ``(x, y)`` batch."""
        x, y = batch
        logits = self(x)
        loss = self.criterion(logits, y)
        acc = (logits.argmax(dim=1) == y).float().mean()
        return loss, acc

    def training_step(self, batch, batch_idx):
        """Log ``train_loss``/``train_acc`` and return the loss to optimise."""
        loss, acc = self._loss_and_accuracy(batch)
        self.log('train_loss', loss, prog_bar=True)
        self.log('train_acc',  acc,  prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log ``val_loss``/``val_acc`` for one validation batch."""
        loss, acc = self._loss_and_accuracy(batch)
        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc',  acc,  prog_bar=True)

    def configure_optimizers(self):
        """Return AdamW plus a cosine-annealing learning-rate scheduler."""
        optimizer = torch.optim.AdamW(self.parameters(), lr=self.hparams.lr,
                                      weight_decay=self.hparams.weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                               T_max=self.hparams.t_max)
        return [optimizer], [scheduler]

In [23]:
# wandb setup

# Authenticate with Weights & Biases. Do NOT paste the API key into the notebook:
# wandb.login() picks it up from the WANDB_API_KEY environment variable or asks
# for it interactively. A key that has been committed to a shared notebook
# should be treated as leaked and regenerated.
wandb.login()

wandb_logger = WandbLogger(
    project="Perona Lab Application",   # W&B project the run is filed under
    name="Caltech 101",                 # optional display name of the run
    log_model=True                      # upload model checkpoints to W&B
)

In [24]:
# train classifier

# Seed Python, NumPy and torch (and DataLoader workers) so weight initialisation,
# dropout and batch shuffling are repeatable from run to run.
L.seed_everything(42, workers=True)

# NOTE: this rebinds `model`, which earlier referred to the CLIP encoder used by
# extract(). Re-run the CLIP loading cell before extracting features again.
model = ClipClassifier()

trainer = L.Trainer(
    max_epochs=30,
    accelerator='gpu' if torch.cuda.is_available() else 'cpu',
    devices=1,
    precision='16-mixed',                     # fp16 automatic mixed precision
    log_every_n_steps=1,
    logger=wandb_logger,
    callbacks=[
        # Stop once val_acc has not improved for 8 validation runs.
        EarlyStopping(monitor='val_acc', mode='max', patience=8),
        # Keep the checkpoint with the highest val_acc, saved under the name "best".
        ModelCheckpoint(monitor='val_acc', mode='max', filename='best')
    ]
)


trainer.fit(model, train_loader, val_loader)

INFO: Using 16bit Automatic Mixed Precision (AMP)
INFO:lightning.pytorch.utilities.rank_zero:Using 16bit Automatic Mixed Precision (AMP)
INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores


INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
/usr/local/lib/python3.12/dist-packages/lightning/pytorch/utilities/model_summary/model_summary.py:231: Precision 16-mixed is not supported by the model summary.  Estimated model size in MB will not be accurate. Using 32 bits instead.
INFO: 
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | net       | Sequential       | 547 K  | train
1 | criterion | CrossEntropyLoss | 0      | train
-------------------------------------------------------
547 K     Trainable params
0         Non-trainable params
547 K     Total params
2.191     Total estimated model params size (MB)
6         Modules in train mode
0         Modules in eval mode
INFO:lightning.pytorch.callbacks.model_summary:
  | Name      | Type             | Params | Mode 
-------------------------------------------------------
0 | net       | Se

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO: `Trainer.fit` stopped: `max_epochs=30` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=30` reached.
