# t-SNE visualization for CLIP features

This notebook:
1. Sets up the repo and environment (Colab-ready)
2. Downloads the Oxford-IIIT Pet dataset
3. Extracts CLIP image features and labels
4. Runs t-SNE on the extracted features to visualize how well they separate the classes


In [None]:
# 0) GPU + repo setup
!nvidia-smi
%cd /content
!git clone -b hamza/discrim https://github.com/1hamzaiqbal/MFCLIP_acv
%cd MFCLIP_acv



Repo root: /Users/hamzaiqbal/grad/comp_vision/project_mf-clip/MFCLIP_acv
Data root: /content/data
Trainer cfg: /Users/hamzaiqbal/grad/comp_vision/project_mf-clip/MFCLIP_acv/configs/trainers/CoOp/rn50.yaml
Dataset cfg: /Users/hamzaiqbal/grad/comp_vision/project_mf-clip/MFCLIP_acv/configs/datasets/oxford_pets.yaml
Split: test


In [None]:
# 1) Install dependencies
!pip install torch torchvision timm einops yacs tqdm opencv-python scikit-learn scipy pyyaml ruamel.yaml pytorch-ignite foolbox pandas matplotlib seaborn wilds ftfy



Running:
 /usr/local/bin/python3 /Users/hamzaiqbal/grad/comp_vision/project_mf-clip/MFCLIP_acv/lpclip/feat_extractor.py --root /content/data --output-dir /Users/hamzaiqbal/grad/comp_vision/project_mf-clip/MFCLIP_acv/clip_feat --config-file /Users/hamzaiqbal/grad/comp_vision/project_mf-clip/MFCLIP_acv/configs/trainers/CoOp/rn50.yaml --dataset-config-file /Users/hamzaiqbal/grad/comp_vision/project_mf-clip/MFCLIP_acv/configs/datasets/oxford_pets.yaml --split test

Traceback (most recent call last):
  File "/Users/hamzaiqbal/grad/comp_vision/project_mf-clip/MFCLIP_acv/lpclip/feat_extractor.py", line 3, in <module>
    import torch
ModuleNotFoundError: No module named 'torch'



RuntimeError: Feature extraction failed; see stderr above.

In [None]:
# 2) Download Oxford Pets dataset
from torchvision.datasets import OxfordIIITPet
from torchvision import transforms
from pathlib import Path

root = Path("/content/data/oxford_pets")
root.mkdir(parents=True, exist_ok=True)
_ = OxfordIIITPet(root=str(root), download=True, transform=transforms.ToTensor())
print("Oxford Pets downloaded to", root)

# Also fetch annotations
%cd /content
!mkdir -p /content/data/oxford_pets
!wget -q https://www.robots.ox.ac.uk/~vgg/data/pets/data/images.tar.gz
!wget -q https://www.robots.ox.ac.uk/~vgg/data/pets/data/annotations.tar.gz
!tar -xf images.tar.gz -C /content/data/oxford_pets
!tar -xf annotations.tar.gz -C /content/data/oxford_pets
!ls /content/data/oxford_pets/annotations | head



NameError: name 'npz_path' is not defined

In [None]:
# 3) Extract features for t-SNE visualization
# Change split to "train", "val", or "test" as needed
%cd /content/MFCLIP_acv
%env TF_CPP_MIN_LOG_LEVEL=2

split = "test"  # Can change to "train" or "val"

import subprocess
import sys

cmd = [
    sys.executable, "lpclip/feat_extractor.py",
    "--root", "/content/data",
    "--output-dir", "/content/MFCLIP_acv/clip_feat",
    "--config-file", "configs/trainers/CoOp/rn50.yaml",
    "--dataset-config-file", "configs/datasets/oxford_pets.yaml",
    "--split", split
]

print(f"Running feature extraction for split: {split}")
result = subprocess.run(cmd, capture_output=True, text=True)
print(result.stdout)
if result.returncode != 0:
    print("Error:", result.stderr)
    raise RuntimeError("Feature extraction failed")

# Verify output exists
import os
npz_path = f"/content/MFCLIP_acv/clip_feat/OxfordPets/{split}.npz"
assert os.path.exists(npz_path), f"Features not found at {npz_path}"
print(f"\nFeatures extracted successfully to: {npz_path}")



In [None]:
# 4) Run t-SNE visualization
# Requires `npz_path` and `split` from the feature-extraction cell.
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import seaborn as sns

# Load features and labels; the context manager closes the archive once the
# arrays have been copied out.
with np.load(npz_path) as npz:
    X = np.array(npz["feature_list"], dtype=np.float32)
    y = np.array(npz["label_list"], dtype=np.int32)
print(f"Features shape: {X.shape}, Labels shape: {y.shape}")
print(f"Number of classes: {len(np.unique(y))}")

# Optional subsample for faster computation and clearer plots
max_points = 4000
if len(X) > max_points:
    rng = np.random.default_rng(0)  # fixed seed -> same subset on every run
    idx = rng.choice(len(X), size=max_points, replace=False)
    X = X[idx]
    y = y[idx]
    print(f"Subsampled to {len(X)} points for plotting")

# Run t-SNE
perplexity = min(30, max(5, len(X) // 100))  # Adaptive perplexity
# scikit-learn requires perplexity < number of samples; clamp for tiny inputs.
perplexity = min(perplexity, max(1, len(X) - 1))
print(f"Running t-SNE with perplexity={perplexity}...")
# The iteration count is left at the library default (1000, the value used
# previously): the keyword was renamed from `n_iter` to `max_iter` in
# scikit-learn 1.5, so omitting it keeps this cell working on old and new versions.
tsne = TSNE(n_components=2, init="pca", learning_rate="auto", perplexity=perplexity, random_state=0)
Z = tsne.fit_transform(X)
print("t-SNE completed!")

# Plot
plt.figure(figsize=(12, 10))
# Labels present in the (possibly subsampled) data and how often each occurs.
unique_labels, class_counts = np.unique(y, return_counts=True)
n_classes = len(unique_labels)
# "tab20" only has 20 distinct colours and repeats beyond that, which would
# give different classes the same colour. Fall back to evenly spaced "husl"
# hues when there are more classes than tab20 can distinguish.
palette_name = "tab20" if n_classes <= 20 else "husl"
palette = sns.color_palette(palette_name, n_colors=n_classes)

for i, cls in enumerate(unique_labels):
    mask = (y == cls)
    plt.scatter(Z[mask, 0], Z[mask, 1], s=10, color=palette[i],
                label=f"Class {cls}", alpha=0.6, edgecolors='none')

plt.title(f"t-SNE Visualization of CLIP Features ({split} split)\nShowing discrimination ability across {n_classes} classes",
          fontsize=14, fontweight='bold')
plt.xlabel("t-SNE dimension 1", fontsize=12)
plt.ylabel("t-SNE dimension 2", fontsize=12)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8, ncol=2)
plt.tight_layout()

# Save figure
output_path = f"/content/MFCLIP_acv/tsne_{split}.png"
plt.savefig(output_path, dpi=200, bbox_inches='tight')
print(f"\nSaved visualization to: {output_path}")
plt.show()

# Print some statistics
print("\nStatistics:")
print(f"  - Total samples: {len(y)}")
print(f"  - Number of classes: {n_classes}")
# Counts come from np.unique so they line up one-to-one with `unique_labels`
# even when label ids are not contiguous from 0 (np.bincount would insert
# zero-count slots for missing ids).
print(f"  - Samples per class: {class_counts}")
