# Same Class Different Color and Texture (SCDCT) Comparison

This notebook compares the CVCL and CLIP models on a prototype-matching evaluation in which the distractors belong to the same class as the target but differ from it in both color AND texture.
For example, a rough red apple is tested against a smooth green apple, a shiny yellow apple, and a matte blue apple.

In [1]:
# Download the small English spaCy pipeline (en_core_web_sm) through the shell.
# NOTE(review): no cell visible in this notebook uses spaCy directly; presumably it is
# required by the imported project modules -- confirm before removing.
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ------------------------------- ------- 10.5/12.8 MB 65.5 MB/s eta 0:00:01
     ---------------------------------------- 12.8/12.8 MB 61.8 MB/s  0:00:00
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [2]:
import os
import sys
import random
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from collections import defaultdict

# Path setup
# REPO_ROOT is resolved as four directory levels above the kernel's current working
# directory, so every path below depends on where the notebook is launched from.
REPO_ROOT = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir, os.pardir, os.pardir))

# Add discover-hidden-visual-concepts to path
# Both entries are inserted at position 0, so after these two lines REPO_ROOT is
# searched first and DISCOVER_ROOT second. Re-running the cell inserts them again.
DISCOVER_ROOT = os.path.join(REPO_ROOT, 'discover-hidden-visual-concepts')
sys.path.insert(0, DISCOVER_ROOT)
sys.path.insert(0, REPO_ROOT)

# Import from discover-hidden-visual-concepts repo
# The 'src' directory is appended (lowest priority) so that the top-level packages
# `utils` and `models` imported below can be resolved. These imports must stay
# after the sys.path edits above.
sys.path.append(os.path.join(DISCOVER_ROOT, 'src'))
from utils.model_loader import load_model
from models.feature_extractor import FeatureExtractor
# NOTE(review): MultiModalLitModel is not referenced by any visible cell; presumably it
# is needed for its import side effects (e.g. checkpoint loading) -- confirm before removing.
from models.multimodal.multimodal_lit import MultiModalLitModel

# hard-coded paths
# CSV_PATH: metadata table read by CTImageDataset (needs Filename/Class/Color/Texture columns).
# IMG_DIR: image root; each image is looked up at IMG_DIR/<Class>/<Filename>.
# MASTER_CSV: shared results file that run_scdct_test appends one summary row to per call.
CSV_PATH = os.path.join(REPO_ROOT, 'data', 'KonkLab', 'testdata.csv')
IMG_DIR = os.path.join(REPO_ROOT, 'data', 'KonkLab', '17-objects')
MASTER_CSV = os.path.join(REPO_ROOT, 'PatrickProject', 'Chart_Generation', 'all_prototype_results.csv')

  from pkg_resources import packaging


In [3]:
# Shared Dataset and Helper Functions
class CTImageDataset(Dataset):
    """Image dataset driven by a CSV of filenames and attribute labels.

    Each item is ``(transformed_image, class, color, texture, idx)`` where the
    image is loaded from ``img_dir/<Class>/<Filename>`` and converted to RGB
    before ``transform`` is applied.

    Args:
        csv_path: path of the CSV file; it must provide the columns
            ``Filename``, ``Class``, ``Color`` and ``Texture``.
        img_dir: root directory that holds one sub-directory per class.
        transform: callable applied to the loaded RGB image.

    Raises:
        AssertionError: if any required column is absent from the CSV.
    """
    def __init__(self, csv_path, img_dir, transform):
        self.df = pd.read_csv(csv_path)
        required = ['Filename','Class','Color','Texture']
        # Collect whichever required columns the CSV lacks; any miss is fatal.
        absent = [name for name in required if name not in self.df]
        assert not absent, f"CSV must contain columns: {required}"
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        # One sample per CSV row.
        return len(self.df)

    def __getitem__(self, idx):
        record = self.df.iloc[idx]
        label = record['Class']
        # Images are stored under a per-class sub-directory of img_dir.
        image_path = os.path.join(self.img_dir, label, record['Filename'])
        rgb_image = Image.open(image_path).convert('RGB')
        return self.transform(rgb_image), label, record['Color'], record['Texture'], idx

def collate_fn(batch):
    """Merge a list of dataset samples into one batch.

    Args:
        batch: list of ``(img_tensor, class, color, texture, idx)`` tuples.

    Returns:
        A 5-tuple ``(imgs, classes, colors, textures, idxs)``: the image
        tensors stacked along a new leading dimension, followed by four plain
        Python lists holding the remaining fields in sample order.
    """
    stacked = torch.stack([sample[0] for sample in batch])
    # Fields 1..4 are kept as ordinary lists rather than being tensorized.
    classes, colors, textures, idxs = (
        [sample[pos] for sample in batch] for pos in (1, 2, 3, 4)
    )
    return stacked, classes, colors, textures, idxs

def run_scdct_test(model_name, seed=0, device='cuda' if torch.cuda.is_available() else 'cpu', batch_size=64, trials_per_tuple=10, max_images=None):
    """Run Same Class Different Color and Texture (SCDCT) evaluation.

    Tests if model can identify objects when distractors are from the SAME class
    but differ in BOTH color and texture:
    - Same class
    - Different color
    - Different texture

    This is a harder test as distractors share the class but differ in visual attributes.
    Example: rough red apple vs. smooth green apple, shiny yellow apple, matte blue apple

    Each trial picks a query image from a (class, color, texture) group, builds a
    prototype from the mean embedding of the group's other images, and checks
    whether the query is the most prototype-similar of four candidates (the
    query plus three distractors).

    Args:
        model_name: model identifier forwarded to ``load_model`` and
            ``FeatureExtractor`` (e.g. ``'cvcl-resnext'``).
        seed: seeds ``random`` and ``torch`` and is forwarded to ``load_model``.
        device: device the image batches are moved to for feature extraction.
        batch_size: batch size used while extracting embeddings.
        trials_per_tuple: number of 4-way trials per eligible tuple.
        max_images: accepted for interface compatibility but currently ignored.

    Returns:
        ``(tuple_results, overall_acc)`` where ``tuple_results`` maps
        ``"<class>-<color>-<texture>"`` to a dict with ``correct``, ``trials``
        and ``accuracy``, and ``overall_acc`` is the pooled accuracy
        (``0.0`` when no trial was run).

    Side effects:
        Prints progress and per-tuple results, and appends one summary row to
        ``MASTER_CSV`` (creating the file and its directory when missing).
    """
    random.seed(seed)
    torch.manual_seed(seed)

    # 1) load model & transform
    model, transform = load_model(model_name, seed=seed, device=device)
    extractor = FeatureExtractor(model_name, model, device)
    print(f"[INFO] Loaded model '{model_name}'")

    # 2) prepare DataLoader & extract embeddings
    ds = CTImageDataset(CSV_PATH, IMG_DIR, transform)
    loader = DataLoader(ds, batch_size=batch_size, shuffle=False, num_workers=0, collate_fn=collate_fn)

    all_embs, all_meta, all_idxs = [], [], []
    with torch.no_grad():
        for imgs, classes, colors, textures, idxs in loader:
            feats = extractor.get_img_feature(imgs.to(device))
            feats = extractor.norm_features(feats).cpu()
            feats = feats.float()
            all_embs.append(feats)
            all_meta.extend(zip(classes, colors, textures))
            all_idxs.extend(idxs)
    all_embs = torch.cat(all_embs, dim=0)
    print(f"[INFO] Extracted embeddings for {len(all_idxs)} images")

    # Dataset index -> row of all_embs, built once so the trial loop does not
    # rescan all_idxs with list.index() for every lookup. setdefault keeps the
    # first occurrence, matching list.index().
    row_of = {}
    for row, ds_idx in enumerate(all_idxs):
        row_of.setdefault(ds_idx, row)

    # 3) group by (class, color, texture) triple
    tuple2idxs = defaultdict(list)
    for idx, meta in zip(all_idxs, all_meta):
        tuple2idxs[meta].append(idx)

    # 4) run evaluation
    total_correct = 0
    total_trials = 0
    tuple_results = {}

    print("[INFO] Running 4-way trials: distractors with same class but different color & texture")
    for (cls, col, tex), idx_list in tuple2idxs.items():
        # Get pool: SAME class but different color AND different texture
        distractor_pool = [i for m, i in zip(all_meta, all_idxs) if m[0] == cls and m[1] != col and m[2] != tex]

        # Every group holds at least one image by construction, so only the
        # distractor pool size can make a tuple ineligible.
        if len(distractor_pool) < 3:
            continue

        correct = 0
        for _ in range(trials_per_tuple):
            q = random.choice(idx_list)
            same_rest_rows = [row_of[i] for i in idx_list if i != q]
            if same_rest_rows:
                proto = all_embs[same_rest_rows].mean(0)
            else:
                # NOTE(review): with a single-image group the prototype is the
                # query's own embedding, so (assuming norm_features L2-normalizes)
                # the query is matched against itself and the trial is trivially
                # correct. This inflates accuracy for singleton tuples; consider
                # skipping them. Behavior kept as in the original.
                proto = all_embs[row_of[q]]
            proto = proto / proto.norm()

            distractors = random.sample(distractor_pool, 3)
            candidates = [q] + distractors
            feats_cand = all_embs[[row_of[i] for i in candidates]]
            sims = feats_cand @ proto
            guess = candidates[sims.argmax().item()]

            hit = int(guess == q)
            correct += hit
            total_correct += hit
            total_trials += 1

        # Guard against trials_per_tuple == 0, mirroring the overall accuracy below.
        acc = correct / trials_per_tuple if trials_per_tuple else 0.0
        key = f"{cls}-{col}-{tex}"
        tuple_results[key] = {'correct': correct, 'trials': trials_per_tuple, 'accuracy': acc}
        print(f"{cls:12s} / {col:10s} / {tex:10s} : {correct}/{trials_per_tuple} ({acc:.1%})")

    overall_acc = total_correct / total_trials if total_trials else 0.0
    print(f"\n[OK] Overall accuracy: {total_correct}/{total_trials} ({overall_acc:.1%})")

    # 5) save results
    summary_df = pd.DataFrame([{'Model': model_name, 'Test': 'Same-Class-Different-Color-Texture', 'Correct': total_correct, 'Trials': total_trials, 'Accuracy': overall_acc}])

    os.makedirs(os.path.dirname(MASTER_CSV), exist_ok=True)
    # Append without a header to an existing results file; otherwise create it
    # with a header row. Each call adds a new row, so re-running accumulates rows.
    if os.path.exists(MASTER_CSV):
        summary_df.to_csv(MASTER_CSV, mode='a', header=False, index=False, float_format='%.4f')
    else:
        summary_df.to_csv(MASTER_CSV, index=False, float_format='%.4f')

    return tuple_results, overall_acc

## CVCL Test

In [4]:
# Evaluate CVCL on the SCDCT task, then report per-tuple and overall accuracy
cvcl_results, cvcl_overall = run_scdct_test('cvcl-resnext')

print("\nCVCL Results by Class-Color-Texture:")
for key in cvcl_results:
    res = cvcl_results[key]
    print(f"{key:35s}: {res['correct']}/{res['trials']} ({res['accuracy']:.1%})")
print(f"\nCVCL Overall Accuracy: {cvcl_overall:.1%}")

Loading checkpoint from C:\Users\jbats\.cache\huggingface\hub\models--wkvong--cvcl_s_dino_resnext50_embedding\snapshots\f50eaa0c50a6076a5190b1dd52aeeb6c3e747045\cvcl_s_dino_resnext50_embedding.ckpt


Lightning automatically upgraded your loaded checkpoint from v1.5.8 to v2.5.2. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint C:\Users\jbats\.cache\huggingface\hub\models--wkvong--cvcl_s_dino_resnext50_embedding\snapshots\f50eaa0c50a6076a5190b1dd52aeeb6c3e747045\cvcl_s_dino_resnext50_embedding.ckpt`


[INFO] Loaded model 'cvcl-resnext'
[INFO] Extracted embeddings for 1005 images
[INFO] Running 4-way trials: distractors with same class but different color & texture
pitcher      / Yellow     / textured   : 0/10 (0.0%)
pitcher      / Multicolored / textured   : 3/10 (30.0%)
pitcher      / Green      / smooth     : 10/10 (100.0%)
pitcher      / Grey       / smooth     : 10/10 (100.0%)
pitcher      / Orange     / textured   : 10/10 (100.0%)
pitcher      / Blue       / smooth     : 10/10 (100.0%)
headband     / Purple     / smooth     : 10/10 (100.0%)
headband     / Grey       / textured   : 10/10 (100.0%)
headband     / Orange     / textured   : 10/10 (100.0%)
headband     / Multicolored / smooth     : 6/10 (60.0%)
headband     / Red        / textured   : 10/10 (100.0%)
headband     / Pink       / textured   : 10/10 (100.0%)
grill        / Multicolored / smooth     : 1/10 (10.0%)
grill        / Black      / textured   : 9/10 (90.0%)
grill        / Blue       / smooth     : 10/10 (100.0%)

## CLIP Test

In [5]:
# Evaluate CLIP on the SCDCT task, then report per-tuple and overall accuracy
clip_results, clip_overall = run_scdct_test('clip-resnext')

print("\nCLIP Results by Class-Color-Texture:")
for key in clip_results:
    res = clip_results[key]
    print(f"{key:35s}: {res['correct']}/{res['trials']} ({res['accuracy']:.1%})")
print(f"\nCLIP Overall Accuracy: {clip_overall:.1%}")

[INFO] Loaded model 'clip-resnext'


  attn_output = scaled_dot_product_attention(q, k, v, attn_mask, dropout_p, is_causal)


[INFO] Extracted embeddings for 1005 images
[INFO] Running 4-way trials: distractors with same class but different color & texture
pitcher      / Yellow     / textured   : 4/10 (40.0%)
pitcher      / Multicolored / textured   : 3/10 (30.0%)
pitcher      / Green      / smooth     : 10/10 (100.0%)
pitcher      / Grey       / smooth     : 10/10 (100.0%)
pitcher      / Orange     / textured   : 4/10 (40.0%)
pitcher      / Blue       / smooth     : 10/10 (100.0%)
headband     / Purple     / smooth     : 10/10 (100.0%)
headband     / Grey       / textured   : 10/10 (100.0%)
headband     / Orange     / textured   : 10/10 (100.0%)
headband     / Multicolored / smooth     : 6/10 (60.0%)
headband     / Red        / textured   : 10/10 (100.0%)
headband     / Pink       / textured   : 10/10 (100.0%)
grill        / Multicolored / smooth     : 3/10 (30.0%)
grill        / Black      / textured   : 9/10 (90.0%)
grill        / Blue       / smooth     : 10/10 (100.0%)
grill        / Grey       / texture