In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the per-image similarity table for CIFAR-10. The preview in the next cell shows
# the columns: index, true_label_idx, true_label, similarities (a stringified list).
df = pd.read_csv('clip_similarity_cifar10.csv')

# CIFAR-10 class index -> class name; position in the tuple is the class index.
cifar10_labels = dict(enumerate((
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
)))

In [3]:
# Preview the first rows to confirm the column layout before parsing `similarities`.
df.head()

Unnamed: 0,index,true_label_idx,true_label,similarities
0,0,6,frog,"[0.125732421875, 0.1605224609375, 0.1800537109..."
1,1,9,truck,"[0.09979248046875, 0.1612548828125, 0.14794921..."
2,2,9,truck,"[0.11529541015625, 0.148681640625, 0.145874023..."
3,3,4,deer,"[0.1102294921875, 0.12322998046875, 0.14636230..."
4,4,1,automobile,"[0.1307373046875, 0.19189453125, 0.15466308593..."


In [4]:
import ast

# Step 1: explode every row's similarity vector into long format --
# one record per (row, candidate class) pair.
similarities_data = [
    {
        'true_label_idx': image_row['true_label_idx'],
        'true_label': image_row['true_label'],
        'predicted_class': class_idx,
        'predicted_label': cifar10_labels[class_idx],
        'similarity': score,
    }
    for _, image_row in df.iterrows()
    for class_idx, score in enumerate(ast.literal_eval(image_row['similarities']))
]
expanded_df = pd.DataFrame(similarities_data)

# Step 2: mean similarity for each (true class, candidate class) combination.
group_keys = ['true_label_idx', 'true_label', 'predicted_class', 'predicted_label']
grouped_similarities = expanded_df.groupby(group_keys)['similarity'].mean().reset_index()

# Step 3: within each true class, order candidates by mean similarity (highest first)
# and number them 1..N in that order.
ranking_results = []
for true_idx in range(10):
    is_this_class = grouped_similarities['true_label_idx'] == true_idx
    ordered = grouped_similarities[is_this_class].sort_values('similarity', ascending=False)
    ranking_results.append(ordered.assign(rank=range(1, len(ordered) + 1)))

final_ranking = pd.concat(ranking_results, ignore_index=True)

# Step 4: wide view -- one row per true class, one column per candidate label, cells hold ranks.
ranking_pivot = final_ranking.pivot(
    index=['true_label_idx', 'true_label'],
    columns='predicted_label',
    values='rank',
)

print("Ranking of predicted classes for each true class (1 = highest similarity):")
print(ranking_pivot)

Ranking of predicted classes for each true class (1 = highest similarity):
predicted_label            airplane  automobile  bird  cat  deer  dog  frog  \
true_label_idx true_label                                                     
0              airplane           1           4     2   10     8    6     9   
1              automobile        10           1     6    9     5    7     8   
2              bird              10           9     1    8     2    4     5   
3              cat               10           9     3    1     6    2     7   
4              deer              10           8     4    7     1    3     9   
5              dog                9          10     4    6     5    1     7   
6              frog              10           7     3    4     6    2     1   
7              horse              9           7     5    8     2    3    10   
8              ship               8           4     3    9     5    6    10   
9              truck             10           2     5   

In [5]:
# Print, for every true class, the candidate labels ordered from rank 1 (highest mean
# similarity) downwards. Only the label is printed; its rank is implied by line order.
print("CLIP Similarity Rankings: Predicted Class Rankings for Each True Class")
print("=" * 70)
print("Format: For each true class, showing predicted classes ranked by similarity score")
print("(Rank 1 = highest similarity, Rank 10 = lowest similarity)")
print("=" * 70)

for true_idx, true_label in cifar10_labels.items():
    print(f"\nTrue Class: {true_label.upper()} (Index {true_idx})")
    print("-" * 50)

    # Rows of final_ranking belonging to this true class, best-ranked first.
    class_rankings = final_ranking[final_ranking['true_label_idx'] == true_idx].sort_values('rank')

    # Only the label column is needed, so iterate over it directly instead of
    # materialising every row with iterrows().
    for pred_label in class_rankings['predicted_label']:
        print(f"{pred_label:10s}")

CLIP Similarity Rankings: Predicted Class Rankings for Each True Class
Format: For each true class, showing predicted classes ranked by similarity score
(Rank 1 = highest similarity, Rank 10 = lowest similarity)

True Class: AIRPLANE (Index 0)
--------------------------------------------------
airplane  
bird      
ship      
automobile
truck     
dog       
horse     
deer      
frog      
cat       

True Class: AUTOMOBILE (Index 1)
--------------------------------------------------
automobile
truck     
horse     
ship      
deer      
bird      
dog       
frog      
cat       
airplane  

True Class: BIRD (Index 2)
--------------------------------------------------
bird      
deer      
horse     
dog       
frog      
truck     
ship      
cat       
automobile
airplane  

True Class: CAT (Index 3)
--------------------------------------------------
cat       
dog       
bird      
horse     
truck     
deer      
frog      
ship      
automobile
airplane  

True Class: DEER (Inde

In [21]:
# Build three dicts keyed by integer class index, each holding predicted labels in rank order:
#   top2_5_dict  -> labels ranked 2 through 5 (inclusive)
#   top7_10_dict -> labels ranked 7 through 10 (inclusive)
#   all_dict     -> every label, from rank 1 downwards
top2_5_dict = {}
top7_10_dict = {}
all_dict = {}
for idx in sorted(final_ranking['true_label_idx'].unique()):
    true_idx = int(idx)  # store plain Python ints as keys rather than NumPy integers
    sub = final_ranking[final_ranking['true_label_idx'] == idx].sort_values('rank')
    top2_5 = sub.loc[sub['rank'].between(2, 5), 'predicted_label'].tolist()
    top7_10 = sub.loc[sub['rank'].between(7, 10), 'predicted_label'].tolist()
    # Named `ranked_labels` rather than `all` so the builtin all() is not shadowed
    # for the remainder of the kernel session.
    ranked_labels = sub['predicted_label'].tolist()
    top2_5_dict[true_idx] = top2_5
    top7_10_dict[true_idx] = top7_10
    all_dict[true_idx] = ranked_labels

all_dict

{0: ['airplane',
  'bird',
  'ship',
  'automobile',
  'truck',
  'dog',
  'horse',
  'deer',
  'frog',
  'cat'],
 1: ['automobile',
  'truck',
  'horse',
  'ship',
  'deer',
  'bird',
  'dog',
  'frog',
  'cat',
  'airplane'],
 2: ['bird',
  'deer',
  'horse',
  'dog',
  'frog',
  'truck',
  'ship',
  'cat',
  'automobile',
  'airplane'],
 3: ['cat',
  'dog',
  'bird',
  'horse',
  'truck',
  'deer',
  'frog',
  'ship',
  'automobile',
  'airplane'],
 4: ['deer',
  'horse',
  'dog',
  'bird',
  'truck',
  'ship',
  'cat',
  'automobile',
  'frog',
  'airplane'],
 5: ['dog',
  'horse',
  'truck',
  'bird',
  'deer',
  'cat',
  'frog',
  'ship',
  'airplane',
  'automobile'],
 6: ['frog',
  'dog',
  'bird',
  'cat',
  'truck',
  'deer',
  'automobile',
  'horse',
  'ship',
  'airplane'],
 7: ['horse',
  'deer',
  'dog',
  'truck',
  'bird',
  'ship',
  'automobile',
  'cat',
  'airplane',
  'frog'],
 8: ['ship',
  'truck',
  'bird',
  'automobile',
  'deer',
  'dog',
  'horse',
  'airpl

In [15]:
# Take the first instance of each true class, rank its similarity vector, and produce:
#   top_dict    -> the 4 most similar predicted labels
#   bottom_dict -> the 4 least similar predicted labels
#   all_dict    -> every predicted label, most similar first
# NOTE(review): `all_dict` is also assigned by the mean-based ranking cell (In [21]);
# whichever of the two cells ran last determines what later cells see -- confirm intent.
top_dict = {}
bottom_dict = {}
all_dict = {}

# df is the original dataframe; ast was imported earlier
first_rows = df.groupby('true_label_idx', sort=True).first().reset_index()

for _, row in first_rows.iterrows():
    true_idx = int(row['true_label_idx'])
    sims = ast.literal_eval(row['similarities'])
    # pair (predicted_index, similarity) and sort desc by similarity;
    # sorted() is stable, so tied scores keep ascending class-index order
    ranked = sorted(enumerate(sims), key=lambda t: t[1], reverse=True)
    # Build the full label ordering once and slice it. Named `ranked_labels`
    # rather than `all` so the builtin all() is not shadowed in the kernel.
    ranked_labels = [cifar10_labels[i] for i, _ in ranked]
    top4 = ranked_labels[:4]
    bottom4 = ranked_labels[-4:]
    top_dict[true_idx] = top4
    bottom_dict[true_idx] = bottom4
    all_dict[true_idx] = ranked_labels


top_dict, bottom_dict, all_dict

({0: ['bird', 'airplane', 'ship', 'truck'],
  1: ['automobile', 'truck', 'horse', 'ship'],
  2: ['bird', 'deer', 'horse', 'truck'],
  3: ['cat', 'bird', 'dog', 'horse'],
  4: ['deer', 'horse', 'truck', 'bird'],
  5: ['dog', 'truck', 'horse', 'bird'],
  6: ['frog', 'dog', 'cat', 'deer'],
  7: ['horse', 'deer', 'truck', 'dog'],
  8: ['ship', 'bird', 'deer', 'automobile'],
  9: ['truck', 'ship', 'automobile', 'bird']},
 {0: ['horse', 'frog', 'automobile', 'cat'],
  1: ['dog', 'frog', 'airplane', 'cat'],
  2: ['dog', 'frog', 'airplane', 'cat'],
  3: ['deer', 'airplane', 'ship', 'automobile'],
  4: ['automobile', 'cat', 'airplane', 'frog'],
  5: ['airplane', 'cat', 'automobile', 'ship'],
  6: ['truck', 'automobile', 'ship', 'airplane'],
  7: ['frog', 'cat', 'airplane', 'automobile'],
  8: ['horse', 'cat', 'airplane', 'frog'],
  9: ['deer', 'cat', 'frog', 'airplane']},
 {0: ['bird',
   'airplane',
   'ship',
   'truck',
   'deer',
   'dog',
   'horse',
   'frog',
   'automobile',
   'cat'],


In [22]:
import json, pprint
# Two copy-paste friendly dumps of all_dict. json.dumps writes the integer class keys
# as strings ("0", "1", ...); pprint keeps them as ints.
print(json.dumps(all_dict, ensure_ascii=False))          # compact single line
pprint.pprint(all_dict, width=200)  

{"0": ["airplane", "bird", "ship", "automobile", "truck", "dog", "horse", "deer", "frog", "cat"], "1": ["automobile", "truck", "horse", "ship", "deer", "bird", "dog", "frog", "cat", "airplane"], "2": ["bird", "deer", "horse", "dog", "frog", "truck", "ship", "cat", "automobile", "airplane"], "3": ["cat", "dog", "bird", "horse", "truck", "deer", "frog", "ship", "automobile", "airplane"], "4": ["deer", "horse", "dog", "bird", "truck", "ship", "cat", "automobile", "frog", "airplane"], "5": ["dog", "horse", "truck", "bird", "deer", "cat", "frog", "ship", "airplane", "automobile"], "6": ["frog", "dog", "bird", "cat", "truck", "deer", "automobile", "horse", "ship", "airplane"], "7": ["horse", "deer", "dog", "truck", "bird", "ship", "automobile", "cat", "airplane", "frog"], "8": ["ship", "truck", "bird", "automobile", "deer", "dog", "horse", "airplane", "cat", "frog"], "9": ["truck", "automobile", "ship", "horse", "bird", "deer", "dog", "frog", "cat", "airplane"]}
{0: ['airplane', 'bird', 'shi

In [14]:
def sample_other_labels(true_label, k=4, labels_map=cifar10_labels, seed=None):
    """
    Sample k distinct class labels that are different from true_label.

    Parameters:
    - true_label: the class to exclude, given either as an int index (a key of
      labels_map) or as a str label (a value of labels_map).
    - k: how many different classes to sample (default 4).
    - labels_map: dict mapping class index -> class label string.
    - seed: int for a reproducible draw, or None for fresh randomness on each call.

    Returns:
    - list[str]: the k sampled labels, in the order they were drawn.

    Raises:
    - TypeError: true_label is neither an int index nor a str label.
    - ValueError: true_label is not present in labels_map, k is negative, or k
      exceeds the number of classes left after excluding true_label.
    """
    # Resolve true_label to its index in labels_map.
    if isinstance(true_label, (int, np.integer)):
        true_idx = int(true_label)
        if true_idx not in labels_map:
            raise ValueError(f"true_label index {true_idx} not found in labels_map")
    elif isinstance(true_label, str):
        # Reverse lookup: collect keys whose label equals true_label, use the first.
        matching_keys = [key for key, label in labels_map.items() if label == true_label]
        if not matching_keys:
            raise ValueError(f"true_label '{true_label}' not found in labels_map")
        true_idx = matching_keys[0]
    else:
        raise TypeError("true_label must be int index or str label")

    # Candidate pool: every class index except the true one, in sorted order so that
    # a given seed always draws from the same sequence.
    pool = [class_idx for class_idx in sorted(labels_map.keys()) if class_idx != true_idx]

    if k < 0:
        raise ValueError("k must be non-negative")
    if k > len(pool):
        raise ValueError(f"k={k} is larger than available classes ({len(pool)}) excluding the true label")

    rng = np.random.default_rng(seed)
    # replace=False guarantees the k sampled classes are distinct.
    selected_list = rng.choice(pool, size=k, replace=False).tolist()
    return [labels_map[class_idx] for class_idx in selected_list]

# Example usage: passing a seed makes the draw repeatable across runs.
sample_other_labels(0, k=4, seed=42)          # using index
# sample_other_labels('truck', k=4, seed=42)    # using label string

['deer', 'automobile', 'ship', 'frog']

### CIFAR20

In [2]:
# Load the per-image similarity table for the 20-class dataset.
# NOTE: this rebinds `df`, replacing the CIFAR-10 frame loaded earlier in the notebook.
df = pd.read_csv('clip_similarity_cifar20.csv')

# 20-class index -> class name; position in the tuple is the class index.
cifar20_labels = dict(enumerate((
    "aquatic mammals",
    "fish",
    "flowers",
    "food containers",
    "fruit and vegetables",
    "household electrical devices",
    "household furniture",
    "insects",
    "large carnivores",
    "large man-made outdoor things",
    "large natural outdoor scenes",
    "large omnivores and herbivores",
    "medium-sized mammals",
    "non-insect invertebrates",
    "people",
    "reptiles",
    "small mammals",
    "trees",
    "vehicles 1",
    "vehicles 2",
)))



In [3]:
# Preview the first rows of the 20-class table to confirm it has the same column layout.
df.head()

Unnamed: 0,index,true_label_idx,true_label,similarities
0,0,11,large_omnivores_and_herbivores,"[0.18310546875, 0.1553955078125, 0.14245605468..."
1,1,15,reptiles,"[0.1937255859375, 0.139404296875, 0.1224975585..."
2,2,4,fruit_and_vegetables,"[0.185302734375, 0.1961669921875, 0.1837158203..."
3,3,14,people,"[0.18212890625, 0.164306640625, 0.162353515625..."
4,4,1,fish,"[0.193603515625, 0.21728515625, 0.140747070312..."


In [4]:
def rank_similarities_by_mean(df, labels_map):
    """
    Rank predicted classes by mean similarity for every true class.

    Each row's similarity vector is averaged position-wise over all rows sharing
    the same true_label_idx; the positions (predicted class indices) are then
    ordered from highest to lowest mean and translated to labels.

    Parameters:
    - df: DataFrame with columns 'true_label_idx' and 'similarities'. Each
          'similarities' entry is either a string representation of a list of
          similarity scores or an already-parsed sequence of scores; position i
          is the score for predicted class i. Other columns (e.g. 'true_label')
          are ignored.
    - labels_map: dict mapping class indices to class label strings

    Returns:
    - dict where keys are true_label_idx (int) and values are lists of
      predicted class labels ranked by mean similarity (highest to lowest).
      Tied means keep ascending class-index order. An empty df yields {}.

    Raises:
    - KeyError: a similarity vector has a position with no entry in labels_map.
    """
    import ast

    # Nothing to rank; avoids grouping a frame that has no columns.
    if len(df) == 0:
        return {}

    # Parse stringified vectors; pass already-parsed sequences through unchanged.
    parsed_rows = [
        ast.literal_eval(raw) if isinstance(raw, str) else list(raw)
        for raw in df['similarities']
    ]

    # Wide table: one row per input row (indexed by its true class), one column
    # per predicted class index. Shorter vectors are padded with NaN.
    score_table = pd.DataFrame(parsed_rows, index=df['true_label_idx'].to_numpy())

    # Mean similarity per (true class, predicted class); NaN padding is skipped.
    mean_scores = score_table.groupby(level=0, sort=True).mean()

    ranked_dict = {}
    for true_idx, class_means in mean_scores.iterrows():
        # Drop predicted classes never observed for this true class, then sort with
        # a stable algorithm so equal means are ordered deterministically.
        ordered = class_means.dropna().sort_values(ascending=False, kind='mergesort')
        ranked_dict[int(true_idx)] = [labels_map[int(pred_class)] for pred_class in ordered.index]

    return ranked_dict

# Run the ranking on the 20-class data. The labels in the result come from cifar20_labels
# (space-separated), not from the CSV's true_label column (underscore-separated).
cifar20_ranked = rank_similarities_by_mean(df, cifar20_labels)
cifar20_ranked

{0: ['aquatic mammals',
  'medium-sized mammals',
  'small mammals',
  'fish',
  'large omnivores and herbivores',
  'large carnivores',
  'insects',
  'large natural outdoor scenes',
  'people',
  'non-insect invertebrates',
  'vehicles 2',
  'vehicles 1',
  'reptiles',
  'household furniture',
  'fruit and vegetables',
  'flowers',
  'large man-made outdoor things',
  'household electrical devices',
  'trees',
  'food containers'],
 1: ['aquatic mammals',
  'fish',
  'large omnivores and herbivores',
  'insects',
  'medium-sized mammals',
  'small mammals',
  'people',
  'non-insect invertebrates',
  'large carnivores',
  'large natural outdoor scenes',
  'vehicles 2',
  'vehicles 1',
  'reptiles',
  'household furniture',
  'fruit and vegetables',
  'flowers',
  'household electrical devices',
  'food containers',
  'trees',
  'large man-made outdoor things'],
 2: ['flowers',
  'insects',
  'non-insect invertebrates',
  'aquatic mammals',
  'people',
  'fruit and vegetables',
  'sma

In [8]:
import json, pprint
# A wide pprint keeps each class's ranked list on a single line for copy-pasting.
# Alternative compact single-line form: print(json.dumps(cifar20_ranked, ensure_ascii=False))
pprint.pprint(cifar20_ranked, width=400)  

{0: ['aquatic mammals', 'medium-sized mammals', 'small mammals', 'fish', 'large omnivores and herbivores', 'large carnivores', 'insects', 'large natural outdoor scenes', 'people', 'non-insect invertebrates', 'vehicles 2', 'vehicles 1', 'reptiles', 'household furniture', 'fruit and vegetables', 'flowers', 'large man-made outdoor things', 'household electrical devices', 'trees', 'food containers'],
 1: ['aquatic mammals', 'fish', 'large omnivores and herbivores', 'insects', 'medium-sized mammals', 'small mammals', 'people', 'non-insect invertebrates', 'large carnivores', 'large natural outdoor scenes', 'vehicles 2', 'vehicles 1', 'reptiles', 'household furniture', 'fruit and vegetables', 'flowers', 'household electrical devices', 'food containers', 'trees', 'large man-made outdoor things'],
 2: ['flowers', 'insects', 'non-insect invertebrates', 'aquatic mammals', 'people', 'fruit and vegetables', 'small mammals', 'fish', 'medium-sized mammals', 'household furniture', 'large natural outdo