In [None]:
import json
import pandas as pd
import joblib
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Seed NumPy's global RNG once, before anything stochastic runs.
np.random.seed(81021)

with open("./data/result_summary.json", 'r') as summary_file:
    raw_summary = json.load(summary_file)

# After the transpose the top-level JSON keys sit on the index and the nested
# keys on the columns; later cells read model names from the index and
# dataset names from the columns.
result_summary = pd.DataFrame(raw_summary).T

# Column-wise z-score: subtract each column's mean and divide by its
# standard deviation (np.std default, i.e. ddof=0).
scores = result_summary.values
col_mean = np.mean(scores, axis=0, keepdims=True)
col_std = np.std(scores, axis=0, keepdims=True)
res_mat = (scores - col_mean) / col_std

res_mat

In [2]:
def clustering(data, n_clusters, max_k=10, random_state=None):
    """Plot a K-Means elbow curve for ``data`` and return one-hot cluster labels.

    Parameters
    ----------
    data : array-like with ``.copy()`` and ``len()``
        Items to cluster, one per row. The function works on ``data.copy()``.
    n_clusters : int
        Number of clusters used for the final assignment.
    max_k : int, default 10
        Largest ``k`` tried in the elbow sweep. The sweep is capped at the
        number of rows, because K-Means cannot fit more clusters than samples.
    random_state : int, RandomState instance or None, default None
        Forwarded to every ``KMeans`` instance. ``None`` keeps the previous
        behaviour (no explicit seed is passed).

    Returns
    -------
    numpy.ndarray of shape (len(data), n_clusters)
        Float matrix with a single 1.0 per row marking the assigned cluster.

    Raises
    ------
    ValueError
        If ``n_clusters`` is smaller than 1 or larger than the number of rows.

    Side effects
    ------------
    Shows the elbow plot and prints the number of items per cluster.
    """
    X = data.copy()
    n_samples = len(X)

    # Fail before any fitting/plotting instead of part-way through the cell.
    if not 1 <= n_clusters <= n_samples:
        raise ValueError(
            f"n_clusters must be between 1 and the number of samples "
            f"({n_samples}), got {n_clusters}"
        )

    # Elbow sweep: inertia (within-cluster SSE) for k = 1 .. min(max_k, n_samples).
    k_values = range(1, min(max_k, n_samples) + 1)
    sse = []
    for k in k_values:
        kmeans = KMeans(n_clusters=k, random_state=random_state)
        kmeans.fit(X)
        sse.append(kmeans.inertia_)

    plt.plot(k_values, sse, marker='o')
    plt.title('Elbow Method')
    plt.xlabel('Number of clusters')
    plt.ylabel('SSE')
    plt.show()

    # Final fit with the requested number of clusters.
    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    kmeans.fit(X)
    y_kmeans = kmeans.predict(X)

    # Indexing the identity matrix by label turns each label into a one-hot row.
    one_hot = np.eye(n_clusters)[y_kmeans]
    # Column sums are the cluster sizes.
    print(one_hot.sum(axis=0))

    return one_hot

# Ground Truth Clustering

In [3]:
# Cluster the rows of res_mat (one row per entry of result_summary.index) into 5 groups.
one_hot = clustering(data=res_mat, n_clusters=5)

In [4]:
# Attach each model's cluster assignment (a one-hot row) to its stored profile.
profiles_path = "data/model_profiles.json"

with open(profiles_path, 'r') as src:
    model_profiles = json.load(src)

# Row `position` of one_hot is matched to the entry at the same position in
# result_summary.index; a name missing from the JSON file raises KeyError.
for position, model_name in enumerate(result_summary.index):
    cluster_row = list(one_hot[position])
    model_profiles[model_name]['gt_cluster'] = cluster_row

with open(profiles_path, 'w') as dst:
    json.dump(model_profiles, dst, indent=4)

In [5]:
# Transpose so the columns of res_mat (one per entry of result_summary.columns)
# become the items being clustered.
one_hot = clustering(data=res_mat.T, n_clusters=5)

In [6]:
# Record each dataset's cluster assignment (a one-hot row) under 'gt_cluster'.
with open("data/dataset_profiles.json", 'r') as f:
    dataset_profiles = json.load(f)

# Row i of one_hot is matched to the i-th column name of result_summary.
for i, d in enumerate(result_summary.columns):
    # Create the entry only when it is missing. Resetting it unconditionally
    # would discard every other key already stored for this dataset, which is
    # why the other write-back cells in this notebook guard the same way.
    dataset_profiles.setdefault(d, {})['gt_cluster'] = one_hot[i].tolist()

with open("data/dataset_profiles.json", 'w') as f:
    json.dump(dataset_profiles, f, indent=4)

# Hidden States Clustering

In [7]:
from utils.config import ALL_DATASETS

# NOTE(review): joblib.load unpickles the file, so only load artifacts from a trusted source.
avg_hs = joblib.load("data/dataset_avg_hs_llava7B.pkl")

# Stack the 'hs' entry of every dataset, in ALL_DATASETS order, into one array.
hs = np.array([avg_hs[dataset_name]['hs'] for dataset_name in ALL_DATASETS])
hs.shape

In [8]:
# Cluster the stacked hidden states and store the labels under 'hs_cluster'.
one_hot = clustering(data=hs, n_clusters=5)

profiles_path = "data/dataset_profiles.json"

with open(profiles_path, 'r') as src:
    dataset_profiles = json.load(src)

# NOTE(review): rows of one_hot follow ALL_DATASETS order, but they are matched
# to result_summary.columns by position -- confirm the two orderings agree.
for position, dataset_name in enumerate(result_summary.columns):
    # Reuse the existing profile dict, creating an empty one if needed.
    profile = dataset_profiles.setdefault(dataset_name, {})
    profile['hs_cluster'] = list(one_hot[position])

with open(profiles_path, 'w') as dst:
    json.dump(dataset_profiles, dst, indent=4)

# Language Description Clustering

In [None]:
from sentence_transformers import SentenceTransformer

# Natural-language descriptions of the datasets, one string per line (176 entries).
# The resulting embeddings are matched to result_summary.columns BY POSITION in the
# next cell, so the order of this list matters and repeated descriptions
# (e.g. 'OCR', 'Action Recognition') are separate entries that must not be
# de-duplicated or sorted.
# NOTE(review): presumably the order mirrors result_summary.columns -- confirm.
des_datasets = [
'Instance Attributes',
 'Emotion Recognition',
 'Global Video Understanding',
 'Chart Understanding',
 'Landmark Recognition',
 'Instances Counting',
 'Action Prediction',
 'Text Understanding',
 'Text-to-Image Generation',
 'Action Recognition',
 'Instance Location',
 'Instance Interaction',
 'Scene Understanding',
 'Instance Identity',
 'Text-Image Creation',
 'Visual Mathematics',
 'Difference Spotting',
 'Spatial Relation',
 'Science Knowledge',
 'Procedure Understanding',
 'Visual Reasoning',
 'In-Context Captioning',
 'Meme Comprehension',
 'Celebrity Recognition',
 'Visual Referring Expression',
 'Interleaved Image-Text Analysis',
 'Next Image Prediction',
 'Celebrity',
 'Posters',
 'Position',
 'Scene',
 'Commonsense Reasoning',
 'Artwork',
 'Landmark',
 'Text Translation',
 'Existence',
 'Numerical Calculation',
 'Count',
 'Color',
 'OCR',
 'Code Reasoning',
 'Social Relation',
 'Object Localization',
 'Future Prediction',
 'Physical Property Reasoning',
 'Attribute Comparison',
 'Nature Relation',
 'Action Recognition',
 'Image Scene',
 'Celebrity Recognition',
 'OCR',
 'Spatial Relationship',
 'Structuralized Image-Text Understanding',
 'Image Emotion',
 'Function Reasoning',
 'Identity Reasoning',
 'Physical Relation',
 'Image Style',
 'Attribute Recognition',
 'Image Quality',
 'Image Topic',
 'Social Relation',
 'Object Localization',
 'Future Prediction',
 'Physical Property Reasoning',
 'Attribute Comparison',
 'Nature Relation',
 'Action Recognition',
 'Image Scene',
 'Celebrity Recognition',
 'OCR',
 'Spatial Relationship',
 'Structuralized Image-Text Understanding',
 'Image Emotion',
 'Function Reasoning',
 'Identity Reasoning',
 'Physical Relation',
 'Image Style',
 'Attribute Recognition',
 'Image Quality',
 'Image Topic',
 'Mechanical Engineering',
 'Basic Medical Science',
 'Math',
 'Pharmacy',
 'Public Health',
 'Physics',
 'Energy and Power',
 'Sociology',
 'Art Theory',
 'History',
 'Materials',
 'Geography',
 'Chemistry',
 'Electronics',
 'Economics',
 'Art',
 'Accounting',
 'Psychology',
 'Architecture and Engineering',
 'Manage',
 'Clinical Medicine',
 'Music',
 'Finance',
 'Marketing',
 'Design',
 'Literature',
 'Biology',
 'Diagnostics and Laboratory Medicine',
 'Computer Science',
 'Agriculture',
 'Technology and Engineering',
 'Business',
 'Health and Medicine',
 'Humanities and Social Sciences',
 'Science',
 'Arts and Design',
 'Ecosystems',
 'English Colonies in North America',
 'State Capitals',
 'Designing Experiments',
 'Materials',
 'Adaptations',
 'Velocity, Acceleration, and Forces',
 'Particle Motion and Energy',
 'Geography',
 'Magnets',
 'Astronomy',
 'Oceania: Geography',
 'Weather and Climate',
 'The Americas: Geography',
 'Classification and Scientific Names',
 'Engineering Practices',
 'Atoms and Molecules',
 'Scientific Names',
 'Solutions',
 'Maps',
 'Genes to Traits',
 'Physical Geography',
 'Classification',
 'Basic Economic Principles',
 'Colonial America',
 '2D Count',
 '3D Distance',
 '2D Relation',
 '3D Depth',
 'DECIMER: a hand-drawn molecule image dataset consisting of chemical structure as the images and their SMILES representation as the strings',
 'Enrico: a topic modeling dataset for mobile UI screens',
 'FaceEmotion: a classic dataset for facial expression recognition',
 'Flickr30k: an image captioning dataset collected from Flickr',
 'GQA: builds up on scene graph structures for reasoning questions',
 'HatefulMemes: a challenge hosted by Meta to classify if a meme image along with its text caption describes hateful intentions',
 'INAT: an image classification dataset for 5000 wildlife species of plants and animals',
 'IRFL: an image-text dataset for figurative language understanding',
 'MemeCaps: a meme captioning dataset',
 'Memotion: sentiment classification, humor classification, and the scale of semantic classes',
 'MMIMDB: a genre prediction dataset that consists of an image of the poster of the movie along with the plot',
 'NewYorkerCartoon: collected from the weekly New Yorker magazine cartoon captioning contest, where readers are tasked to give a humorous caption for a cartoon image and the funniest captions are selected based on public votes',
 'NLVR: image-text pairs for visual reasoning.  Images are created by generating objects and their properties randomly.',
 'NLVR2: real-world photographs and captions for these photographs',
 'NoCaps: a large scale image captioning dataset',
 'OKVQA: a visual question-answering task that requires outside knowledge and reasoning to answer questions',
 'OpenPath: sourced from tweets across 32 hashtag sub-specialty categories in pathology',
 'PathVQA: a visual QA dataset based on pathology images',
 'Resisc45: a land use dataset that involves land scene classification of images over 45 classes',
 'Screen2Words: a mobile UI summarization dataset',
 'Slake: a medical visual question-answering dataset',
 'UCMerced: a dataset for land use classification which has 21 classes',
 'VCR: commonsense reasoning skills in question answering over images.',
 'VisualGenome: a visual question-answering dataset that grounds visual concepts to language. ',
 'VQA: fine-grained recognition of objects and activities with some commonsense reasoning',
 'VQARAD: a visual question-answering dataset over radiology images',
 'Winoground: a dataset for visual linguistic compositional reasoning',
 'Hallucination (Random selected categories)',
 'Hallucination (Popular categories)',
 'Hallucination (Adversarially selected categories)'
]

# 1. Load the pretrained "all-mpnet-base-v2" Sentence Transformer model
model = SentenceTransformer("all-mpnet-base-v2")

# 2. Embed every description with model.encode(); the printed shape is a quick
#    check that the number of rows matches len(des_datasets)
embeddings = model.encode(des_datasets)
print(embeddings.shape)

In [None]:
# Cluster the description embeddings and store the labels under 'des_cluster'.
one_hot = clustering(data=embeddings, n_clusters=5)

profiles_path = "data/dataset_profiles.json"

with open(profiles_path, 'r') as src:
    dataset_profiles = json.load(src)

# NOTE(review): rows of one_hot follow des_datasets order, but they are matched
# to result_summary.columns by position -- confirm the two orderings agree.
for position, dataset_name in enumerate(result_summary.columns):
    # Reuse the existing profile dict, creating an empty one if needed.
    profile = dataset_profiles.setdefault(dataset_name, {})
    profile['des_cluster'] = list(one_hot[position])

with open(profiles_path, 'w') as dst:
    json.dump(dataset_profiles, dst, indent=4)

In [None]:
# CLIP
import re

import joblib
import numpy as np
from utils.config import ALL_DATASETS

# Build one CLIP representation per dataset: the mean over all hidden-state
# rows stored for that dataset's instances.
dataset_rep = []
for dataset in ALL_DATASETS:
    # Dataset names are expected to look like "<task> (<benchmark>)".
    name_match = re.match(r"(.*) \((.*)\)", dataset)
    if name_match is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(
            f"Dataset name {dataset!r} does not match the '<task> (<benchmark>)' pattern"
        )
    task_name, bench_name = name_match.groups()
    output_file = f"./data/dataset_representation_clip/{task_name}_{bench_name}.pkl"

    # NOTE(review): joblib.load unpickles the file, so only load trusted artifacts.
    # Cell-specific names are used here so that the `hs` array built for the
    # hidden-states clustering (and the name `data`) are not overwritten.
    clip_instances = joblib.load(output_file)
    clip_hs = np.concatenate([ins['hidden_states'] for ins in clip_instances])
    print(dataset, clip_hs.shape)

    dataset_rep.append(clip_hs.mean(axis=0))

dataset_rep = np.array(dataset_rep)
print(dataset_rep.shape)

In [None]:
# Cluster the CLIP dataset representations and store the labels under 'clip_cluster'.
one_hot = clustering(data=dataset_rep, n_clusters=5)

profiles_path = "data/dataset_profiles.json"

with open(profiles_path, 'r') as src:
    dataset_profiles = json.load(src)

# NOTE(review): rows of one_hot follow ALL_DATASETS order, but they are matched
# to result_summary.columns by position -- confirm the two orderings agree.
for position, dataset_name in enumerate(result_summary.columns):
    # Reuse the existing profile dict, creating an empty one if needed.
    profile = dataset_profiles.setdefault(dataset_name, {})
    profile['clip_cluster'] = list(one_hot[position])

with open(profiles_path, 'w') as dst:
    json.dump(dataset_profiles, dst, indent=4)

# Validate

In [1]:
from method.matrix import MatrixManager

# Sanity check through the project's MatrixManager.
# NOTE(review): presumably it reads the JSON profile files written above -- confirm in method.matrix.
mm = MatrixManager()

# Request only the 'gt_cluster' key, once for models and once for datasets.
model_profile = mm.get_model_profiles(['gt_cluster'])
dataset_profile = mm.get_dataset_profiles(['gt_cluster'])

# Display both shapes (the recorded run shows (108, 5) and (176, 5)).
model_profile.shape, dataset_profile.shape

Load model profile:  (108, 5)
Load dataset profile:  (176, 5)


((108, 5), (176, 5))

In [None]:
# Shape check for the dataset profile requested with the 'hs_cluster' key.
dataset_hs_profile = mm.get_dataset_profiles(['hs_cluster'])
dataset_hs_profile.shape

In [None]:
# Shape check for the dataset profile requested with the 'des_cluster' key.
# NOTE(review): the variable is still called dataset_hs_profile although it is
# now bound to the 'des_cluster' result -- consider a key-specific name.
dataset_hs_profile = mm.get_dataset_profiles(['des_cluster'])
dataset_hs_profile.shape

In [None]:
# Shape check for the dataset profile requested with the 'clip_cluster' key.
# NOTE(review): the variable is still called dataset_hs_profile although it is
# now bound to the 'clip_cluster' result -- consider a key-specific name.
dataset_hs_profile = mm.get_dataset_profiles(['clip_cluster'])
dataset_hs_profile.shape

In [2]:
# Shape check for the dataset profile requested with the 'random' key
# (the recorded run shows (176, 5)).
# NOTE(review): no cell in this notebook writes a 'random' key; presumably it is
# produced elsewhere (or by MatrixManager itself) -- confirm.
dataset_hs_profile = mm.get_dataset_profiles(['random'])
dataset_hs_profile.shape

Load dataset profile:  (176, 5)


(176, 5)