In [1]:
import torch
import numpy as np
from collections import defaultdict

import pandas as pd
import pathlib
from rich import print

import matplotlib.pyplot as plt

# set pyplot theme to ggplot2
plt.style.use("ggplot")
import wandb

In [2]:
source_csv_filepath = (
    "13032024-full.csv"
)
source_csv_filepath = pathlib.Path(source_csv_filepath)

In [3]:
def load_data_as_df(filepath):
    df = pd.read_csv(filepath)

    # Concatenating the header with the first row
    new_headers = [f"{col.split('.')[0]}.{df.iloc[0][idx]}" for idx, col in enumerate(df.columns)]

    # Setting the new concatenated values as column names
    df.columns = new_headers

    # Removing the first row from the DataFrame
    df = df.drop(df.index[0])

    # Resetting the DataFrame index
    df.reset_index(drop=True, inplace=True)
    # replace NA with 5
    df = df.fillna(5)
    return df

In [4]:
df = load_data_as_df(source_csv_filepath)

# visualize using tabulate for ipython noteboooks
from IPython.display import display

# Assuming df is your DataFrame
with pd.option_context("display.max_rows", None, "display.max_columns", None):
    display(
        df.style.set_table_attributes('style="display:inline"').bar(
            color="lightblue"
        )
    )

  new_headers = [f"{col.split('.')[0]}.{df.iloc[0][idx]}" for idx, col in enumerate(df.columns)]


Unnamed: 0,Model.Metric,cityscapes.mIoU,cityscapes.dice-loss,acdc.dice-loss,ade20k.mIoU,ade20k.dice-loss,aircraft.acc@1,chexpert.auc,cifar100.loss,cifar100.acc@5,cifar100.acc@1,clevr.acc@1,clevr-math.acc@5,clevr-math.acc@1,coco-10k.mIoU,coco-10k.dice-loss,coco-164k.mIoU,coco-164k.dice-loss,cubirds.acc@1,diabetic.auc,dtextures.acc@1,flickr30k.text-to-image-acc@5,flickr30k.text-to-image-acc@1,flickr30k.image-to-text-acc@1,flickr30k.image-to-text-acc@5,food101.loss,food101.acc@5,food101.acc@1,fungi.acc@1,ham10k.auc,hmdb51.acc@5,hmdb51.acc@1,imagenet1k.loss,imagenet1k.acc@5,imagenet1k.acc@1,iwildcam.mae,kinetics.acc@5,kinetics.acc@1,mini.acc@1,newyorkercaptioncontest.text-to-image-acc@5,newyorkercaptioncontest.text-to-image-acc@1,newyorkercaptioncontest.image-to-text-acc@1,newyorkercaptioncontest.image-to-text-acc@5,nyu.mIoU,nyu.dice-loss,omniglot.acc@1,pascal.mIoU,pascal.dice-loss,places365.loss,places365.acc@5,places365.acc@1,pokemonblipcaptions.text-to-image-acc@5,pokemonblipcaptions.text-to-image-acc@1,pokemonblipcaptions.image-to-text-acc@1,pokemonblipcaptions.image-to-text-acc@5,ucf.acc@5,ucf.acc@1,vgg.acc@1,winoground.text-to-image-acc@1,winoground.image-to-text-acc@1
0,AR-ViT-B16-224,57.848,0.6481,0.571,17.353,0.777,0.944,0.813,1.636,83.629,55.742,52.828,98.85,58.849,22.783,0.973,21.321,0.97,0.944,0.839,0.799,0.188,0.053,0.055,0.187,0.582,96.63,83.834,0.775,0.985,57.917,28.385,0.967,93.007,78.199,4.093,63.288,36.718,0.987,0.195,0.052,0.054,0.187,8.345,0.271,0.986,15.46,0.781,2.062,78.706,46.641,0.821,0.64,0.628,0.794,84.369,65.374,0.955,0.569,0.611
1,BART,0.0,5.0,0.747,0.38,0.668,0.53,0.637,3.985,29.17,8.967,42.563,96.45,41.761,0.056,0.959,0.11,0.965,0.503,0.551,0.354,0.079,0.016,0.015,0.078,3.889,31.844,11.504,0.337,0.859,9.961,1.953,6.622,3.107,0.713,4.489,1.277,0.256,0.373,0.081,0.016,0.016,0.081,4.893,0.24,0.841,1.002,0.556,5.248,11.058,2.953,0.748,0.556,0.556,0.694,5.444,1.031,0.594,0.477,0.488
2,BERT,0.0,5.0,0.959,0.36,0.67,0.59,0.662,3.685,38.137,14.52,42.593,97.4,44.265,0.055,0.962,0.096,0.966,0.474,0.576,0.376,0.081,0.016,0.015,0.083,3.499,42.442,18.191,0.328,0.911,9.766,1.953,6.473,4.616,1.173,4.576,1.269,0.233,0.396,0.082,0.015,0.016,0.085,5.503,0.241,0.902,0.961,0.545,5.008,16.037,4.795,0.756,0.599,0.587,0.722,6.04,1.287,0.594,0.504,0.503
3,CLIP-B16-224,65.142,0.5994,0.538,33.804,0.687,0.965,0.816,0.908,93.76,74.731,52.622,99.113,61.218,29.609,0.968,30.369,0.954,0.979,0.867,0.843,0.209,0.055,0.062,0.206,0.306,98.679,91.496,0.854,0.986,70.039,40.404,0.752,95.708,81.815,3.613,71.24,44.574,0.962,0.21,0.057,0.06,0.199,7.545,0.287,0.989,26.619,0.632,1.757,83.864,53.445,0.852,0.679,0.618,0.817,88.238,71.44,0.989,0.54,0.486
4,ConvNextV2-Base,62.341,0.5822,0.636,37.253,0.472,0.967,0.818,0.64,97.422,84.126,52.606,99.787,77.402,26.84,0.147,25.059,0.169,0.98,0.875,0.85,0.212,0.055,0.06,0.205,0.272,98.991,92.896,0.857,0.991,77.539,50.326,0.635,96.851,85.288,3.446,75.765,49.011,0.958,0.214,0.058,0.063,0.21,5.26,0.225,0.986,32.665,0.297,1.659,85.325,54.726,0.803,0.61,0.616,0.783,94.737,84.106,0.996,0.478,0.398
5,DINO-B16-224,60.224,0.6387,0.515,18.61,0.733,0.956,0.819,1.2,89.59,66.282,52.846,99.188,61.001,24.968,0.969,24.292,0.951,0.959,0.847,0.791,0.196,0.046,0.054,0.186,0.478,97.449,86.519,0.812,0.987,56.525,27.995,1.376,88.493,67.529,4.01,55.634,29.529,0.899,0.2,0.052,0.053,0.19,6.138,0.286,0.987,18.184,0.698,1.987,79.704,47.454,0.856,0.682,0.655,0.818,77.125,52.769,0.959,0.483,0.495
6,DeiT3-B16-224,62.833,0.6401,0.591,21.9,0.703,0.953,0.827,1.162,90.844,66.75,52.342,99.35,62.701,28.345,0.953,26.125,0.959,0.962,0.818,0.819,0.2,0.05,0.056,0.192,0.447,97.718,87.331,0.805,0.985,64.338,36.523,0.794,94.675,82.011,3.194,66.558,39.528,0.988,0.219,0.06,0.065,0.205,7.488,0.292,0.986,22.291,0.667,1.943,80.614,48.677,0.826,0.67,0.639,0.81,88.506,70.411,0.962,0.492,0.424
7,EffFormer-s0,2.492,5.0,0.73,9.801,0.662,0.775,0.8,3.478,43.571,16.511,45.115,98.7,53.862,13.995,0.903,13.549,0.865,0.863,0.818,0.567,0.156,0.035,0.042,0.16,1.483,86.983,62.578,0.512,0.818,9.726,1.628,2.83,72.456,44.615,3.485,1.334,0.279,0.908,0.151,0.034,0.034,0.144,10.498,0.236,0.954,14.026,0.486,3.276,55.901,25.548,0.775,0.605,0.594,0.782,5.173,0.84,0.903,0.568,0.513
8,EffV2-RW-S,63.144,0.5982,0.391,12.668,0.679,0.894,0.818,1.461,89.471,64.968,39.848,97.463,43.199,8.704,0.937,8.268,0.955,0.921,0.861,0.595,0.173,0.04,0.037,0.161,0.643,96.816,85.282,0.737,0.975,27.699,11.068,1.296,90.439,71.549,3.701,2.357,0.561,0.628,0.168,0.035,0.034,0.149,8.216,0.244,0.986,11.776,0.618,1.842,82.389,51.066,0.651,0.463,0.46,0.625,25.84,9.611,0.912,0.5,0.5
9,Flex-B-1200EP,66.623,0.6141,0.546,23.308,0.708,0.959,0.829,0.88,94.367,75.1,51.956,98.825,59.879,30.371,0.948,27.997,0.953,0.964,0.857,0.784,0.208,0.049,0.059,0.2,0.392,98.093,89.142,0.837,0.985,60.885,32.227,0.76,95.428,82.265,3.153,67.584,40.673,0.991,0.209,0.053,0.053,0.196,6.197,0.275,0.989,21.66,0.686,1.783,83.311,52.065,0.81,0.637,0.606,0.79,81.51,63.159,0.967,0.502,0.482


In [5]:
import plotly.express as px

# List of all columns (replace with your actual column names)
# df = pd.read_csv(source_csv_filepath)
df = load_data_as_df(source_csv_filepath)
columns = df.columns.tolist()[1:]
model_names = df.columns.tolist()[0]
# remove all datapoints with less than 50% on imagenet1k.1


# model_names = df[model_names][1:]

# Convert all columns (except the first) to numeric
for col in df.columns[1:]:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# df = df[df["imagenet1k.acc@1"] > 50]
# Set the first column as index (to be used as labels)
# df.set_index(df.columns[0], inplace=True)
for idx, col in enumerate(columns):  # Exclude first column from plotting
    fig = px.scatter(
        df,
        x="imagenet1k.acc@1",
        y=col,
        title=f"Scatter plot of {col} vs imagenet1k acc@1",
    )
    fig.update_traces(
        mode="markers+text",
        marker=dict(size=5),
        text=df[model_names],
        textposition="top center",
        textfont=dict(size=8),
    )  # Adjust marker size and add labels here
    fig.show()

  new_headers = [f"{col.split('.')[0]}.{df.iloc[0][idx]}" for idx, col in enumerate(df.columns)]


In [6]:
# Convert all columns (except the first) to numeric
df = load_data_as_df(source_csv_filepath)
columns = df.columns.tolist()[1:]
model_names = df.columns.tolist()[0]

for col in df.columns[1:]:
    df[col] = pd.to_numeric(df[col], errors="coerce")
    # normalize the data
    # df[col] = (df[col] - df[col].mean()) / df[col].std()

# Set the first column as index (to be used as labels)
df.set_index(df.columns[0], inplace=True)

correlation_matrix = df.corr(method="spearman")["imagenet1k.acc@1"]
print(correlation_matrix)

sorted_correlation_matrix = correlation_matrix.abs().sort_values(
    ascending=False
)
print(sorted_correlation_matrix)

# return as latex table
print(sorted_correlation_matrix.to_latex())


Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



Just threshold at 60% for imagenet (top 1), recompute the above so we get some signal towards the question "If A does better than B, where both A and B do 'well' on imagenet, do I have a reason to believe that A IS better than B on previously unseen scenarios. Use spearman correlation. For how many times A is Better than B, count how many times it happens and compute a probability.

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.metrics import root_mean_squared_error

# Assuming df is your DataFrame
X = df.copy()
X.dropna(inplace=True)
# Create a linear regression model


# Use leave-one-out cross-validation
kfold = KFold(n_splits=10)

# Dictionary to store the average RMSE for each metric
avg_rmse_dict = {}
std_rmse_dict = {}
# 47 x 16

# train: 47 x 8 -> x = 1 x 8, y = 46 x 8
# test: 47 x 8 -> x = 1 x 8, y = 46 x 8

# For each column in the DataFrame
for col in df.columns:
    # Use the column as the input and the remaining columns as the target
    x = X[[col]]
    y = X

    # Initialize a list to store the RMSE for each target column
    rmse_scores = []

    # For each row (leave-one-out cross-validation)
    for train_index, test_index in kfold.split(x):
        model = LinearRegression()
        # Split the data into training and testing sets
        x_train, x_test = x.iloc[train_index], x.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Fit the model
        model.fit(x_train, y_train)

        # Make predictions
        y_pred = model.predict(x_test)

        # Calculate the RMSE and add it to the list
        rmse_scores.append(root_mean_squared_error(y_test, y_pred))

    # Calculate the average RMSE
    avg_rmse = np.mean(rmse_scores)
    std_rmse = np.std(rmse_scores)

    # Store the average RMSE in the dictionary
    avg_rmse_dict[col] = avg_rmse
    std_rmse_dict[col] = std_rmse

# Find the metric with the lowest RMSE
best_metric = min(avg_rmse_dict, key=avg_rmse_dict.get)

print(
    f"The most predictive metric is {best_metric} with an average RMSE of {avg_rmse_dict[best_metric]}"
)
# print(f'-- Mean RMSE --')
# print(avg_rmse_dict)
# print(f'-- Std RMSE --')
# print(std_rmse_dict)

# sort by value
sorted_avg_rmse_dict = sorted(avg_rmse_dict.items(), key=lambda x: x[1])
sorted_std_rmse_dict = sorted(std_rmse_dict.items(), key=lambda x: x[1])

print(f"-- Mean RMSE Sorted --")
print(sorted_avg_rmse_dict)
print(f"-- Std RMSE Sorted --")
print(sorted_std_rmse_dict)

In [8]:
from itertools import combinations
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.metrics import root_mean_squared_error

# Assuming df is your DataFrame
X = df.copy()
X.dropna(inplace=True)
# Create a linear regression model


# Use leave-one-out cross-validation
kfold = KFold(n_splits=10)

# Dictionary to store the average RMSE for each metric
avg_rmse_dict = {}
std_rmse_dict = {}
# 47 x 16
column_pairs = []

columns = df.columns.tolist()
columns_datasets = set([column.split(".")[0] for column in columns])
column_combinations = combinations(columns_datasets, 2)
# train: 47 x 8 -> x = 1 x 8, y = 46 x 8
# test: 47 x 8 -> x = 1 x 8, y = 46 x 8

# For each column in the DataFrame
for col in column_combinations:
    # Use the column as the input and the remaining columns as the target
    columns = [column for column in df.columns if any(item in column for item in col)]
    x = X[columns]
    y = X

    # Initialize a list to store the RMSE for each target column
    rmse_scores = []

    # For each row (leave-one-out cross-validation)
    for train_index, test_index in kfold.split(x):
        model = LinearRegression()
        # Split the data into training and testing sets
        x_train, x_test = x.iloc[train_index], x.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Fit the model
        model.fit(x_train, y_train)

        # Make predictions
        y_pred = model.predict(x_test)

        # Calculate the RMSE and add it to the list
        rmse_scores.append(root_mean_squared_error(y_test, y_pred))

    # Calculate the average RMSE
    avg_rmse = np.mean(rmse_scores)
    std_rmse = np.std(rmse_scores)

    # Store the average RMSE in the dictionary
    avg_rmse_dict[col] = avg_rmse
    std_rmse_dict[col] = std_rmse

# Find the metric with the lowest RMSE
best_metric = min(avg_rmse_dict, key=avg_rmse_dict.get)

print(
    f"The most predictive metric is {best_metric} with an average RMSE of {avg_rmse_dict[best_metric]}"
)
# print(f'-- Mean RMSE --')
# print(avg_rmse_dict)
# print(f'-- Std RMSE --')
# print(std_rmse_dict)

# sort by value
sorted_avg_rmse_dict = sorted(avg_rmse_dict.items(), key=lambda x: x[1])
sorted_std_rmse_dict = sorted(std_rmse_dict.items(), key=lambda x: x[1])

print(f"-- Mean RMSE Sorted --")
print(sorted_avg_rmse_dict)
print(f"-- Std RMSE Sorted --")
print(sorted_std_rmse_dict)

In [9]:
from itertools import combinations
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.metrics import root_mean_squared_error

# Assuming df is your DataFrame
X = df.copy()
X.dropna(inplace=True)
# Create a linear regression model


# Use leave-one-out cross-validation
kfold = KFold(n_splits=10)

# Dictionary to store the average RMSE for each metric
avg_rmse_dict = {}
std_rmse_dict = {}
# 47 x 16
column_pairs = []

columns = df.columns.tolist()
columns_datasets = set([column.split(".")[0] for column in columns])
column_combinations = combinations(columns_datasets, 3)
# train: 47 x 8 -> x = 1 x 8, y = 46 x 8
# test: 47 x 8 -> x = 1 x 8, y = 46 x 8

# For each column in the DataFrame
for col in column_combinations:
    # Use the column as the input and the remaining columns as the target
    columns = [column for column in df.columns if any(item in column for item in col)]
    x = X[columns]
    y = X

    # Initialize a list to store the RMSE for each target column
    rmse_scores = []

    # For each row (leave-one-out cross-validation)
    for train_index, test_index in kfold.split(x):
        model = LinearRegression()
        # Split the data into training and testing sets
        x_train, x_test = x.iloc[train_index], x.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Fit the model
        model.fit(x_train, y_train)

        # Make predictions
        y_pred = model.predict(x_test)

        # Calculate the RMSE and add it to the list
        rmse_scores.append(root_mean_squared_error(y_test, y_pred))

    # Calculate the average RMSE
    avg_rmse = np.mean(rmse_scores)
    std_rmse = np.std(rmse_scores)

    # Store the average RMSE in the dictionary
    avg_rmse_dict[col] = avg_rmse
    std_rmse_dict[col] = std_rmse

# Find the metric with the lowest RMSE
best_metric = min(avg_rmse_dict, key=avg_rmse_dict.get)

print(
    f"The most predictive metric is {best_metric} with an average RMSE of {avg_rmse_dict[best_metric]}"
)
# print(f'-- Mean RMSE --')
# print(avg_rmse_dict)
# print(f'-- Std RMSE --')
# print(std_rmse_dict)

# sort by value
sorted_avg_rmse_dict = sorted(avg_rmse_dict.items(), key=lambda x: x[1])
sorted_std_rmse_dict = sorted(std_rmse_dict.items(), key=lambda x: x[1])

print(f"-- Mean RMSE Sorted --")
print(sorted_avg_rmse_dict)
print(f"-- Std RMSE Sorted --")
print(sorted_std_rmse_dict)