Based on the Kaggle notebook "Which image models are best?" by jhoward: https://www.kaggle.com/code/jhoward/which-image-models-are-best

In [2]:
! git clone --depth 1 https://github.com/rwightman/pytorch-image-models.git
%cd pytorch-image-models/results

fatal: destination path 'pytorch-image-models' already exists and is not an empty directory.
/Users/sueszli/dev/advx-bench/docs/pytorch-image-models/results


In [3]:
import pandas as pd
# ImageNet validation results. `model_org` keeps the name exactly as it appears in
# the CSV, while `model` is cut at the first '.' so it can serve as the merge key
# against the benchmark files.
df_results = pd.read_csv('results-imagenet.csv').assign(
    model_org=lambda frame: frame['model'],
    model=lambda frame: frame['model'].str.split('.').str[0],
)

def get_data(part, col):
    """Join one benchmark CSV with the accuracy table and label model families.

    Args:
        part: Benchmark kind used to build the CSV file name
            (``benchmark-{part}-amp-nhwc-pt111-cu113-rtx3090.csv``).
        col: Name of the throughput column (samples per second) in that CSV.

    Returns:
        A DataFrame of the merged rows with two extra columns: ``secs``
        (``1 / col``) and ``family``. Models whose name ends in ``gn`` are
        dropped, and only families matching the allow-list below are kept.
        Rows whose name yields no family are dropped as well.

    Reads the module-level ``df_results`` frame for the merge.
    """
    bench = pd.read_csv(f'benchmark-{part}-amp-nhwc-pt111-cu113-rtx3090.csv')
    df = bench.merge(df_results, on='model')
    df['secs'] = 1. / df[col]

    # Raw string: '\d' inside a normal literal is an invalid escape sequence.
    # expand=False yields a Series (NaN where the name does not match).
    df['family'] = df.model.str.extract(r'^([a-z]+?(?:v2)?)(?:\d|_|$)', expand=False)

    # Copy after filtering so the .loc writes below act on an independent frame
    # rather than on a slice of the merged one.
    df = df[~df.model.str.endswith('gn')].copy()

    # NOTE(review): df_results['model'] is cut at the first '.', so an 'in22' tag that
    # lives only in that suffix is not visible here - confirm whether this is intended.
    is_in22 = df.model.str.contains('in22')
    df.loc[is_in22, 'family'] = df.loc[is_in22, 'family'] + '_in22'

    is_resnet_d = df.model.str.contains('resnet.*d')
    df.loc[is_resnet_d, 'family'] = df.loc[is_resnet_d, 'family'] + 'd'

    # na=False: a missing family must count as "no match" instead of putting NaN
    # into the boolean mask, which boolean indexing rejects.
    keep = df.family.str.contains(
        '^re[sg]netd?|beit|convnext|levit|efficient|vit|vgg|swin', na=False)
    return df[keep]

# Inference benchmark joined with accuracy; speed comes from 'infer_samples_per_sec'.
df = get_data(part='infer', col='infer_samples_per_sec')

In [28]:
import plotly.express as px
# Figure width and height passed to the plotly scatter plots.
w = 1000
h = 800

def show_all(df, title, size):
    """Scatter every row of ``df``: ``secs`` (log scale) against ``top1``.

    Args:
        df: Frame providing ``secs``, ``top1``, ``family``, ``model_org`` and
            the column named by ``size``.
        title: Plot title.
        size: Name of the column whose squared values set the marker size;
            the raw value is also shown on hover.

    Returns:
        The figure produced by ``px.scatter``, coloured by ``family``.
    """
    marker_size = df[size] ** 2
    return px.scatter(
        df,
        x='secs',
        y='top1',
        log_x=True,
        color='family',
        size=marker_size,
        hover_name='model_org',
        hover_data=[size],
        title=title,
        width=w,
        height=h,
    )

# show_all(df, 'Inference', 'infer_img_size')  # disabled: plotting every family at once is too overwhelming to read

# Regex alternation of the families to plot; it is applied with str.fullmatch,
# so each alternative has to cover the whole family label.
subs = '|'.join([
    'levit',
    'resnetd?',
    'regnetx',
    'vgg',
    'convnext.*',
    'efficientnetv2',
    'beit',
    'swin',
])

def show_subs(df, title, size):
    """Plot only the families selected by ``subs`` and print each family's best model.

    Args:
        df: Frame providing ``secs``, ``top1``, ``family``, ``model_org`` and
            the column named by ``size``.
        title: Plot title.
        size: Name of the column whose squared values set the marker size;
            the raw value is also shown on hover.

    Returns:
        The figure produced by ``px.scatter`` for the selected rows, with an
        OLS trendline fitted against log-transformed x.

    Side effect: prints one line per family, ordered by best ``top1`` descending.
    """
    df_subs = df[df.family.str.fullmatch(subs)]

    # One (family, best top1, model achieving it) record per family, in the
    # order the families first appear in the frame.
    leaders = []
    for fam in df_subs.family.unique():
        members = df_subs[df_subs.family == fam]
        peak = members.top1.max()
        # On a tie within a family, the first row in frame order is reported.
        winner = members[members.top1 == peak].model_org.values[0]
        leaders.append((fam, peak, winner))
    # Stable sort: families tied on accuracy keep their first-appearance order.
    leaders.sort(key=lambda record: record[1], reverse=True)

    print("top1 accuracy by family:")
    for fam, peak, winner in leaders:
        print(f"- {fam} ({peak:.2f}% top1, by {winner})")

    return px.scatter(
        df_subs,
        x='secs',
        y='top1',
        log_x=True,
        color='family',
        size=df_subs[size] ** 2,
        hover_name='model_org',
        hover_data=[size],
        trendline="ols",
        trendline_options={'log_x': True},
        title=title,
        width=w,
        height=h,
    )

# Inference view; marker size follows the squared 'infer_img_size' column.
show_subs(df, title='Inference', size='infer_img_size')


top1 accuracy by family:
- beit (88.60% top1, by beit_large_patch16_512.in22k_ft_in22k_in1k)
- convnext (87.47% top1, by convnext_large.fb_in22k_ft_in1k_384)
- swin (87.13% top1, by swin_large_patch4_window12_384.ms_in22k_ft_in1k)
- efficientnetv2 (84.81% top1, by efficientnetv2_rw_m.agc_in1k)
- resnetd (83.96% top1, by resnet200d.ra2_in1k)
- resnet (83.45% top1, by resnet152.a1h_in1k)
- regnetx (82.81% top1, by regnetx_320.tv2_in1k)
- levit (82.60% top1, by levit_384.fb_dist_in1k)
- vgg (74.22% top1, by vgg19_bn.tv_in1k)


In [9]:
# Same comparison for the training benchmark; speed comes from 'train_samples_per_sec'.
tdf = get_data(part='train', col='train_samples_per_sec')
# show_all(tdf, 'Training', 'train_img_size')
show_subs(tdf, title='Training', size='train_img_size')