In [2]:
import os
import pandas as pd
from local_python.general_utils import load_pd_from_json, number_to_string, check_duplicates

In [3]:
# Metric column that is aggregated per group when the run files are loaded.
metric_name = "f1_macro"
# Columns used as groupby keys for that aggregation.
column_names_groupby = [
    "architecture",
    "downstream",
    "pre-training",
    "number_of_samples",
    "model_name",
]
# Statistics computed for `metric_name` within each group.
aggregating_functions = ["mean", "std", "count"]
# Directory that is scanned for "*metrics*.txt" run files.
path_metrics = "../runs/"
# CSV file the grouped scores are written to and later re-read from.
path_f1_scores_grouped = "../results/f1_scores.csv"
# Downstream dataset names used to split the exported tables into two groups.
derma_datasets = ["DDI", "PAD-UFES-20", "HAM10000", "Fitzpatrick17k"]
plant_datasets = ["PlantDoc", "PlantDataset", "Cassava", "PlantVillage"]

# Maps the pre-training token parsed from a feature identifier to its display label.
# The order of the values also defines the category order of the ordered
# categorical that is built from them when the grouped CSV is re-read.
# NOTE(review): several labels differ only by a trailing space (e.g. "ImageNet SL "
# vs "ImageNet SL"). The space keeps the labels distinct, and categorical categories
# must be unique, so do not strip them. Presumably the paired entries belong to
# different architectures -- confirm.
name_map = {
    "Random": "Random",
    "ImageNet_1k_SL_V1": "ImageNet SL ",
    "ImageNet_1k_SL_WinKawaks": "ImageNet SL",
    "ImageNet_1k_SSL_SimCLR": "ImageNet SSL ",
    "ImageNet_1k_SSL_Dino": "ImageNet SSL",
    "ImageNet_AugReg": "ImageNet AR",
    "Derma_SSL_SimCLR": "Derma SSL ",
    "Derma": "Derma SSL",
    "PDDD": "Plant SL",
    "Plant": "Plant SSL",
}


In [4]:
def get_downstream(value):
    """Return the downstream dataset name encoded in a feature identifier.

    The name is the first "-"-separated token of the identifier's file name;
    underscores inside that token are turned into hyphens.
    """
    dataset_token = get_feature_identifier_part(value, 0)
    return dataset_token.replace("_", "-")


def get_architecture(value):
    """Return the architecture name, i.e. the second "-"-separated token
    of the feature identifier's file name."""
    architecture_token = get_feature_identifier_part(value, 1)
    return architecture_token


def get_pretraining(value):
    """Return the display label of the pre-training encoded in a feature identifier.

    The third "-"-separated token of the identifier's file name is looked up in
    `name_map`; a token that is not a key of `name_map` raises `KeyError`.
    """
    pretraining_token = get_feature_identifier_part(value, 2)
    return name_map[pretraining_token]


def get_feature_identifier_part(value, idx):
    """Return one "-"-separated token of a feature identifier.

    Args:
        value: Path-like string; only its file name is used, with any directory
            components and the final extension removed.
        idx: Position of the wanted token (regular list indexing, so negative
            values count from the end).

    Raises:
        IndexError: If the file name has no token at position `idx`.
    """
    file_name = os.path.basename(value)
    stem, _extension = os.path.splitext(file_name)
    tokens = stem.split("-")
    return tokens[idx]

def score_to_percent_string(values):
    """Format a (mean, std) pair of fractions as a percentage string.

    Args:
        values: Any two-element iterable `(mean, std)` holding fractions
            (0.5 means 50 %), e.g. a tuple or a two-entry pandas row.

    Returns:
        "<mean>" when the std is exactly zero or missing (NaN/None), otherwise
        "<mean>±<std>"; both numbers are scaled by 100 and printed with one
        decimal place.
    """
    (mean, std) = values
    mean_str = "%.1f" % (100 * mean)
    # A missing std (for example the sample std of a single value) carries no
    # information; without this guard it would be rendered as "±nan".
    if pd.isna(std) or 0 == std:
        return mean_str
    std_str = "%.1f" % (100 * std)
    return f"{mean_str}±{std_str}"

In [5]:
# Aggregate every metrics file found in `path_metrics` and stack the per-file
# group statistics into a single frame. The per-file results are collected in a
# list and concatenated once, instead of growing `df_master` inside the loop.
grouped_frames = []

for name in os.listdir(path_metrics):
    if name.endswith(".txt") and "metrics" in name:
        metric_file_path = os.path.join(path_metrics, name)
        df_full = load_pd_from_json(metric_file_path)
        # NOTE(review): this call used to pass "All" as a second positional
        # argument. `Series.apply` binds that position to its own parameter, not
        # to `number_to_string`, so it never had an effect (missing values show
        # up as "None", which later cells filter on). It is dropped here to keep
        # the results unchanged; if "All" is the wanted label, pass it through
        # `args=("All",)` and update those "None" filters together with it.
        df_full["number_of_samples"] = df_full["number_of_samples"].apply(
            number_to_string
        )
        # Derive the grouping columns from the "-"-separated feature identifier.
        df_full["architecture"] = df_full["feature_identifier"].apply(get_architecture)
        df_full["downstream"] = df_full["feature_identifier"].apply(get_downstream)
        df_full["pre-training"] = df_full["feature_identifier"].apply(get_pretraining)
        df_groups = df_full.groupby(column_names_groupby).agg(
            {metric_name: aggregating_functions}
        )
        grouped_frames.append(df_groups.reset_index())

# `pd.concat` rejects an empty list, so fall back to an empty frame when no
# metrics file was found.
df_master = (
    pd.concat(grouped_frames, ignore_index=True, axis=0)
    if grouped_frames
    else pd.DataFrame()
)
print(f"{len(df_master)} rows in master file")

Read 7500 entries from Cassava-ResNet50-metrics.txt
Read 9000 entries from Cassava-ViT_T16-student-metrics.txt
Read 7500 entries from DDI-ResNet50-metrics.txt
Read 9000 entries from DDI-ViT_T16-student-metrics.txt
Read 7500 entries from Fitzpatrick17k-ResNet50-metrics.txt
Read 9000 entries from Fitzpatrick17k-ViT_T16-student-metrics.txt
Read 7500 entries from HAM10000-ResNet50-metrics.txt
Read 9000 entries from HAM10000-ViT_T16-student-metrics.txt
Read 3520 entries from master-metrics.txt
Read 6000 entries from PAD_UFES_20-ResNet50-metrics.txt
Read 7200 entries from PAD_UFES_20-ViT_T16-student-metrics.txt
Read 6000 entries from PlantDataset-ResNet50-metrics.txt
Read 7200 entries from PlantDataset-ViT_T16-student-metrics.txt
Read 6000 entries from PlantDoc-ResNet50-metrics.txt
Read 7200 entries from PlantDoc-ViT_T16-student-metrics.txt
Read 7500 entries from PlantVillage-ResNet50-metrics.txt
Read 9000 entries from PlantVillage-ViT_T16-student-metrics.txt
1485 rows in master file


In [6]:
# Sanity check: list the distinct values of every column, or only their number
# when a column has more than 20 of them.
for column_name in df_master.columns:
    distinct_values = df_master[column_name].unique()
    n_distinct = len(distinct_values)
    if n_distinct <= 20:
        print(f"{column_name}: {distinct_values}")
        continue
    print(f"{column_name} has {n_distinct} unique values")

('architecture', ''): ['ResNet50' 'ViT_T16']
('downstream', ''): ['Cassava' 'DDI' 'Fitzpatrick17k' 'HAM10000' 'PAD-UFES-20' 'PlantDataset'
 'PlantDoc' 'PlantVillage']
('pre-training', ''): ['Derma SSL ' 'ImageNet SL ' 'ImageNet SSL ' 'Plant SL' 'Random'
 'Derma SSL' 'ImageNet AR' 'ImageNet SL' 'ImageNet SSL' 'Plant SSL']
('number_of_samples', ''): ['1' '10' '100' '3' '30' 'None']
('model_name', ''): ['dc' 'knn' 'lr']
('f1_macro', 'mean') has 1004 unique values
('f1_macro', 'std') has 989 unique values
('f1_macro', 'count'): [100  10  20]


In [7]:
# Flatten the two-level column labels produced by the aggregation, e.g.
# ("f1_macro", "mean") -> "f1_macro_mean" and ("architecture", "") -> "architecture".
# Only tuple labels are joined, so re-running this cell on already flattened
# (plain string) labels leaves them untouched instead of joining their characters.
df_master.columns = [
    "_".join(label).strip("_") if isinstance(label, tuple) else label
    for label in df_master.columns
]
df_master

Unnamed: 0,architecture,downstream,pre-training,number_of_samples,model_name,f1_macro_mean,f1_macro_std,f1_macro_count
0,ResNet50,Cassava,Derma SSL,1,dc,0.019217,0.000000,100
1,ResNet50,Cassava,Derma SSL,1,knn,0.184686,0.035744,100
2,ResNet50,Cassava,Derma SSL,1,lr,0.163515,0.045919,100
3,ResNet50,Cassava,Derma SSL,10,dc,0.019217,0.000000,100
4,ResNet50,Cassava,Derma SSL,10,knn,0.208679,0.018469,100
...,...,...,...,...,...,...,...,...
1480,ViT_T16,PlantVillage,Random,3,knn,0.128046,0.010907,100
1481,ViT_T16,PlantVillage,Random,3,lr,0.175155,0.013696,100
1482,ViT_T16,PlantVillage,Random,30,dc,0.000604,0.000000,100
1483,ViT_T16,PlantVillage,Random,30,knn,0.205410,0.007056,100


In [8]:
# Persist the grouped scores. The target directory is created first so that the
# export also works on a fresh checkout where it does not exist yet.
os.makedirs(os.path.dirname(path_f1_scores_grouped) or ".", exist_ok=True)
df_master.to_csv(path_f1_scores_grouped)

In [9]:
# Sort order of the classifier names in the result tables.
column_order_model_name = [
    "dc",
    "lr",
    "knn",
]

len_before = len(df_master)

# Re-read the grouped scores so that the label columns become ordered
# categoricals (their category order drives the later `sort_index` calls).
df_master = pd.read_csv(
    path_f1_scores_grouped,
    index_col=0,
    dtype={
        "pre-training": pd.CategoricalDtype(
            categories=list(name_map.values()), ordered=True
        ),
        "model_name": pd.CategoricalDtype(
            categories=column_order_model_name, ordered=True
        ),
        # Keep the sample counts as the strings they were written as ("1", "10",
        # ..., "None"); later cells filter on the literal "None".
        "number_of_samples": str,
    },
    # Newer pandas versions treat the text "None" as a missing value by default,
    # which would blank out the "None" rows of `number_of_samples`. Restrict the
    # NA markers to the empty field that `to_csv` writes for NaN.
    keep_default_na=False,
    na_values=[""],
)
# The round trip through the CSV must not add or drop rows.
assert len_before == len(df_master)
df_master.head(n=5)


Unnamed: 0,architecture,downstream,pre-training,number_of_samples,model_name,f1_macro_mean,f1_macro_std,f1_macro_count
0,ResNet50,Cassava,Derma SSL,1,dc,0.019217,0.0,100
1,ResNet50,Cassava,Derma SSL,1,knn,0.184686,0.035744,100
2,ResNet50,Cassava,Derma SSL,1,lr,0.163515,0.045919,100
3,ResNet50,Cassava,Derma SSL,10,dc,0.019217,0.0,100
4,ResNet50,Cassava,Derma SSL,10,knn,0.208679,0.018469,100


In [10]:
# Build the display column ("mean" or "mean±std", in percent) for every row.
# Iterating the two columns directly yields the same (mean, std) pairs as a
# row-wise `DataFrame.apply(axis=1)` without creating a Series per row.
df_master["f1_macro"] = [
    score_to_percent_string(mean_and_std)
    for mean_and_std in zip(df_master["f1_macro_mean"], df_master["f1_macro_std"])
]

In [11]:
def filter_and_unstack(df, filter_config):
    """Filter the score table and pivot it into a wide result table.

    Args:
        df: Frame that has the columns "downstream", "pre-training",
            "model_name" and "f1_macro" plus every column named in
            `filter_config`.
        filter_config: Mapping of column name -> list of accepted values. The
            filters are applied one after another, so a row is kept only if it
            passes all of them.

    Returns:
        A frame indexed by "pre-training" whose columns are the "f1_macro"
        values spread over the "downstream" and "model_name" levels.

    Raises:
        AssertionError: If a key of `filter_config` is not a column of `df`.
    """
    index_columns = ["downstream", "pre-training", "model_name"]

    selected = df
    for column, accepted_values in filter_config.items():
        assert (
            column in selected.columns.values
        ), f"No column found with name {column}"
        keep_row = selected[column].isin(accepted_values)
        selected = selected[keep_row]

    selected = selected[index_columns + ["f1_macro"]]
    # Project helper; presumably flags repeated key combinations, which would
    # make the unstack below ambiguous -- confirm against its implementation.
    check_duplicates(selected, index_columns)

    indexed = selected.set_index(index_columns).sort_index()
    # First move "downstream" (index level 0) into the columns, then the last
    # remaining index level ("model_name"); "pre-training" stays as the row index.
    wide = indexed.unstack(level=0)
    wide = wide.unstack(level=-1)
    return wide.sort_index()

In [12]:
# Result tables for the rows whose `number_of_samples` label is "None"
# (presumably the runs without a sample limit -- confirm).
# Linear ("lr") and k-NN ("knn") scores on ResNet50 features.
df_resnet = filter_and_unstack(
    df=df_master,
    filter_config=dict(
        number_of_samples=["None"],
        architecture=["ResNet50"],
        model_name=["lr", "knn"],
    ),
)
# Same selection for the ViT_T16 features.
df_vit = filter_and_unstack(
    df=df_master,
    filter_config=dict(
        number_of_samples=["None"],
        architecture=["ViT_T16"],
        model_name=["lr", "knn"],
    ),
)
# "dc" scores on ResNet50 features, kept as the baseline table.
df_baseline = filter_and_unstack(
    df=df_master,
    filter_config=dict(
        number_of_samples=["None"],
        architecture=["ResNet50"],
        model_name=["dc"],
    ),
)
df_baseline.head(n=1)

Unnamed: 0_level_0,f1_macro,f1_macro,f1_macro,f1_macro,f1_macro,f1_macro,f1_macro,f1_macro
downstream,Cassava,DDI,Fitzpatrick17k,HAM10000,PAD-UFES-20,PlantDataset,PlantDoc,PlantVillage
model_name,dc,dc,dc,dc,dc,dc,dc,dc
pre-training,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3
Random,15.2,42.6,28.1,10.7,8.9,1.8,0.3,0.5


In [13]:
# Write one CSV per (architecture, dataset group) combination.
csv_exports = [
    (df_resnet, derma_datasets, "../results/f1_scores_resnet_derma.csv"),
    (df_resnet, plant_datasets, "../results/f1_scores_resnet_plant.csv"),
    (df_vit, derma_datasets, "../results/f1_scores_vit_derma.csv"),
    (df_vit, plant_datasets, "../results/f1_scores_vit_plant.csv"),
]
for result_table, dataset_names, csv_path in csv_exports:
    # Keep only the columns of the requested downstream datasets, in list order.
    result_table["f1_macro"][dataset_names].to_csv(csv_path)