In [None]:
import os
import pickle
from sklearn.metrics import classification_report
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from imblearn.over_sampling import RandomOverSampler
import numpy as np

# Directory holding the pickled embeddings and label files read by the loading cell.
PATH = "/local/data/oleszkie/labels"

# The 55 class names used to build the pickle file names, wrapped 11 per line.
# NOTE(review): the rows look like animals, vehicles, structures, instruments
# and produce -- grouping inferred from the names only.
test_classes = [
    "zebra", "dingo", "bison", "koala", "jaguar", "chimpanzee", "hog", "hamster", "lion", "beaver", "lynx",
    "convertible", "sports_car", "airliner", "jeep", "passenger_car", "steam_locomotive", "cab", "garbage_truck", "warplane", "ambulance", "police_van",
    "planetarium", "castle", "church", "mosque", "triumphal_arch", "barn", "stupa", "boathouse", "suspension_bridge", "steel_arch_bridge", "viaduct",
    "sax", "flute", "cornet", "panpipe", "drum", "cello", "acoustic_guitar", "grand_piano", "banjo", "maraca", "chime",
    "Granny_Smith", "fig", "custard_apple", "banana", "corn", "lemon", "pomegranate", "pineapple", "jackfruit", "strawberry", "orange",
]
# Guard against accidentally dropping or duplicating an entry while editing.
assert len(test_classes) == 55

In [None]:
# Hard-coded list of 100 concept identifiers of the form "<class>_concept<N>".
# The first 55 entries give one concept per class, in the same order as
# `test_classes`; the remaining 45 entries are additional concepts drawn from
# a subset of those classes.
# NOTE(review): presumably these name the 100 label columns that the training
# loop iterates with `range(100)` -- confirm; the list is not referenced by
# any other visible cell.
top_concepts = ['zebra_concept5',
 'dingo_concept8',
 'bison_concept9',
 'koala_concept19',
 'jaguar_concept6',
 'chimpanzee_concept17',
 'hog_concept8',
 'hamster_concept2',
 'lion_concept7',
 'beaver_concept1',
 'lynx_concept3',
 'convertible_concept2',
 'sports_car_concept15',
 'airliner_concept17',
 'jeep_concept1',
 'passenger_car_concept23',
 'steam_locomotive_concept14',
 'cab_concept15',
 'garbage_truck_concept11',
 'warplane_concept10',
 'ambulance_concept13',
 'police_van_concept4',
 'planetarium_concept4',
 'castle_concept14',
 'church_concept5',
 'mosque_concept11',
 'triumphal_arch_concept7',
 'barn_concept9',
 'stupa_concept16',
 'boathouse_concept1',
 'suspension_bridge_concept1',
 'steel_arch_bridge_concept16',
 'viaduct_concept16',
 'sax_concept10',
 'flute_concept3',
 'cornet_concept10',
 'panpipe_concept8',
 'drum_concept11',
 'cello_concept17',
 'acoustic_guitar_concept12',
 'grand_piano_concept11',
 'banjo_concept10',
 'maraca_concept15',
 'chime_concept2',
 'Granny_Smith_concept6',
 'fig_concept16',
 'custard_apple_concept11',
 'banana_concept10',
 'corn_concept6',
 'lemon_concept10',
 'pomegranate_concept1',
 'pineapple_concept12',
 'jackfruit_concept7',
 'strawberry_concept5',
 'orange_concept6',
 'sports_car_concept19',
 'sports_car_concept16',
 'sports_car_concept10',
 'sports_car_concept11',
 'panpipe_concept13',
 'panpipe_concept1',
 'steam_locomotive_concept17',
 'steel_arch_bridge_concept9',
 'orange_concept8',
 'panpipe_concept5',
 'panpipe_concept6',
 'panpipe_concept15',
 'steel_arch_bridge_concept12',
 'orange_concept11',
 'steel_arch_bridge_concept13',
 'panpipe_concept14',
 'zebra_concept7',
 'sports_car_concept2',
 'steel_arch_bridge_concept6',
 'panpipe_concept11',
 'steel_arch_bridge_concept14',
 'sports_car_concept6',
 'steel_arch_bridge_concept1',
 'planetarium_concept2',
 'orange_concept9',
 'panpipe_concept12',
 'panpipe_concept2',
 'steel_arch_bridge_concept11',
 'Granny_Smith_concept8',
 'Granny_Smith_concept10',
 'sports_car_concept9',
 'sports_car_concept7',
 'orange_concept16',
 'triumphal_arch_concept10',
 'stupa_concept11',
 'sports_car_concept18',
 'airliner_concept9',
 'steel_arch_bridge_concept3',
 'mosque_concept12',
 'strawberry_concept3',
 'sports_car_concept12',
 'airliner_concept3',
 'orange_concept15',
 'planetarium_concept14',
 'planetarium_concept18']

In [None]:
def _read_pickle(filename):
    """Unpickle and return the object stored in ``filename`` inside ``PATH``."""
    with open(os.path.join(PATH, filename), "rb") as handle:
        return pickle.load(handle)


# Both caches are keyed by strings built from the same pieces as the file names:
#   embeddings["<ss>_<phase>_<clas>"]  and  labels["<phase>_<clas>"].
labels = dict()
embeddings = dict()

for phase in ('train', 'val'):
    for clas in test_classes:
        # Four embedding files per (phase, class) -- one for each backbone ...
        for ss in ('simclr', 'swav', 'byol', 'moco'):
            embd_name = f"{phase}_embd_{ss}_55_{clas}.pkl"
            embeddings[f"{ss}_{phase}_{clas}"] = _read_pickle(embd_name)
        # ... followed by the single label file shared by all backbones.
        labels_name = f"{phase}_labels_55_{clas}.pkl"
        labels[f"{phase}_{clas}"] = _read_pickle(labels_name)
            

In [None]:
import numpy as np
from sklearn.utils import shuffle

# Build, for every backbone, a training and a validation dataset by stacking
# the per-class arrays loaded earlier (in `test_classes` order) and shuffling
# features and labels together.
#
# Results are stored per backbone name:
#   X[ss], Y[ss]          -- training embeddings / labels
#   X_val[ss], Y_val[ss]  -- validation embeddings / labels
# NOTE(review): the label arrays are later indexed as `[:, concept]`, so they
# are presumably 2-D with one column per concept -- confirm.

# Fixed seed so the row order (and therefore the downstream resampling and
# model fits) is identical on every Restart & Run All. Without it each run
# produced a different permutation.
SHUFFLE_SEED = 0

X = {}
Y = {}
X_val = {}
Y_val = {}

for ss in ['simclr', 'swav', 'byol', 'moco']:
    # Stack the per-class arrays along the first axis.
    x = np.concatenate([embeddings[f"{ss}_train_{clas}"] for clas in test_classes])
    y = np.concatenate([labels[f"train_{clas}"] for clas in test_classes])
    x_val = np.concatenate([embeddings[f"{ss}_val_{clas}"] for clas in test_classes])
    y_val = np.concatenate([labels[f"val_{clas}"] for clas in test_classes])

    # `shuffle` applies one common permutation to all arrays passed in, so
    # each embedding row stays aligned with its label row.
    x, y = shuffle(x, y, random_state=SHUFFLE_SEED)
    x_val, y_val = shuffle(x_val, y_val, random_state=SHUFFLE_SEED)

    X[ss] = x
    Y[ss] = y
    X_val[ss] = x_val
    Y_val[ss] = y_val

In [None]:
# Oversampler shared by every fit; the fixed seed keeps resampling reproducible.
ros = RandomOverSampler(random_state=0)
from datetime import datetime


# For each of the 100 concept columns and each backbone, fit a logistic
# regression on the oversampled training embeddings and record the ROC AUC on
# the validation set. `results[ss][i]` is the AUC for concept column `i`.
results = {'swav': [], 'simclr': [], 'byol': [], 'moco': []}
for concept in range(100):
    for ss in ['swav', 'simclr', 'byol', 'moco']:
        # Balance the classes for this concept by duplicating minority rows.
        X_resampled, Y_resampled = ros.fit_resample(X[ss], Y[ss][:, concept])
        # NOTE(review): after oversampling the classes are already balanced,
        # so class_weight='balanced' is likely redundant; left as in the original.
        clf = LogisticRegression(class_weight='balanced', max_iter=1000)
        clf.fit(X_resampled, Y_resampled)
        # Hard predictions are still computed in case later cells read `y_pred`.
        y_pred = clf.predict(X_val[ss])
        # ROC AUC must be computed from continuous scores. Passing the hard 0/1
        # predictions (as before) collapses the curve to one operating point
        # and under-reports the ranking quality of the classifier.
        y_score = clf.decision_function(X_val[ss])
        auc = roc_auc_score(Y_val[ss][:, concept], y_score)
        results[ss].append(auc)

In [None]:
# One column per backbone (in `results` key order), one row per concept index.
df = pd.DataFrame(data=results)