## Distinguish foundation models

In [16]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from tqdm import tqdm

In [3]:
# Load the per-model task/probing results and tag each row with its model family.
df = pd.read_csv("../reports/task1_predict_task_performance.csv")
# Labels are assigned by row position: consecutive runs of rows share a family.
df["label"] = [
    family
    for family, n_rows in (
        ("roberta", 6),
        ("xlm", 6),
        ("albert", 6),
        ("deberta", 6),
        ("xlnet", 1),
    )
    for _ in range(n_rows)
]
df.head()

Unnamed: 0,LM,rte,cola,qnli,mrpc,sst2,qqp,bigram_shift_layer_0,coordination_inversion_layer_0,obj_number_layer_0,...,subj_number_layer_11,tree_depth_layer_11,bigram_shift_layer_12,coordination_inversion_layer_12,obj_number_layer_12,odd_man_out_layer_12,past_present_layer_12,subj_number_layer_12,tree_depth_layer_12,label
0,embeddings_roberta_base,0.7726,0.8437,0.9251,0.8995,0.9438,0.9143,0.5,0.5,0.5,...,0.793333,0.283714,0.850667,0.654333,0.787333,0.619667,0.879667,0.815333,0.295238,roberta
1,embeddings_roberta_base_corr_500,0.7148,0.8322,0.9213,0.875,0.9415,0.9164,0.5,0.5,0.5,...,0.704333,0.250381,0.685667,0.546667,0.813,0.582667,0.868,0.806,0.294857,roberta
2,embeddings_roberta_base_corr_1000,0.704,0.838,0.9213,0.8824,0.9392,0.916,0.5,0.5,0.5,...,0.721333,0.259905,0.605667,0.547,0.81,0.564333,0.864333,0.814667,0.297714,roberta
3,embeddings_roberta_base_corr_2000,0.6859,0.8341,0.9185,0.8824,0.9415,0.9166,0.5,0.5,0.5,...,0.747667,0.27619,0.579,0.539333,0.81,0.560667,0.870333,0.812,0.301143,roberta
4,embeddings_roberta_base_corr_4000,0.5848,0.8399,0.9209,0.8873,0.945,0.9165,0.5,0.5,0.5,...,0.785667,0.28419,0.559333,0.540667,0.804333,0.570667,0.865333,0.810333,0.302667,roberta


In [22]:
def distinguish_models_cv(df_, features, verbose=False):
    """Cross-validate a logistic-regression classifier that predicts ``label``.

    A fresh ``LogisticRegression`` is fitted on each of 5 shuffled folds using
    only the columns in ``features``; accuracy is measured on both the
    training part and the held-out part of every fold.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must contain every column named in ``features`` and a ``"label"`` column.
    features : sequence of str
        Column names used as classifier inputs.
    verbose : bool, default False
        When True, print mean and std of train and dev accuracy over the folds.

    Returns
    -------
    tuple of (float, float)
        Mean and standard deviation of the held-out (dev) accuracy.
    """
    features = list(features)
    df = df_[features + ["label"]]
    # Seed the splitter directly so the folds stay reproducible without
    # resetting NumPy's global RNG as a side effect of every call.
    kfold = KFold(n_splits=5, shuffle=True, random_state=42)
    trainaccs = []
    devaccs = []
    for train_idx, val_idx in kfold.split(df):
        # KFold yields positional indices, so select rows with .iloc; .loc
        # would look them up as index labels and break on a non-default index.
        df_tr = df.iloc[train_idx]
        df_val = df.iloc[val_idx]

        model = LogisticRegression()
        model.fit(df_tr[features], df_tr["label"])
        preds = model.predict(df_tr[features])
        trainaccs.append(accuracy_score(df_tr["label"], preds))
        preds = model.predict(df_val[features])
        devaccs.append(accuracy_score(df_val["label"], preds))
    if verbose:
        print("Train: Acc mean {:.4f}, std {:.4f}".format(
            np.mean(trainaccs), np.std(trainaccs)
        ))
        print("Dev: Acc mean {:.4f}, std {:.4f}".format(
            np.mean(devaccs), np.std(devaccs)
        ))
    return np.mean(devaccs), np.std(devaccs)
    
# Baseline run: one probing feature per probing task, with fold accuracies printed.
distinguish_models_cv(
    df,
    features=[
        "bigram_shift_layer_5",
        "coordination_inversion_layer_6",
        "obj_number_layer_1",
        "odd_man_out_layer_5",
        "past_present_layer_1",
        "subj_number_layer_1",
        "tree_depth_layer_1",
    ],
    verbose=True,
)

Train: Acc mean 0.5200, std 0.1122
Dev: Acc mean 0.1600, std 0.2332


(0.16, 0.233238075793812)

In [19]:
def find_3_best_features(df):
    """Exhaustively search for the feature triple with the best dev accuracy.

    Candidate features are every ``<probing task>_layer_<n>`` column for
    layers 1 through 12. Each unordered triple is scored with
    ``distinguish_models_cv``; the first triple reaching the highest mean dev
    accuracy is kept (later ties do not replace it).

    Returns
    -------
    tuple
        ``(best mean dev accuracy, list of the three feature names)``.
    """
    task_names = ["bigram_shift", "coordination_inversion", "obj_number",
                  "odd_man_out", "past_present", "subj_number", "tree_depth"]

    # Layer-major ordering: all tasks of layer 1, then all tasks of layer 2, ...
    candidates = []
    for depth in range(1, 13):
        for task in task_names:
            candidates.append(f"{task}_layer_{depth}")
    n_candidates = len(candidates)

    top_score = None
    top_triple = []
    # Strictly increasing index triples (a < b < c) visit each combination once.
    for a in tqdm(range(n_candidates - 2)):
        for b in range(a + 1, n_candidates - 1):
            for c in range(b + 1, n_candidates):
                triple = [candidates[a], candidates[b], candidates[c]]
                score = distinguish_models_cv(df, triple, verbose=False)[0]
                if top_score is None or score > top_score:
                    top_score, top_triple = score, triple
    return top_score, top_triple

# Exhaustive search over all feature triples; the recorded run below took about 23 minutes.
find_3_best_features(df)

100%|██████████| 82/82 [23:18<00:00, 17.05s/it] 


(0.27999999999999997,
 ['bigram_shift_layer_1', 'obj_number_layer_4', 'obj_number_layer_12'])