## Predict performances with probing
Task 1: Do probing results predict GLUE task performance?  
Task 2: Do probing results predict robustness against generalization?

In [14]:
import warnings
from pathlib import Path

import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [3]:
# Read the RoBERTa-base probing report and keep only the rows whose
# `config` column equals "Full".
probing_df = (
    pd.read_csv("../reports/probing_roberta_base.csv")
    .loc[lambda frame: frame["config"] == "Full"]
)
print(probing_df.shape)
probing_df.head()

(546, 14)


Unnamed: 0,LM,layer,task,config,model,train_acc,train_loss,val_acc,val_loss,test_acc,test_loss,rs,train_size_per_class,nclasses
0,embeddings_roberta_base,0,bigram_shift,Full,DecisionTree,0.5,0.693147,0.5,0.693147,0.5,0.693147,6560.2,1200.0,2.0
3,embeddings_roberta_base,0,coordination_inversion,Full,DecisionTree,0.5,0.693147,0.5,0.693147,0.5,0.693147,6560.2,1200.0,2.0
6,embeddings_roberta_base,0,obj_number,Full,DecisionTree,0.5,0.693147,0.5,0.693147,0.5,0.693147,6560.2,1200.0,2.0
9,embeddings_roberta_base,0,odd_man_out,Full,DecisionTree,0.500833,0.693146,0.5,0.693149,0.5,0.693149,6560.2,1200.0,2.0
12,embeddings_roberta_base,0,past_present,Full,DecisionTree,0.5,0.693147,0.5,0.693147,0.5,0.693147,6560.2,1200.0,2.0


In [7]:
# Shape of the probing rows that belong to the unmodified base LM.
probing_df.loc[probing_df["LM"] == "embeddings_roberta_base"].shape

(91, 14)

In [10]:
# Read the GLUE classification results and discard every row whose task
# name contains "processed".
glue_df = (
    pd.read_csv("../reports/glue_classification_results.csv")
    .loc[lambda frame: ~frame["task"].str.contains("processed")]
)
print(glue_df.shape)
glue_df.head()

(36, 5)


Unnamed: 0,task,LM,init_lr,slurm_id,dev_acc
0,rte,embeddings_roberta_base,1e-05,5236891,0.7726
1,rte,embeddings_roberta_base_corr_500,1e-05,5215525,0.7148
2,rte,embeddings_roberta_base_corr_1000,1e-05,5258357,0.704
3,rte,embeddings_roberta_base_corr_2000,1e-05,5258397,0.6859
4,rte,embeddings_roberta_base_corr_4000,1e-05,5258437,0.5848


In [11]:
# Sanity check: non-null value count of every column, grouped by GLUE task.
glue_df.groupby("task").count()

Unnamed: 0_level_0,LM,init_lr,slurm_id,dev_acc
task,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cola,6,6,6,6
mrpc,6,6,6,6
qnli,6,6,6,6
qqp,6,6,6,6
rte,6,6,6,6
sst2,6,6,6,6


In [19]:
# Compile a dataframe for linear regression
def prepare_data(probing_df, glue_df):
    """Assemble one regression row per language model (LM).

    Every unique value of ``glue_df["LM"]`` becomes one row, with these columns:

    * ``LM`` -- the LM identifier;
    * one column per ``glue_df["task"]`` value, holding that row's ``dev_acc``
      (the regression targets);
    * one column per probing result, named ``"{task}_layer_{layer}"`` and
      holding the matching ``test_acc`` from ``probing_df`` (the features).

    Columns are ordered by first appearance while walking the LMs in order.

    Args:
        probing_df: DataFrame with ``LM``, ``task``, ``layer`` and
            ``test_acc`` columns.
        glue_df: DataFrame with ``LM``, ``task`` and ``dev_acc`` columns.

    Returns:
        pd.DataFrame with one row per LM. A cell is NaN when that LM has no
        value for the column, so an incomplete report no longer breaks the
        table construction.

    Raises:
        ValueError: if one LM supplies more than one value for the same
            column (e.g. two GLUE runs of the same task), because it would
            be ambiguous which value to keep.
    """
    # A dict is used as an insertion-ordered set of column names.
    column_order = {"LM": None}

    def _store(record, key, value):
        # Refuse to overwrite: a repeated key means the input has several
        # rows for the same (LM, column) pair.
        if key in record:
            raise ValueError(
                "Duplicate value for column {!r} of LM {!r}".format(key, record["LM"])
            )
        column_order.setdefault(key)
        record[key] = value

    records = []
    for lm in glue_df["LM"].drop_duplicates().tolist():
        record = {"LM": lm}

        # Classification: the GLUE dev accuracies of this LM are the targets.
        glue_rows = glue_df[glue_df["LM"] == lm]
        for task, dev_acc in zip(glue_rows["task"], glue_rows["dev_acc"]):
            _store(record, task, dev_acc)

        # Probing: one feature per (probing task, layer) pair of this LM.
        probe_rows = probing_df[probing_df["LM"] == lm]
        for task, layer, test_acc in zip(
            probe_rows["task"], probe_rows["layer"], probe_rows["test_acc"]
        ):
            _store(record, "{}_layer_{}".format(task, layer), test_acc)

        records.append(record)

    # Building from per-LM records keeps every value attached to its own LM;
    # keys absent from a record become NaN.
    return pd.DataFrame(records, columns=list(column_order))

# Build the regression table from the filtered probing and GLUE frames,
# then display it.
linreg_data = prepare_data(probing_df, glue_df)
linreg_data

Unnamed: 0,LM,rte,cola,qnli,mrpc,sst2,qqp,bigram_shift_layer_0,coordination_inversion_layer_0,obj_number_layer_0,...,past_present_layer_11,subj_number_layer_11,tree_depth_layer_11,bigram_shift_layer_12,coordination_inversion_layer_12,obj_number_layer_12,odd_man_out_layer_12,past_present_layer_12,subj_number_layer_12,tree_depth_layer_12
0,embeddings_roberta_base,0.7726,0.8437,0.9251,0.8995,0.9438,0.9143,0.5,0.5,0.5,...,0.857333,0.793333,0.283714,0.850667,0.654333,0.787333,0.619667,0.879667,0.815333,0.295238
1,embeddings_roberta_base_corr_500,0.7148,0.8322,0.9213,0.875,0.9415,0.9164,0.5,0.5,0.5,...,0.865333,0.704333,0.250381,0.685667,0.546667,0.813,0.582667,0.868,0.806,0.294857
2,embeddings_roberta_base_corr_1000,0.704,0.838,0.9213,0.8824,0.9392,0.916,0.5,0.5,0.5,...,0.819,0.721333,0.259905,0.605667,0.547,0.81,0.564333,0.864333,0.814667,0.297714
3,embeddings_roberta_base_corr_2000,0.6859,0.8341,0.9185,0.8824,0.9415,0.9166,0.5,0.5,0.5,...,0.864333,0.747667,0.27619,0.579,0.539333,0.81,0.560667,0.870333,0.812,0.301143
4,embeddings_roberta_base_corr_4000,0.5848,0.8399,0.9209,0.8873,0.945,0.9165,0.5,0.5,0.5,...,0.864333,0.785667,0.28419,0.559333,0.540667,0.804333,0.570667,0.865333,0.810333,0.302667
5,embeddings_roberta_base_corr_6000,0.6679,0.8293,0.9229,0.8554,0.9392,0.9169,0.5,0.5,0.5,...,0.867667,0.803333,0.287619,0.552333,0.535667,0.813333,0.571333,0.865333,0.81,0.29781


In [None]:
# Persist the regression table; index=False keeps the row index out of the CSV.
linreg_data.to_csv("../reports/task1_predict_task_performance.csv", index=False)

In [20]:
# Regress the RTE dev accuracy on the bigram_shift probing accuracy of all
# 13 layers (0-12).
# NOTE: sm.OLS does not add an intercept by itself and no constant column is
# appended here, so the model only contains the probing features.
y = linreg_data["rte"]
X = linreg_data[[f"bigram_shift_layer_{i}" for i in range(13)]]

# With at least as many regressors as observations the fit is
# under-determined: no residual degrees of freedom are left, R-squared is
# trivially 1 and the standard errors / t-statistics / p-values are undefined.
# Surface that explicitly instead of leaving it to the NaN/inf cells of the
# summary table.
if X.shape[1] >= X.shape[0]:
    warnings.warn(
        f"OLS design has {X.shape[1]} regressors for only {X.shape[0]} "
        "observations; the fit is under-determined, so R-squared and the "
        "inference statistics in the summary are not interpretable."
    )

model = sm.OLS(y, X)
results = model.fit()
results.summary()

  return 1 - (np.divide(self.nobs - self.k_constant, self.df_resid)
  * (1 - self.rsquared))
  return np.dot(wresid, wresid) / self.df_resid


0,1,2,3
Dep. Variable:,rte,R-squared:,1.0
Model:,OLS,Adj. R-squared:,
Method:,Least Squares,F-statistic:,
Date:,"Sun, 02 Jan 2022",Prob (F-statistic):,
Time:,22:54:47,Log-Likelihood:,195.98
No. Observations:,6,AIC:,-380.0
Df Residuals:,0,BIC:,-381.2
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
bigram_shift_layer_0,0.2830,inf,0,,,
bigram_shift_layer_1,1.5417,inf,0,,,
bigram_shift_layer_2,-1.5357,inf,-0,,,
bigram_shift_layer_3,-0.3854,inf,-0,,,
bigram_shift_layer_4,0.0662,inf,0,,,
bigram_shift_layer_5,1.0785,inf,0,,,
bigram_shift_layer_6,-0.1433,inf,-0,,,
bigram_shift_layer_7,0.5390,inf,0,,,
bigram_shift_layer_8,0.9428,inf,0,,,

0,1,2,3
Omnibus:,,Durbin-Watson:,0.023
Prob(Omnibus):,,Jarque-Bera (JB):,1.008
Skew:,-1.004,Prob(JB):,0.604
Kurtosis:,2.95,Cond. No.,267.0
