In [None]:
import pickle
import numpy as np
import pandas as pd

import torch
import json
import scipy
import seaborn as sns
import matplotlib.pyplot as plt
import itertools

from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix, roc_curve, RocCurveDisplay, ConfusionMatrixDisplay

from embedding import BertHuggingface
from geometrical_bias import SAME, WEAT, GeneralizedWEAT, DirectBias, MAC, normalize, cossim, EmbSetList, EmbSet, GeometricBias
from utils import CLFHead, SimpleCLFHead, CustomModel, JigsawDataset, BiosDataset, DebiasPipeline


## Evaluation
- check whether debiasing reduces extrinsic biases
- plot correlation (job-wise vs. overall)

In [None]:
# Read the configuration of the experiment series that is evaluated in this notebook.
with open('results/bios_20421/config.json', 'r') as config_file:
    exp_config = json.load(config_file)

# The result pickle is addressed by plain string concatenation, so the configured
# 'save_dir' has to end with a path separator.
save_file = ''.join([exp_config['save_dir'], 'res.pickle'])
# NOTE: pickle.load executes arbitrary code from the file; only open result files you produced yourself.
with open(save_file, 'rb') as pickle_file:
    res = pickle.load(pickle_file)
# Parameters and results are parallel sequences (one entry per experiment).
exp_parameters, results = res['params'], res['results']

# Cosine-based bias scores, keyed by the name under which they appear in the results.
cosine_scores = dict(SAME=SAME, WEAT=WEAT, gWEAT=GeneralizedWEAT, DirectBias=DirectBias, MAC=MAC)

In [None]:
# Display the loaded experiment configuration for reference.
exp_config

In [None]:
# Every parameter set needs exactly one result entry, since both are indexed in parallel below.
assert len(exp_parameters) == len(results), (
    f"shape mismatch: {len(exp_parameters)} vs. {len(results)}"
)

In [None]:
# Number of experiments contained in the result file.
len(results)

In [None]:
# Suffix appended to result keys further down (score+'_classwise'+suffix, 'extrinsic'+suffix, ...).
# NOTE(review): the commented-out condition below suggests the suffix was once only set
# when exp_config['clf_debias'] contains 'neutral' — confirm that it should be unconditional.
suffix = '_neutral'
#if 'neutral' in exp_config['clf_debias']:
#    suffix = '_neutral'

In [None]:
# Distribution of the overall recall over all experiments.
recalls = []
for experiment in results:
    recalls.append(experiment['recall'])

counts, bins = np.histogram(recalls)
# Draw the pre-binned histogram: one sample per left bin edge, weighted with the bin count.
plt.hist(bins[:-1], bins=bins, weights=counts)
plt.show()

In [None]:
# Minimum overall recall and minimum per-class recall an experiment must exceed to count as valid.
MIN_RECALL = 0.6
MIN_CLASS_RECALL = 0.3

# Models excluded up front; models with at least one failing experiment are appended below.
blacklist_models = ['glove-wiki-gigaword-300', 'word2vec-google-news-300']
# Indices of all experiments that fail the recall check.
blacklist_exp = []

cur_model = ""
for i, res in enumerate(results):
    params = exp_parameters[i]
    # Print the embedder name whenever it differs from the previous experiment's embedder.
    if params['embedder'] != cur_model:
        cur_model = params['embedder']
        print()
        print(cur_model)
    if res['recall'] > MIN_RECALL and min(res['class_recall']) > MIN_CLASS_RECALL:
        continue

    # Record every failing experiment, not only the first one per model, so that
    # blacklist_exp can be used for experiment-level filtering.
    blacklist_exp.append(i)
    if cur_model not in blacklist_models:
        blacklist_models.append(cur_model)

    # Report the hyper-parameters of the failing experiment.
    if params['debias']:
        print(params['lr'], params['debias'], params['debias_k'])
    else:
        print(params['lr'], params['debias'])

blacklist_models

In [None]:
# Variance along axis 1 of the first experiment's class-wise WEAT scores.
# NOTE(review): presumably axis 0 indexes folds and axis 1 classes — confirm against the result layout.
np.var(results[0]['WEAT_classwise'], axis=1)

In [None]:
# Scores that are compared: every cosine score except gWEAT, plus the extrinsic bias.
score_list = list(cosine_scores.keys())+['extrinsic']
score_list.remove('gWEAT')
# Per bias type: one list per score with one aggregated value per non-blacklisted experiment.
scores_per_bias_type = {bt : {score: [] for score in score_list+['subgroup_AUC', 'BNSP', 'BPSN']} for bt in exp_config['bias_types']}

# counts[s]: number of experiments where s correlates strongly (|R| > 0.7) and
# significantly (p < 0.01) with the class-wise extrinsic bias.
counts = {score: 0 for score in score_list}
# corr_per_score[s]: R and p of the class-wise regression 'extrinsic' vs. s, one entry per experiment.
corr_per_score = {score: {'r': [], 'p': []} for score in score_list}
for i in range(len(results)):
    bias_type = exp_parameters[i]['bias_type']
    print("experiment", i, "with bias type", bias_type, "and", exp_parameters[i]['embedder'])

    if exp_parameters[i]['embedder'] in blacklist_models:
        print("skip blacklisted models")
        continue

    # Experiment-level aggregates of each score.
    for score in score_list:
        scores_per_bias_type[bias_type][score].append(np.mean(results[i][score]))
    for score in ['subgroup_AUC', 'BNSP', 'BPSN']:
        scores_per_bias_type[bias_type][score].append(np.mean(np.mean(np.abs(results[i][score]), axis=1)))

    # Pairwise regressions between the class-wise scores (each averaged over axis 0).
    for score in score_list:
        for score2 in score_list:
            if score == score2:
                continue

            # class-wise bias scores
            if score == 'BPSN':
                # only reachable if 'BPSN' is added to the outer loop
                scores1 = np.mean(results[i][score], axis=0)
            else:
                scores1 = np.mean(results[i][score+'_classwise'+suffix], axis=0)
            # NOTE(review): presumably DirectBias and MAC are unsigned, so only the magnitude
            # of the first score is compared against them — confirm.
            if score2 == 'DirectBias' or score2 == 'MAC':
                scores1 = np.abs(scores1)
            scores2 = np.mean(results[i][score2+'_classwise'+suffix], axis=0)

            try:
                slope, intercept, r, p, std_err = scipy.stats.linregress(scores1, scores2)
            except ValueError:
                print("invalid values for ", score, score2)
                print(results[i]['recall'])
                # No fit means no r/p for this pair: skip it instead of recording the
                # values that are still bound from the previous pair.
                continue

            if 'extrinsic' in score:
                corr_per_score[score2]['r'].append(r)
                corr_per_score[score2]['p'].append(p)

            if 'extrinsic' in score and p < 0.01 and np.abs(r) > 0.7:
                print(score, score2, "R="+str(r)+" (p="+str(p)+")")
                counts[score2] += 1
    print()
    print()

In [None]:
# Per cosine score: number of experiments with a strong, significant correlation
# (|R| > 0.7, p < 0.01) to the class-wise extrinsic bias.
counts

## Correlation of cosine scores with extrinsic bias

In [None]:
# Summarise the per-experiment regression statistics of each cosine score:
# mean R, mean p-value and the standard deviation of R (used as error bar later on).
mean_corr_per_score = {}
for score in ['SAME', 'WEAT', 'DirectBias', 'MAC']:
    r_values = corr_per_score[score]['r']
    p_values = corr_per_score[score]['p']
    mean_corr_per_score[score] = {
        'r': np.mean(r_values),
        'p': np.mean(p_values),
        'err': np.std(r_values),
    }
mean_corr_per_score

In [None]:
# Scores shown in the plots of this section.
eval_scores = ['SAME', 'WEAT', 'DirectBias']#, 'MAC']
width = 0.5
x = np.arange(len(eval_scores))

fig, ax = plt.subplots(figsize=(6,6))

# One ax.bar call per score, so that every bar takes its own colour from the colour cycle.
for x_pos, score in zip(x, eval_scores):
    stats = mean_corr_per_score[score]
    ax.bar(x_pos, stats['r'], width, yerr=stats['err'])

ax.set_ylabel('Mean Pearson Correlation', fontsize=16)
ax.set_xticks(x, eval_scores, fontsize=16)
ax.grid(color='grey', linestyle='--', axis='y')
ax.set_title('Pearson Correlations of class-wise cosine scores with TP GAP', fontsize=20)
plt.show()

In [None]:
def set_axis_style(ax, labels):
    """Put one labelled x-tick per entry of ``labels`` on ``ax`` and draw a zero line.

    Ticks sit at the positions 1..len(labels). The x-limits and the dashed grey
    horizontal line at y=0 both span from 0.25 to len(labels) + 0.75.
    """
    n_labels = len(labels)
    left, right = 0.25, n_labels + 0.75
    ax.set_xticks(np.arange(1, n_labels + 1), labels=labels)
    ax.set_xlim(left, right)
    ax.hlines(0, left, right, 'grey', '--', linewidths=0.8)

# One distribution of per-experiment correlation coefficients per evaluated score.
data = [corr_per_score[score]['r'] for score in eval_scores]
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 9), sharey=True)

# set style for the axes
labels = eval_scores
set_axis_style(ax, labels)

# Title the panel after the classifier-debiasing variant of this run; the first
# matching rule wins and the title stays unset if none matches.
clf_debias = exp_config['clf_debias']
title_rules = [
    (clf_debias == 'no', 'Raw'),
    ('neutral' in clf_debias, 'Gender-scrubbed'),
    ('resample' in clf_debias, 'Resampled'),
]
for rule_matches, panel_title in title_rules:
    if rule_matches:
        ax.set_title(panel_title)
        break

ax.set_ylabel('Pearson Coefficient R')
ax.violinplot(data)

plt.subplots_adjust(bottom=0.15, wspace=0.05)
# The output file is named after the debiasing variant (eps is not used: it cannot handle transparency).
plt.savefig('plots/bios_class_bias_'+clf_debias+'.png', bbox_inches="tight")
plt.show()

In [None]:
# Absolute per-experiment scores, grouped by bias type. Each inner dict is built from
# scratch so the cell does not depend on a `score` variable left over from earlier cells.
mean_scores = {
    bt: {score: np.abs(bt_scores[score]) for score in score_list+['subgroup_AUC', 'BNSP', 'BPSN']}
    for bt, bt_scores in scores_per_bias_type.items()
}

# Regress every cosine score on the extrinsic bias, separately for each bias type.
for comp in ['extrinsic']:#, 'subgroup_AUC', 'BNSP', 'BPSN']:
    for bt, res in mean_scores.items():
        df = pd.DataFrame(res)
        print(bt)
        if df.empty:
            # all experiments of this bias type were skipped (blacklisted), nothing to regress
            print("no experiments for this bias type, skipping")
            continue
        for score in score_list:
            if score == 'extrinsic':
                continue
            print(score)
            slope, intercept, r, p, std_err = scipy.stats.linregress(df.loc[:,comp], df.loc[:,score])
            sns.regplot(x=comp, y=score, data=df).set_title("R="+str(r)+" (p="+str(p)+")")
            plt.show()

            print()

In [None]:
# Shortcuts to the 'debias_k' and 'embedders' entries of the experiment configuration.
# NOTE(review): presumably the lists of debias-k values and embedder names that were swept — confirm.
debias_ks = exp_config['debias_k']
embedder = exp_config['embedders']

In [None]:
# Display the configured 'debias_k' entry.
debias_ks

## Can we distinguish less/more biased models with cosine scores?

In [None]:
# For the '_neutral' variant, derive an overall 'extrinsic_neutral' entry for each
# experiment: one mean per element of its class-wise neutral extrinsic biases.
if suffix == '_neutral':
    for experiment in results:
        experiment['extrinsic_neutral'] = list(map(np.mean, experiment['extrinsic_classwise_neutral']))

In [None]:
# Experiments whose embedder is not blacklisted, and their mean extrinsic bias.
valid_exp_ids = [i for i in range(len(results)) if exp_parameters[i]['embedder'] not in blacklist_models]
biases = [np.mean(results[exp_id]['extrinsic'+suffix]) for exp_id in valid_exp_ids]

counts, bins = np.histogram(biases)
plt.hist(bins[:-1], bins, weights=counts)
plt.show()

# Low/high bias experiments lie more than one standard deviation below/above the mean bias.
mean = np.mean(biases)
std = np.std(biases)
lower_bound, upper_bound = mean-std, mean+std
exp_low_bias = [exp_id for exp_id, bias in zip(valid_exp_ids, biases) if bias < lower_bound]
exp_high_bias = [exp_id for exp_id, bias in zip(valid_exp_ids, biases) if bias > upper_bound]

print(exp_low_bias)
print(exp_high_bias)

In [None]:
import matplotlib
# Font settings applied to all figures drawn from here on.
font = dict(family='sans-serif', weight='normal', size=20)
matplotlib.rc('font', **font)

def get_score_pred(exp_i, exp_j, score_key):
    """Return 1 if experiment ``exp_i`` has a strictly smaller mean absolute
    ``score_key`` value than experiment ``exp_j``, else 0.

    Both experiments are looked up by index in the global ``results``.
    """
    magnitude_i = np.mean(np.abs(results[exp_i][score_key]))
    magnitude_j = np.mean(np.abs(results[exp_j][score_key]))
    return 1 if magnitude_i < magnitude_j else 0
    
def _pairwise_preds(score_key):
    """Compare every low-bias experiment with every high-bias experiment on ``score_key``.

    For each (low, high) pair two entries are produced, one per comparison direction,
    so the returned list has 2 * len(exp_low_bias) * len(exp_high_bias) elements.
    """
    preds = []
    for low in exp_low_bias:
        for high in exp_high_bias:
            preds.append(get_score_pred(low, high, score_key))
            preds.append(get_score_pred(high, low, score_key))
    return preds


# The ordering given by the extrinsic bias serves as reference labels.
extrinsic_pred = _pairwise_preds('extrinsic'+suffix)
print(len(extrinsic_pred))

for score in cosine_scores.keys():

    if score == 'gWEAT': # binary experiment
        continue
    print(score)

    # Ordering of the same experiment pairs according to the cosine score.
    score_pred = _pairwise_preds(score+suffix)
    # Rows are normalised (normalize='true'), so each row shows rates per reference label.
    cm = confusion_matrix(extrinsic_pred, score_pred, normalize='true')
    cm_display = ConfusionMatrixDisplay(cm).plot()
    # Fixed colour range and colour map so that the matrices of different scores are comparable.
    cm_display.ax_.get_images()[0].set_clim(0, 1)
    cm_display.ax_.get_images()[0].set_cmap(plt.cm.Blues)
    cm_display.ax_.set_title(score)
    plt.savefig('plots/cm_bias_pred_'+score+'.png', bbox_inches="tight")
    plt.show()
    print("ROC AUC: ", roc_auc_score(extrinsic_pred, score_pred))
    print("accuracy: ", accuracy_score(extrinsic_pred, score_pred))
