In [None]:
import numpy as np
import math
import os
import pandas as pd
from operator import itemgetter
import pickle
from tqdm import tqdm
import scipy
import random
import yaml

import difflib
import string
import json

import matplotlib.pyplot as plt
import matplotlib
from matplotlib.patches import Patch
import seaborn as sns

import torch
from torch import Tensor
import datasets
from datasets import load_dataset
from embedding import BertHuggingfaceMLM
from sklearn.decomposition import PCA
from torch.utils.data import DataLoader, TensorDataset

from geometrical_bias import SAME, WEAT, GeneralizedWEAT, DirectBias, RIPA, MAC, normalize, cossim, EmbSetList, EmbSet, GeometricBias
from unmasking_bias import PLLBias
from sklearn.metrics import roc_auc_score, confusion_matrix, roc_curve, RocCurveDisplay, ConfusionMatrixDisplay, accuracy_score

from utils import CLFHead, SimpleCLFHead, CustomModel, CrowSPairsDataset, JigsawDataset, BiosDataset, MLMPipeline

In [None]:
# --- Static inputs: protected-group definitions, batch-size lookup, experiment config ---
with open('data/protected_groups.json', 'r') as pg_file:
    pg_config = json.load(pg_file)

with open('data/batch_size_lookup_1080.json', 'r') as bs_file:
    batch_size_lookup = json.load(bs_file)

with open('results/mlm_20340/config.json', 'r') as cfg_file:
    exp_config = json.load(cfg_file)

# Shortcuts into the protected-groups config.
groups_by_bias_types = pg_config['groups_by_bias_types']
terms_by_groups = pg_config['terms_by_groups']

# Score name -> scoring class. The key order is reused when building score lists
# in later cells. MAC is imported but not part of this mapping.
cosine_scores = dict(
    SAME=SAME,
    WEAT=WEAT,
    gWEAT=GeneralizedWEAT,
    DirectBias=DirectBias,
)


In [None]:
# Show the loaded experiment configuration as this cell's output.
exp_config

In [None]:
# Load the pickled experiment results from the path stored under 'save_file' in the config.
# NOTE(review): pickle.load can execute arbitrary code - only open result files you trust.
with open(exp_config['save_file'], 'rb') as handle:
    res_dict = pickle.load(handle)
    
# Show the top-level keys of the loaded results as this cell's output.
res_dict.keys()

In [None]:
# Split the loaded dict into its 'params' and 'results' entries. The cells below
# index both with the same experiment index i.
params = res_dict['params']
results = res_dict['results']

In [None]:
# Model identifiers that are matched against params[i]['mlm'] and skipped when
# the scores are aggregated.
blacklist_models = ['google/electra-small-generator', "albert-xlarge-v2", "albert-xxlarge-v2", "microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract-fulltext", "nlpaueb/legal-bert-base-uncased"]

In [None]:
# Aggregate the scores of every non-blacklisted experiment per bias type and
# correlate the per-sample extrinsic scores with the per-sample cosine scores.
score_list = list(cosine_scores.keys())+['extrinsic']
# bias type -> score name -> one aggregated value per experiment
scores_per_bias_type = {bt : {score: [] for score in score_list} for bt in exp_config['bias_types']}
# bias type -> cosine score name -> correlation coefficients with p < 0.05
sample_corr = {bt : {score: [] for score in score_list[:-1]} for bt in exp_config['bias_types']}
for i in range(len(results)):
    # Test for the key before reading it: with the checks in the opposite order a
    # parameter set without an 'mlm' entry raised KeyError instead of being skipped.
    if 'mlm' not in params[i].keys():
        continue
    if params[i]['mlm'] in blacklist_models:
        continue
    #print("experiment", i, "with bias type", params[i]['bias_type'])
    for score in score_list:
        scores_per_bias_type[params[i]['bias_type']][score].append(results[i][score])

    print(results[i].keys())
    for score in score_list:
        if score+'_individual' not in results[i].keys():
            continue
        for score2 in score_list:
            if score == score2:
                continue # TODO
            if score2+'_individual' not in results[i].keys():
                continue
            if score == 'extrinsic':
                # extrinsic vs. cosine score
                # Regressing against '<score2>_individual' did not work, so the
                # '<score2>_cs' entry is used instead.
                # NOTE(review): the guard above tests for '<score2>_individual' while
                # '<score2>_cs' is read here - confirm both keys are always present together.
                slope, intercept, r, p, std_err = scipy.stats.linregress(np.abs(results[i][score+'_individual']), np.abs(results[i][score2+'_cs']))

                # Only significant correlations are reported and stored.
                if p < 0.05:
                    print(score, score2, "R="+str(r)+" (p="+str(p)+")")
                    sample_corr[params[i]['bias_type']][score2].append(r)
    print()

## Sample bias correlation

In [None]:
# Show the collected per-bias-type sample correlations as this cell's output.
sample_corr

In [None]:
def set_axis_style(ax, labels):
    """Label the x axis with one tick per entry of `labels` and draw a zero line.

    Ticks are placed at 1..len(labels), the x range is padded by 0.75 on both
    sides, and a thin dashed grey horizontal line is drawn at y=0 across that range.
    """
    n_labels = len(labels)
    left_edge = 0.25
    right_edge = n_labels + 0.75
    tick_positions = np.arange(1, n_labels + 1)

    ax.set_xticks(tick_positions, labels=labels)
    ax.set_xlim(left_edge, right_edge)
    #ax.set_xlabel('Sample name')
    ax.hlines(0, left_edge, right_edge, 'grey', '--', linewidths=0.8)

# One violin plot per bias type, showing the distribution of the significant
# per-sample correlation coefficients for each cosine score.
for bt, res in sample_corr.items():
    data = []
    labels = []
    for score in score_list[:-1]:
        if len(res[score]) > 0: # skip gWEAT/ WEAT for non-binary
            data.append(res[score])
            labels.append(score)
    if not data:
        # violinplot cannot draw an empty list of datasets, so there is nothing
        # to plot for this bias type.
        print("no significant sample correlations for", bt)
        continue
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 9), sharey=True)

    # set style for the axes
    set_axis_style(ax, labels)

    # Title with the bias type of this iteration; the first configured bias type
    # was used for every figure before.
    ax.set_title(bt)
 #   ax.set_title('Pearson Correlation with PLL(more)-PLL(less)')# of class-wise cosine scores with the TP GAP')
    ax.set_ylabel('Pearson Coefficient R')
    ax.violinplot(data)

    plt.subplots_adjust(bottom=0.15, wspace=0.05)
    plt.savefig('plots/mlm_sample_bias_'+bt+'.png', bbox_inches="tight")
    plt.show()
    # Release the figure so that looping over many bias types does not accumulate open figures.
    plt.close(fig)

## Aggregated bias correlation

In [None]:
# For each bias type, regress every cosine score against the extrinsic score
# (one point per experiment), report R and p, and save a regression plot.
for bt, res in scores_per_bias_type.items():
    df = pd.DataFrame(res)
    print(bt)
    for score in score_list:
        if score == 'extrinsic':
            continue
        print(score)
        # Reference the extrinsic scores explicitly instead of relying on a loop
        # variable left over from an earlier cell.
        slope, intercept, r, p, std_err = scipy.stats.linregress(res[score], res['extrinsic'])
        print("R="+str(r)+" (p="+str(p)+")")
        print()

        ax = sns.regplot(x="extrinsic", y=score, data=df)
        # Title with the bias type of this iteration rather than the first configured one.
        ax.set_title(bt+" R=%.3f (p=%.4f)" % (r,p))
        #ax.set(xlabel='% PLL(more) > PLL(less)')
        plt.savefig('plots/mlm_aggr_corr_'+score+'_'+bt+'.png', bbox_inches="tight")
        plt.show()
            

## Can we distinguish more/less biased models?

In [None]:
# Mean extrinsic score per experiment. All experiments are used; the per-bias-type
# filter is currently disabled.
#bias_type = 'religion'
valid_exp_ids = list(range(len(results)))# if params[i]['bias_type'] == bias_type]
biases = [np.mean(results[exp_id]['extrinsic']) for exp_id in valid_exp_ids]# if params[i]['bias_type'] == bias_type]

# Histogram of the per-experiment extrinsic scores.
counts, bins = np.histogram(biases)
plt.hist(bins[:-1], bins, weights=counts)
plt.show()

# Experiments more than one standard deviation below / above the mean form the
# low / high bias groups.
mean = np.mean(biases)
std = np.std(biases)
exp_low_bias = [exp_id for exp_id, bias in zip(valid_exp_ids, biases) if bias < mean-std]
exp_high_bias = [exp_id for exp_id, bias in zip(valid_exp_ids, biases) if bias > mean+std]

print(exp_low_bias)
print(exp_high_bias)

In [None]:
import matplotlib

# Global matplotlib font settings, applied through the rc mechanism.
font = dict(family='sans-serif', weight='normal', size=20)
matplotlib.rc('font', **font)

# The comparison below uses the first configured bias type.
bias_type = exp_config['bias_types'][0]
def get_score_pred(exp_i, exp_j, score_key):
    """Return 1 if experiment `exp_i` has a smaller `score_key` value than `exp_j`, else 0.

    Both values are looked up in the notebook-level `results` collection.
    """
    first_value = results[exp_i][score_key]
    second_value = results[exp_j][score_key]
    is_lower = first_value < second_value
    return int(is_lower)
    
# Ordered experiment pairs: every (low, high) pair first, then every (high, low) pair.
# The same ordering is used for the reference labels and for each score's predictions.
comparison_pairs = [(i, j) for i in exp_low_bias for j in exp_high_bias]
comparison_pairs += [(i, j) for i in exp_high_bias for j in exp_low_bias]

# Reference labels: 1 when the first experiment of a pair has the lower extrinsic score.
extrinsic_pred = [get_score_pred(i, j, 'extrinsic') for i, j in comparison_pairs]
print(len(extrinsic_pred))
print(extrinsic_pred)

scores_to_check = list(cosine_scores.keys())
if not comparison_pairs:
    # Without at least one low and one high bias experiment there are no labels,
    # and confusion_matrix / roc_auc_score fail on empty input.
    print("No low/high bias experiment pairs available; skipping the score comparison.")
    scores_to_check = []

for score in scores_to_check:

    if bias_type in ['age', 'gender'] and score == 'gWEAT': # binary experiment
        continue
    if bias_type in ['race-color', 'religion'] and score == 'WEAT':
        continue
    print(score)

    # 1 when the cosine score ranks the first experiment of a pair as less biased.
    score_pred = [get_score_pred(i, j, score) for i, j in comparison_pairs]
    print(score_pred)

    # Row-normalized confusion matrix of the score's ranking against the extrinsic ranking.
    cm = confusion_matrix(extrinsic_pred, score_pred, normalize='true')
    cm_display = ConfusionMatrixDisplay(cm).plot()
    # Fixed color range so the plots of different scores are comparable.
    cm_display.ax_.get_images()[0].set_clim(0, 1)
    cm_display.ax_.get_images()[0].set_cmap(plt.cm.Blues)
    cm_display.ax_.set_title(score)
    plt.savefig('plots/cm_bias_pred_'+score+'.png', bbox_inches="tight")
    plt.show()
    print("ROC AUC: ", roc_auc_score(extrinsic_pred, score_pred))
    print("accuracy: ", accuracy_score(extrinsic_pred, score_pred))