In [None]:
import json
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
import torch
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix, roc_curve, RocCurveDisplay, ConfusionMatrixDisplay

from embedding import BertHuggingface
from geometrical_bias import SAME, WEAT, GeneralizedWEAT, DirectBias, MAC, normalize, cossim, EmbSetList, EmbSet, GeometricBias
from utils import CLFHead, SimpleCLFHead, CustomModel, JigsawDataset, BiosDataset, DebiasPipeline

In [None]:
# Load the "jigsaw_unintended_bias" dataset; the raw files are expected under
# the relative data directory given below.
from datasets import load_dataset

dataset = load_dataset(
    "jigsaw_unintended_bias",
    data_dir="../../data/jigsaw_bias/",
)

# Evaluation
- check whether debiasing reduces extrinsic biases
- plot correlation (job-wise vs. overall)

In [None]:
# Load the protected-group definitions and the configuration of the experiment
# run that is evaluated in this notebook.
with open('data/protected_groups.json', 'r') as f:
    pg_config = json.load(f)

with open('results/jigsaw_20343/config.json', 'r') as f:
    exp_config = json.load(f)

#with open(exp_config['batch_size_lookup'], 'r') as f:
#    batch_size_lookup = json.load(f)

groups_by_bias_types = pg_config['groups_by_bias_types']
terms_by_groups = pg_config['terms_by_groups']

# Name -> class lookup tables, so that scores / optimizers / losses can be
# selected by the string names used in the config.
cosine_scores = {'SAME': SAME, 'WEAT': WEAT, 'gWEAT': GeneralizedWEAT, 'DirectBias': DirectBias, 'MAC': MAC}
optimizer = {'RMSprop': torch.optim.RMSprop, 'Adam': torch.optim.Adam}
criterions = {
    # Misspelled legacy key, kept so that configs that already use it still resolve.
    'BCEWithLogitsLosss': torch.nn.BCEWithLogitsLoss,
    'MultiLabelSoftMarginLoss': torch.nn.MultiLabelSoftMarginLoss,
    # Correctly spelled alias matching the torch class name.
    'BCEWithLogitsLoss': torch.nn.BCEWithLogitsLoss,
}

In [None]:
# Display the loaded experiment configuration for reference.
exp_config

In [None]:
# Load the pickled experiment output. 'params' and 'results' are indexed in
# parallel by experiment id in the cells below.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only open result files produced by our own experiment runs.
# os.path.join (instead of string concatenation) makes this work whether or not
# exp_config['save_dir'] ends with a path separator.
save_file = os.path.join(exp_config['save_dir'], 'res.pickle')
with open(save_file, 'rb') as handle:
    res = pickle.load(handle)

exp_parameters = res['params']
results = res['results']
#results_test = res['results_eval']

In [None]:
# Every experiment needs exactly one parameter entry and one result entry,
# because later cells index both lists with the same experiment id.
assert len(exp_parameters) == len(results), f"shape mismatch: {len(exp_parameters)} vs. {len(results)}"

In [None]:
# Histograms of recall and precision over all experiments, side by side.
recalls = [result['recall'] for result in results]
precisions = [result['precision'] for result in results]

fig, axes = plt.subplots(1, 2, figsize=(10, 5))
for ax, values, title in zip(axes, (recalls, precisions), ('Recall', 'Precision')):
    # bin first, then draw the pre-computed counts
    counts, bins = np.histogram(values)
    ax.hist(bins[:-1], bins, weights=counts)
    ax.set_xlim(-0.05, 1.05)  # fixed x-range so both panels are comparable
    ax.set_title(title)

plt.show()

### Exclude models with bad performance

In [None]:
# Collect every embedder that has at least one run whose recall or precision
# is not above 0.3, and print the hyperparameters of the offending runs.
blacklist_models = []

cur_model = ""
for i, result in enumerate(results):
    params = exp_parameters[i]

    # print a header whenever the embedder changes between consecutive runs
    if params['embedder'] != cur_model:
        cur_model = params['embedder']
        print()
        print(cur_model)

    good_enough = result['recall'] > 0.3 and result['precision'] > 0.3
    if not good_enough:
        if cur_model not in blacklist_models:
            blacklist_models.append(cur_model)

        if params['debias']:
            print(params['lr'], params['debias'], params['debias_k'])
        else:
            print(params['lr'], params['debias'])

blacklist_models

## Correlation of TP/TN GAP with cosine scores

In [None]:
scores_binary = ['SAME', 'WEAT', 'gWEAT', 'DirectBias', 'MAC']
scores_multi = ['SAME', 'gWEAT', 'DirectBias', 'MAC']
eval_scores = ['TP', 'TN', 'subgroup_AUC', 'BNSP', 'BPSN']

# scores_per_bias_type collects the raw values of every experiment,
# scores_per_bias_type2 the mean of those values per experiment.
# 'gender' uses the binary score set (incl. WEAT), all other bias types the
# multi-group score set.
scores_per_bias_type = {}
scores_per_bias_type2 = {}
for bt in exp_config['bias_types']:
    score_list = (scores_binary if bt == 'gender' else scores_multi) + eval_scores
    scores_per_bias_type[bt] = {score: [] for score in score_list}
    scores_per_bias_type2[bt] = {score: [] for score in score_list}


for i in range(len(results)):
    print("experiment", i, "with bias type", exp_parameters[i]['bias_type'], "and", exp_parameters[i]['embedder'])

    if exp_parameters[i]['embedder'] in blacklist_models+['glove-wiki-gigaword-300']:
        print("skip blacklisted models")
        continue

    # every fold must provide exactly two class-wise values, otherwise the
    # experiment is skipped
    classwise = results[i]['extrinsic_classwise']
    if not all(len(fold_res) == 2 for fold_res in classwise):
        continue

    bt = exp_parameters[i]['bias_type']
    # Iterate over the score set of THIS bias type. The module-level score_list
    # only reflects the last bias type of the initialisation loop above and
    # would yield a KeyError / missing WEAT entries for other bias types.
    for score in scores_per_bias_type[bt]:
        if score == 'TN':
            scores = [fold[0] for fold in classwise]  # TN is read from index 0
        elif score == 'TP':
            scores = [fold[1] for fold in classwise]  # TP is read from index 1
        else:
            scores = results[i][score]
        scores_per_bias_type[bt][score].append(scores)
        scores_per_bias_type2[bt][score].append(np.mean(scores))
    print(classwise)
    print()


In [None]:
# Absolute per-experiment mean scores, grouped by bias type.
# The keys are taken from each bias type's own score dict instead of the
# leftover `score` / `score_list` globals of the previous cell, so the cell no
# longer depends on which bias type happened to be processed last.
mean_scores = {}
for bt, bt_scores in scores_per_bias_type2.items():
    mean_scores[bt] = {score: np.abs(values) for score, values in bt_scores.items()}

# Regress every other score against the TP / TN values and plot the fit.
for comp in ['TP', 'TN']:#, 'subgroup_AUC', 'BNSP', 'BPSN']:
    for bt, bt_means in mean_scores.items():
        df = pd.DataFrame(bt_means)
        print(bt)
        if len(df) < 2:
            # linregress needs at least two observations
            print("not enough valid experiments for a regression")
            print()
            continue
        for score in bt_means:
            if not score in ['TP', 'TN']:
                print(score)
                reg = scipy.stats.linregress(df.loc[:,comp], df.loc[:,score])
                sns.regplot(x=comp, y=score, data=df).set_title("R="+str(reg.rvalue)+" (p="+str(reg.pvalue)+")")
                plt.show()

                print()

In [None]:
# Correlation on the raw values: the entries of all experiments of a bias type
# are concatenated into one flat array per score.
for bt, res in scores_per_bias_type.items():
    print(bt)

    if not res['TP']:
        # np.hstack cannot concatenate an empty sequence
        print("no valid experiments")
        print()
        continue

    tp_res = np.hstack(res['TP'])
    tn_res = np.hstack(res['TN'])
    print(tp_res.shape)
    # Select the cosine scores by name. Slicing score_list[:4] silently dropped
    # 'MAC' whenever the binary score set (five cosine scores) was active.
    for score in [name for name in res if name in cosine_scores]:
        score_res = np.hstack(res[score])

        df = pd.DataFrame({'TP': tp_res, 'TN': tn_res, score: score_res})

        print(score)
        reg = scipy.stats.linregress(tp_res, score_res)
        sns.regplot(x='TP', y=score, data=df).set_title("TP: R="+str(reg.rvalue)+" (p="+str(reg.pvalue)+")")
        plt.show()

        reg = scipy.stats.linregress(tn_res, score_res)
        sns.regplot(x='TN', y=score, data=df).set_title("TN: R="+str(reg.rvalue)+" (p="+str(reg.pvalue)+")")
        plt.show()
        print()

## Can we distinguish less/more biased models with cosine scores?

In [None]:
# Collect, for every non-blacklisted experiment with complete class-wise
# results, the mean 'extrinsic_classwise_neutral' values over the splits
# (column 0 is plotted as TN, column 1 as TP) and the mean subgroup AUC.
biases = []
auc_biases = []
valid_exp_ids = []
for i in range(len(results)):
    if exp_parameters[i]['embedder'] in blacklist_models:
        continue

    class_biases = results[i]['extrinsic_classwise_neutral']
    # every split must provide exactly two class-wise values
    if not all(len(split) == 2 for split in class_biases):
        continue

    biases.append(np.mean(class_biases, axis=0))
    # use the AUC of experiment i (previously results[0] was read for every
    # experiment, which made all entries identical)
    auc_biases.append(np.mean(results[i]['subgroup_AUC']))
    valid_exp_ids.append(i)

biases = np.asarray(biases)
auc_biases = np.asarray(auc_biases)

# one histogram per measure
for label, values in (("TN:", biases[:,0]), ("TP:", biases[:,1]), ("AUC:", auc_biases)):
    print(label)
    counts, bins = np.histogram(values)
    plt.hist(bins[:-1], bins, weights=counts)
    plt.show()

In [None]:
# Split the valid experiments into "low" and "high" bias groups: an experiment
# counts as low (high) for a measure when its value lies more than one standard
# deviation below (above) the mean over all valid experiments.
# NOTE(review): the 'AUC' lists are created, and mean_auc / std_auc computed,
# but nothing is appended to 'AUC' in this cell.
exp_low_bias = {'TP': [], 'TN': [], 'AUC': []}
exp_high_bias = {'TP': [], 'TN': [], 'AUC': []}

mean = np.mean(biases, axis=0)
std = np.std(biases, axis=0)

mean_auc = np.mean(auc_biases, axis=0)
std_auc = np.std(auc_biases, axis=0)
print(mean.shape)

lower_bound = mean - std
upper_bound = mean + std
for row, exp_id in zip(biases, valid_exp_ids):
    # column 0 feeds the 'TN' lists, column 1 the 'TP' lists
    for col, key in enumerate(('TN', 'TP')):
        if row[col] < lower_bound[col]:
            exp_low_bias[key].append(exp_id)
        if row[col] > upper_bound[col]:
            exp_high_bias[key].append(exp_id)

print(exp_low_bias)
print(exp_high_bias)

In [None]:
import matplotlib

# Global matplotlib font settings (larger text for the figures below).
font = dict(family='sans-serif', weight='normal', size=20)
matplotlib.rc('font', **font)

def get_score_pred(exp_i, exp_j, score_key, idx=None):
    """Return 1 if experiment ``exp_i`` has a lower score than ``exp_j``, else 0.

    Both scores are read from the notebook-level ``results`` list under
    ``score_key``.

    Args:
        exp_i: index of the first experiment in ``results``.
        exp_j: index of the second experiment in ``results``.
        score_key: key of the score inside each result entry.
        idx: if given, each stored value is converted to an array, column
            ``idx`` is selected and averaged before comparing; if ``None`` the
            stored values are compared directly.

    Returns:
        ``int`` 1 or 0, the outcome of the ``<`` comparison.
    """
    score_i = results[exp_i][score_key]
    score_j = results[exp_j][score_key]
    if idx is not None:
        score_i = np.mean(np.asarray(score_i)[:, idx])
        score_j = np.mean(np.asarray(score_j)[:, idx])
    return int(score_i < score_j)


In [None]:
# True positive
# Ordered experiment pairs: first every (low, high) combination, then every
# (high, low) combination of the TP bias groups.
tp_pairs = [(i, j) for i in exp_low_bias['TP'] for j in exp_high_bias['TP']]
tp_pairs += [(i, j) for i in exp_high_bias['TP'] for j in exp_low_bias['TP']]

# reference labels: comparison of the extrinsic class-wise values (column 1)
extrinsic_pred = [get_score_pred(i, j, 'extrinsic_classwise_neutral', 1) for i, j in tp_pairs]

for score in cosine_scores:
    is_gender = exp_parameters[0]['bias_type'] == 'gender'
    if score == 'gWEAT' and is_gender: # binary experiment
        continue
    if score == 'WEAT' and not is_gender: # multi group experiment
        continue
    print(score)

    # same pairwise comparison, but based on the cosine score
    score_pred = [get_score_pred(i, j, score+'_neutral') for i, j in tp_pairs]

    print(score_pred)
    cm = confusion_matrix(extrinsic_pred, score_pred, normalize='true')
    cm_display = ConfusionMatrixDisplay(cm).plot()
    heatmap = cm_display.ax_.get_images()[0]
    heatmap.set_clim(0, 1)
    heatmap.set_cmap(plt.cm.Blues)
    cm_display.ax_.set_title(score)
    plt.savefig('plots/cm_bias_pred_'+score+'.png', bbox_inches="tight")
    plt.show()
    print("ROC AUC: ", roc_auc_score(extrinsic_pred, score_pred))
    print("accuracy: ", accuracy_score(extrinsic_pred, score_pred))

### True negative predictability was not used in the paper

In [None]:
# True negative
# Ordered experiment pairs: first every (low, high) combination, then every
# (high, low) combination of the TN bias groups.
tn_pairs = [(i, j) for i in exp_low_bias['TN'] for j in exp_high_bias['TN']]
tn_pairs += [(i, j) for i in exp_high_bias['TN'] for j in exp_low_bias['TN']]

# reference labels: comparison of the extrinsic class-wise values (column 0)
extrinsic_pred = [get_score_pred(i, j, 'extrinsic_classwise_neutral', 0) for i, j in tn_pairs]

for score in cosine_scores.keys():

    if score == 'gWEAT' and exp_parameters[0]['bias_type'] == 'gender': # binary experiment
        continue
    if score == 'WEAT' and not exp_parameters[0]['bias_type'] == 'gender': # multi group experiment
        continue
    print(score)

    # same pairwise comparison, but based on the cosine score
    score_pred = [get_score_pred(i, j, score+'_neutral') for i, j in tn_pairs]

    print(score_pred)
    cm = confusion_matrix(extrinsic_pred, score_pred, normalize='true')
    cm_display = ConfusionMatrixDisplay(cm).plot()
    cm_display.ax_.get_images()[0].set_clim(0, 1)
    cm_display.ax_.get_images()[0].set_cmap(plt.cm.Blues)
    cm_display.ax_.set_title(score)
    # Separate file name for the TN plots: the previous name was identical to
    # the one used by the true-positive cell, so running this cell overwrote
    # the TP confusion matrices on disk.
    plt.savefig('plots/cm_bias_pred_TN_'+score+'.png', bbox_inches="tight")
    plt.show()
    print("ROC AUC: ", roc_auc_score(extrinsic_pred, score_pred))
    print("accuracy: ", accuracy_score(extrinsic_pred, score_pred))