## Reference:
Manzini, T.; Lim, Y. C.; Tsvetkov, Y.; and Black, A. W. 2019. Black is to Criminal as Caucasian is to Police: Detecting and Removing Multiclass Bias in Word Embeddings. NAACL.

In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np

import math

import random

import torch.optim as optim
import torch.nn as nn
import torch

from vocab import Vocab
from featurize import formatBatchedExamples, constructEmbeddingTensorFromVocabAndWvs, getExampleSubset
from models.POSTagger import POSTagger
from modelUtil import train_step, test, precisionRecallEval

from DataLoader import loadNERDatasetXY
from DataBatch import SeqDataBatch
from sklearn.metrics import f1_score, recall_score, precision_score

In [2]:
# Data parameters (the whole pipeline needs to be rerun if these are changed).
# NOTE(review): BATCH_SIZE, MAX_SEQ_LEN, the *_TOKEN strings, the *_PERCENT values and
# the DATA_TARGET_* indices are not referenced by any code visible in this notebook;
# confirm whether they are still needed.
BATCH_SIZE = 2

MAX_SEQ_LEN = 128

SOS_TOKEN = "<SOS>"
PAD_TOKEN = "<PAD>"
UNK_TOKEN = "<UNK>"
EOS_TOKEN = "<EOS>"

TRAIN_PERCENT = 0.8
VAL_PERCENT = 0.1
TEST_PERCENT = 0.1

DATA_TARGET_POS = 1
DATA_TARGET_CHUNK = 2

# Learning parameters (only the model training code needs to be rerun if these are changed).
LEARNING_RATE = 0.001   # optimizer learning rate
EPOCHS = 25             # number of training epochs per model
USE_CUDA = False        # when True, training code that checks it selects the "cuda" device
DEBUG_INTERVAL = 250    # forwarded to train_step; presumably a logging interval - TODO confirm
L2_REG = 0.001          # passed to the optimizer as weight_decay
MOMENTUM = 0.25         # optimizer momentum

# NOTE(review): DEBIAS_EPS is not referenced by any code visible in this notebook.
DEBIAS_EPS = 1e-3
EMBEDDING_SIZE = 300    # dimensionality of each word vector / embedding row


# Notebook-level default device. It is always the CPU here, regardless of USE_CUDA.
device = torch.device("cpu")

In [4]:
# Load the original (biased) GloVe vectors into a dict: token -> np.ndarray of length VEC_LEN.
# Each line of the file is expected to be "<token> <v1> <v2> ... <vN>", separated by single spaces.
VEC_LEN = 300
glove_word = {}
# `with` guarantees the handle is closed even if a malformed line raises below
# (the original only closed the file when the whole loop succeeded).
with open("../Data/WordEmbedding/glove_wiki_vectors.txt", 'r') as glove_file:
    for line in glove_file:
        line = line.strip()
        _word = line.split(' ')
        vector = np.array([float(num) for num in _word[1:]])
        # Fail fast on truncated / corrupted rows instead of building a ragged matrix later.
        if len(vector) != VEC_LEN:
            raise Exception("Word dimension is wrong")
        glove_word[_word[0]] = vector
print(len(glove_word))

322636


In [5]:
# File name of the debiased word vectors, resolved relative to ../Data/WordEmbedding/.
# The same string is reused later as the value-column header of the results table
# and as part of the output CSV file name.
file_name = "P_DeSIP_vectors.txt"

In [6]:
# pandas is imported here because the results cells further down build DataFrames with `pd`.
import pandas as pd

# Load the debiased vectors into a dict: token -> np.ndarray of length VEC_LEN.
# Each line of the file is expected to be "<token> <v1> <v2> ... <vN>", separated by single spaces.
VEC_LEN = 300
debias_word = {}
# `with` guarantees the handle is closed even if a malformed line raises below
# (the original only closed the file when the whole loop succeeded).
with open("../Data/WordEmbedding/" + file_name, 'r') as debias_file:
    for line in debias_file:
        line = line.strip()
        _word = line.split(' ')
        vector = np.array([float(num) for num in _word[1:]])
        # Fail fast on truncated / corrupted rows instead of building a ragged matrix later.
        if len(vector) != VEC_LEN:
            raise Exception("Word dimension is wrong")
        debias_word[_word[0]] = vector

## torchtext

In [8]:
from torchtext import data, datasets
from torchtext.legacy.data import Field, TabularDataset, BucketIterator, Pipeline

In [9]:
def getDataSet(biasedWvs, debiasedWvs, task, batch_size = 5):
    """Load one task's CSV splits and build aligned biased / debiased embedding matrices.

    Args:
        biasedWvs: mapping ``token -> vector`` for the original embeddings, indexed
            as ``biasedWvs[token]``.
        debiasedWvs: mapping ``token -> vector`` for the debiased embeddings. It must
            contain every vocabulary token that is found in ``biasedWvs``.
        task: name of the sub-directory of ``./data/`` that holds ``train.csv``,
            ``valid.csv`` and ``test.csv`` (columns ``x`` and ``y``).
        batch_size: batch size handed to the iterators.

    Returns:
        Tuple ``(embedding_org, embedding_debias, X, Y, train_iter, val_iter, test_iter)``.
        Both embedding tensors have one row per entry of ``X.vocab.itos``. Rows for
        tokens missing from ``biasedWvs`` hold the same random vector in both tensors.

    Raises:
        KeyError: a token has a biased vector but no debiased vector.
    """
    tokenize = lambda x: x.split()
    # Tokens and tags are lower-cased so vocabulary entries match lower-case vector keys.
    X = Field(sequential=True,use_vocab=True,tokenize=tokenize,preprocessing=Pipeline(lambda x: x.lower()))
    Y = Field(sequential=True,use_vocab=True,tokenize=tokenize,is_target=True,preprocessing=Pipeline(lambda x: x.lower()))
    fields = {
     'x':('X', X),
     'y':('Y', Y)
         }
    # Named `splits` rather than `data` so the notebook-level `torchtext.data` import is not shadowed.
    splits = TabularDataset.splits(path='./data/'+ task + '/', train='train.csv',validation='valid.csv',test='test.csv', format='csv', fields=fields)
    # Vocabularies are built from the training split only.
    X.build_vocab(splits[0],specials=['<pad>'])
    Y.build_vocab(splits[0],specials=['<pad>'])
    train_iter, val_iter, test_iter = BucketIterator.splits(splits,batch_size=batch_size,device='cpu',shuffle=True,sort=False)

    vocab_tokens = X.vocab.itos
    # A set keeps the membership test in the second pass O(1); the original list made it O(V^2).
    unknown_idx = set()
    wvTensor = [[0.0]*EMBEDDING_SIZE for _ in range(len(vocab_tokens))]
    for i, token in enumerate(vocab_tokens):
        try:
            wvTensor[i] = biasedWvs[token]
        except KeyError:
            # Only a missing token is expected here; any other error should surface.
            unknown_idx.add(i)
            wvTensor[i] = [float(v) for v in np.random.rand(EMBEDDING_SIZE)]

    # Shallow copy is sufficient: rows are replaced wholesale below, never mutated in place,
    # so unknown-token rows stay shared (identical) between the two matrices.
    embedding_debias = wvTensor.copy()
    embedding_org = torch.tensor(wvTensor)
    print("length of vocab of org: ", len(embedding_org))
    print("length of unknown: ", len(unknown_idx))
    for i, token in enumerate(vocab_tokens):
        if i in unknown_idx:
            continue
        try:
            embedding_debias[i] = debiasedWvs[token]
        except KeyError:
            raise KeyError(
                "token {!r} has a biased vector but is missing from the debiased vectors".format(token)
            ) from None
    embedding_debias = torch.tensor(embedding_debias)
    print("length of vocab of debias: ", len(embedding_debias))

    return embedding_org, embedding_debias, X, Y, train_iter, val_iter, test_iter

## Embedding Matrix Replacement

In [10]:
def trainBiased(task, embedding_org, embedding_debias, X, Y, train_iter, val_iter, test_iter):
    """Train a tagger on the original embeddings, then test it with both embedding sets.

    The model is trained with ``embedding_org`` frozen. The checkpoint with the lowest
    validation loss is saved to ``models/savedModels/model_<task>_org.m`` and then
    evaluated on the test split twice: once with ``embedding_org`` and once with
    ``embedding_debias`` swapped in (no retraining).

    Args:
        task: task name, used in the checkpoint file name.
        embedding_org: embedding tensor built from the original vectors.
        embedding_debias: embedding tensor built from the debiased vectors.
        X, Y: fields whose ``vocab.itos`` lengths size the model's input / output.
        train_iter, val_iter, test_iter: batch iterators for the three splits.

    Returns:
        ``(result_name, result)``: two parallel 10-element lists. Positions 0-4 are the
        "Bias" header plus loss / precision / recall / f1 with the original embeddings;
        positions 5-9 are the same for the debiased embeddings.
    """
    # The literal 20 is the second POSTagger argument (presumably the hidden size - TODO confirm).
    posModel = POSTagger(EMBEDDING_SIZE, 20, len(X.vocab.itos), len(Y.vocab.itos))
    posModel.setEmbeddings(embedding_org, freeze=True)
    device = torch.device("cuda" if USE_CUDA else "cpu")
    optimizer = optim.RMSprop(posModel.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM, weight_decay=L2_REG)
    criterion = nn.CrossEntropyLoss()
    print("Starting Training")

    checkpoint_path = "models/savedModels/model_" + task + "_org.m"
    # Start from +inf so the very first epoch is checkpointed. The original skipped epoch 1,
    # which lost a best-at-epoch-1 model and, with EPOCHS == 1, loaded a missing/stale file.
    bestValLoss = float("inf")
    # Kept local (as in trainDebiased) instead of relying on a notebook global being defined.
    train_loss_record = []

    for epoch in range(1, EPOCHS + 1):
        loss = train_step(posModel, device, train_iter, optimizer, criterion, epoch, DEBUG_INTERVAL)
        train_loss_record.append(loss)
        val_loss = test(posModel, device, val_iter , criterion)
        train_loss = test(posModel, device, train_iter , criterion)
        result_val = precisionRecallEval(posModel, device, val_iter)
        result_train = precisionRecallEval(posModel, device, train_iter)
        print("Epoch #{} - \n\tVal Loss: {:.6f} \n\tVal Precision {:6f} \n\tVal Recall {:6f} \n\tVal Macro F1 {:6f}".format(epoch, val_loss, result_val[0], result_val[1], result_val[2]))
        print("Epoch #{} - \n\tTrain Loss: {:.6f} \n\tTrain Precision {:6f} \n\tTrain Recall {:6f} \n\tTrain Macro F1 {:6f}".format(epoch, train_loss, result_train[0], result_train[1], result_train[2]))

        if val_loss < bestValLoss:
            torch.save(posModel.state_dict(), checkpoint_path)
            bestValLoss = val_loss

    print("Found best case validation loss to be " + str(bestValLoss) + "\n\t... Loading saved model and testing")
    posModel.load_state_dict(torch.load(checkpoint_path))
    test_loss = test(posModel, device, test_iter, criterion)
    test_metrics = precisionRecallEval(posModel, device, test_iter)
    print("TEST DATA -\n\tTest Loss: {:.6f} \n\tTest Precision {:6f} \n\tTest Recall {:6f} \n\tTest Macro F1 {:6f}".format(test_loss, test_metrics[0], test_metrics[1], test_metrics[2]))

    result_name = []
    result = []

    # Evaluation 1: best checkpoint with the original embeddings it was trained on.
    posModel = POSTagger(EMBEDDING_SIZE, 20, len(X.vocab.itos), len(Y.vocab.itos))
    posModel.load_state_dict(torch.load(checkpoint_path))
    posModel.setEmbeddings(embedding_org, freeze=True)

    biased_precision, biased_recall, biased_f1, _ = precisionRecallEval(posModel, device, test_iter)
    test_biased_loss = test(posModel, device, test_iter, criterion)
    result_name.extend(["Bias", "loss: ", "precision: ", "recall: ", "f1: "])
    result.extend([" ", test_biased_loss, biased_precision, biased_recall, biased_f1])
    print("============= BIASED EMBEDDINGS TEST RESULTS =============")
    print("loss: " + str(test_biased_loss))
    print("precision: " + str(biased_precision))
    print("recall: " + str(biased_recall))
    print("f1: " + str(biased_f1))

    # Evaluation 2: same weights, but with the debiased embedding matrix swapped in.
    posModel = POSTagger(EMBEDDING_SIZE, 20, len(X.vocab.itos), len(Y.vocab.itos))
    posModel.load_state_dict(torch.load(checkpoint_path))
    posModel.setEmbeddings(embedding_debias, freeze=True)

    debiased_precision, debiased_recall, debiased_f1, _ = precisionRecallEval(posModel, device, test_iter)
    test_debiased_loss = test(posModel, device, test_iter, criterion)
    result_name.extend(["Debias", "loss: ", "precision: ", "recall: ", "f1: "])
    result.extend([" ", test_debiased_loss, debiased_precision, debiased_recall, debiased_f1])
    # The original printed "BIASED" here, mislabelling the debiased results in the log.
    print("============= DEBIASED EMBEDDINGS TEST RESULTS =============")
    print("loss: " + str(test_debiased_loss))
    print("precision: " + str(debiased_precision))
    print("recall: " + str(debiased_recall))
    print("f1: " + str(debiased_f1))

    print("============= EMBEDDINGS COMPARISION RESULTS =============")
    print("delta loss: " + str(test_debiased_loss - test_biased_loss))
    print("delta precision: " + str(debiased_precision - biased_precision))
    print("delta recall: " + str(debiased_recall - biased_recall))
    print("delta f1: " + str(debiased_f1 - biased_f1))

    return result_name, result

## Model Retraining

In [11]:
def trainDebiased(task, embedding_org, embedding_debias, X, Y, train_iter, val_iter, test_iter):
    """Retrain a tagger from scratch on the debiased embeddings and test it.

    The model is trained with ``embedding_debias`` frozen. The checkpoint with the
    lowest validation loss is saved to ``models/savedModels/model_<task>_debias.m``
    and evaluated on the test split.

    Args:
        task: task name, used in the checkpoint file name.
        embedding_org: unused here; accepted so the signature mirrors ``trainBiased``.
        embedding_debias: embedding tensor built from the debiased vectors.
        X, Y: fields whose ``vocab.itos`` lengths size the model's input / output.
        train_iter, val_iter, test_iter: batch iterators for the three splits.

    Returns:
        A 5-element list ``[" ", loss, precision, recall, f1]`` for the test split.
    """
    # The literal 20 is the second POSTagger argument (presumably the hidden size - TODO confirm).
    posModel = POSTagger(EMBEDDING_SIZE, 20, len(X.vocab.itos), len(Y.vocab.itos))
    posModel.setEmbeddings(embedding_debias, freeze=True)
    # Derived locally, exactly as trainBiased does, instead of depending on a notebook global.
    device = torch.device("cuda" if USE_CUDA else "cpu")

    optimizer = optim.RMSprop(posModel.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM, weight_decay=L2_REG)
    criterion = nn.CrossEntropyLoss()
    print("Starting Training")

    checkpoint_path = "models/savedModels/model_" + task + "_debias.m"
    # Start from +inf so the very first epoch is checkpointed. The original skipped epoch 1,
    # which lost a best-at-epoch-1 model and, with EPOCHS == 1, loaded a missing/stale file.
    bestValLoss = float("inf")
    train_loss_record = []
    for epoch in range(1, EPOCHS + 1):
        loss = train_step(posModel, device, train_iter, optimizer, criterion, epoch, DEBUG_INTERVAL)
        train_loss_record.append(loss)
        val_loss = test(posModel, device, val_iter , criterion)
        train_loss = test(posModel, device, train_iter , criterion)
        result_val = precisionRecallEval(posModel, device, val_iter)
        result_train = precisionRecallEval(posModel, device, train_iter)
        print("Epoch #{} - \n\tVal Loss: {:.6f} \n\tVal Precision {:6f} \n\tVal Recall {:6f} \n\tVal Macro F1 {:6f}".format(epoch, val_loss, result_val[0], result_val[1], result_val[2]))
        print("Epoch #{} - \n\tTrain Loss: {:.6f} \n\tTrain Precision {:6f} \n\tTrain Recall {:6f} \n\tTrain Macro F1 {:6f}".format(epoch, train_loss, result_train[0], result_train[1], result_train[2]))

        if val_loss < bestValLoss:
            torch.save(posModel.state_dict(), checkpoint_path)
            bestValLoss = val_loss

    print("Found best case validation loss to be " + str(bestValLoss) + "\n\t... Loading saved model and testing")
    posModel.load_state_dict(torch.load(checkpoint_path))
    test_loss = test(posModel, device, test_iter, criterion)
    test_metrics = precisionRecallEval(posModel, device, test_iter)
    print("TEST DATA -\n\tTest Loss: {:.6f} \n\tTest Precision {:6f} \n\tTest Recall {:6f} \n\tTest Macro F1 {:6f}".format(test_loss, test_metrics[0], test_metrics[1], test_metrics[2]))

    # Re-create the model from the best checkpoint and collect the reported numbers.
    result = []
    posModel = POSTagger(EMBEDDING_SIZE, 20, len(X.vocab.itos), len(Y.vocab.itos))
    posModel.load_state_dict(torch.load(checkpoint_path))
    posModel.setEmbeddings(embedding_debias, freeze=True)

    debiased_precision, debiased_recall, debiased_f1, _ = precisionRecallEval(posModel, device, test_iter)
    test_debiased_loss = test(posModel, device, test_iter, criterion)

    # Leading " " is the blank value cell that lines up with a section-header label.
    result.extend([" ", test_debiased_loss, debiased_precision, debiased_recall, debiased_f1])
    print("============= DEBIASED EMBEDDINGS TEST RESULTS =============")
    print("loss: " + str(test_debiased_loss))
    print("precision: " + str(debiased_precision))
    print("recall: " + str(debiased_recall))
    print("f1: " + str(debiased_f1))

    return result
    
    
    
    

In [12]:
def calculate(resultBias, resultDebias, nameBias):
    """Assemble the full label / value columns for one task's results table.

    Args:
        resultBias: 10 values; index 1-4 are loss / precision / recall / f1 with the
            original embeddings, index 6-9 the same metrics with debiased embeddings
            swapped in. Index 0 and 5 are blank header cells.
        resultDebias: 5 values; index 1-4 are the metrics of the retrained model,
            index 0 is a blank header cell.
        nameBias: labels that go with ``resultBias``.

    Returns:
        ``(labels, values)``: the inputs followed by two "Comparison" sections, each
        holding metric deltas relative to the original-embedding results.
    """
    metric_labels = ["loss: ", "precision: ", "recall: ", "f1: "]
    delta_labels = ["Comparison"] + ["delta " + label for label in metric_labels]
    metric_slots = range(1, 5)

    # Debiased-swap metrics sit five slots after their original-embedding counterparts.
    swap_deltas = [resultBias[slot + 5] - resultBias[slot] for slot in metric_slots]
    retrain_deltas = [resultDebias[slot] - resultBias[slot] for slot in metric_slots]

    labels = list(nameBias) + delta_labels + ["Debias train"] + metric_labels + delta_labels
    values = list(resultBias) + [" "] + swap_deltas + list(resultDebias) + [" "] + retrain_deltas
    return labels, values
    

In [13]:
def main(task, biasedWvs, debiasedWvs, batch_size=5):
    """Run the full biased-vs-debiased comparison for one task.

    Loads the task data, trains on the original embeddings (evaluating with both
    embedding sets), retrains on the debiased embeddings, and merges all results.

    Args:
        task: task name (sub-directory of ``./data/`` and checkpoint name component).
        biasedWvs: mapping ``token -> vector`` for the original embeddings.
        debiasedWvs: mapping ``token -> vector`` for the debiased embeddings.
        batch_size: batch size for the data iterators. Defaults to 5, the value that
            was previously hard-coded here.

    Returns:
        ``(name, number)``: parallel lists of row labels and row values.
    """
    embedding_org, embedding_debias, X, Y, train_iter, val_iter, test_iter = getDataSet(biasedWvs, debiasedWvs, task, batch_size)
    nameBias, resultBias = trainBiased(task, embedding_org, embedding_debias, X, Y, train_iter, val_iter, test_iter)

    resultDebias = trainDebiased(task, embedding_org, embedding_debias, X, Y, train_iter, val_iter, test_iter)

    name, number = calculate(resultBias, resultDebias, nameBias)

    return name, number

    

In [16]:
# Notebook-level loss log. Training code may look this name up as a global,
# so it has to exist before main() is called.
train_loss_record = []
# Parallel columns of the final results table: row labels and row values.
final_name = []
final_result = []
for task in ["pos", "ner", "chunking"]:
    # Section header row for this task: the label is the task name, the value is blank.
    final_name.append(task)
    final_result.append(" ")
    result_name, result = main(task, glove_word, debias_word)
    final_name.extend(result_name)
    final_result.extend(result)
# The value column is named after the debiased-vector file the numbers were produced with.
df = pd.DataFrame({"label":final_name, file_name:final_result})
# '/' in file_name is replaced so the name cannot introduce extra path components.
df.to_csv("../Result/performance/" + file_name.replace('/', '_') + "1.csv", index=False)

In [15]:
# Rebuild the results table from the accumulated label / value lists and display it.
# Requires final_name, final_result and file_name to have been defined by earlier cells.
import pandas as pd
df = pd.DataFrame({"label":final_name, file_name:final_result})
df

Unnamed: 0,label,Hard_bias
0,pos,
1,Bias,
2,loss:,0.372697
3,precision:,0.938881
4,recall:,0.874931
...,...,...
73,Comparison,
74,delta loss:,0.076395
75,delta precision:,0.007522
76,delta recall:,-0.043787
