In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np

import math

import random

import torch.optim as optim
import torch.nn as nn
import torch

from vocab import Vocab
from featurize import formatBatchedExamples, constructEmbeddingTensorFromVocabAndWvs, getExampleSubset
from models.POSTagger import POSTagger
from modelUtil import train_step, test, precisionRecallEval

from DataLoader import loadNERDatasetXY
from DataBatch import SeqDataBatch

In [2]:
# ---------------------------------------------------------------------------
# Data parameters -- the whole pipeline has to be re-run if any of these change
# ---------------------------------------------------------------------------
BATCH_SIZE = 64
MAX_SEQ_LEN = 128

# Special tokens (lower-cased before being used as vocabulary / word-vector keys)
PAD_TOKEN = "<PAD>"
UNK_TOKEN = "<UNK>"
SOS_TOKEN = "<SOS>"
EOS_TOKEN = "<EOS>"

# Split fractions
TRAIN_PERCENT, VAL_PERCENT, TEST_PERCENT = 0.8, 0.1, 0.1

# Selector values for the label field; the loading cell passes DATA_TARGET_NER
# as `targetField` to loadNERDatasetXY.
DATA_TARGET_POS, DATA_TARGET_CHUNK, DATA_TARGET_NER = 1, 2, 3

# ---------------------------------------------------------------------------
# Learning parameters -- only the model-training cells have to be re-run
# ---------------------------------------------------------------------------
EPOCHS = 25
LEARNING_RATE = 0.001   # RMSprop `lr`
MOMENTUM = 0.25         # RMSprop `momentum`
L2_REG = 0.001          # RMSprop `weight_decay`
USE_CUDA = False        # selects the "cuda" device when True, "cpu" otherwise
DEBUG_INTERVAL = 250    # forwarded to train_step

# A word counts as "debiased" when the summed absolute difference between its
# biased and debiased vectors exceeds this threshold.
DEBIAS_EPS = 1e-10

# Word-vector files
BIASED_WVS = "data/wvs/data_gender_attributes_optm_json_role_biasedEmbeddingsOut.w2v"
DEBIASED_WVS = "data/wvs/data_gender_attributes_optm_json_role_hardDebiasedEmbeddingsOut.w2v"

In [3]:
def loadWordVectors(path):
    """Read a text-format word-vector file into a ``{word: [float, ...]}`` dict.

    The first line of the file is skipped as a header. Every remaining
    non-blank line is split on single spaces into a token followed by its
    vector components. Tokens are lower-cased before being used as keys, so a
    later line overwrites an earlier one whose token lower-cases to the same
    string.

    Args:
        path: Location of the word-vector file.

    Returns:
        A ``(vectors, dim)`` tuple: the word -> vector mapping and the vector
        length shared by every entry.

    Raises:
        ValueError: If the file contains no vectors, or if the vectors do not
            all have the same length.
    """
    vectors = {}
    dim = None
    # `with` closes the handle even when a line fails to parse, and iterating
    # the handle avoids holding the whole file in memory at once.
    with open(path) as wvFile:
        next(wvFile, None)  # skip the header line
        for line in wvFile:
            # Drop the line ending (and trailing blanks) so it cannot become a
            # bogus final component; only the right-hand side is stripped so
            # the token at the start of the line is left untouched.
            fields = line.rstrip().split(" ")
            if fields == [""]:
                continue  # blank line
            values = [float(v) for v in fields[1:]]
            if dim is None:
                dim = len(values)
            elif len(values) != dim:
                raise ValueError(
                    "Inconsistent vector length in " + path + ": expected "
                    + str(dim) + ", got " + str(len(values)) + " for " + repr(fields[0])
                )
            vectors[fields[0].lower()] = values
    if dim is None:
        raise ValueError("No word vectors found in " + path)
    return vectors, dim


print("Loading CONLL2003 NER Data")
trainX, trainY = loadNERDatasetXY("./data/ner/train.txt", sourceField=0, targetField=DATA_TARGET_NER, sortLength=True)
testX,  testY  = loadNERDatasetXY("./data/ner/test.txt", sourceField=0, targetField=DATA_TARGET_NER, sortLength=True)
valX,   valY   = loadNERDatasetXY("./data/ner/valid.txt", sourceField=0, targetField=DATA_TARGET_NER, sortLength=True)

print("Loading biased word vectors")
biasedWvs, biasedDim = loadWordVectors(BIASED_WVS)

print("Loading debiased word vectors")
debiasedWvs, EMBEDDING_SIZE = loadWordVectors(DEBIASED_WVS)

# Both sets are given the same <unk>/<pad> vectors of length EMBEDDING_SIZE and
# are later compared component by component, so they must share one dimension.
if biasedDim != EMBEDDING_SIZE:
    raise ValueError(
        "Biased and debiased word vectors differ in length: "
        + str(biasedDim) + " vs " + str(EMBEDDING_SIZE)
    )

print("Handling wv Edge Cases")
# A private, seeded generator makes the <unk>/<pad> vectors reproducible across
# kernel restarts without touching NumPy's global random state.
WV_SEED = 0
wvRng = np.random.RandomState(WV_SEED)
unk_wv = [float(v) for v in wvRng.rand(EMBEDDING_SIZE)]
pad_wv = [float(v) for v in wvRng.rand(EMBEDDING_SIZE)]

# The same two vectors go into both sets, so these tokens never register as
# "debiased" when the sets are compared.
biasedWvs[UNK_TOKEN.lower()] = unk_wv
biasedWvs[PAD_TOKEN.lower()] = pad_wv
debiasedWvs[UNK_TOKEN.lower()] = unk_wv
debiasedWvs[PAD_TOKEN.lower()] = pad_wv

Loading CONLL2003 NER Data
Loading biased word vectors
Loading debiased word vectors
Handling wv Edge Cases


In [4]:
print("Detecting words that had their baises altered")
# Keep every key whose biased and debiased vectors differ by more than
# DEBIAS_EPS in summed absolute (L1) distance. Keys are visited in the
# iteration order of biasedWvs.
debiasedTerms = [
    term
    for term, biasedVec in biasedWvs.items()
    if sum(math.fabs(p - q) for p, q in zip(biasedVec, debiasedWvs[term])) > DEBIAS_EPS
]

print("Constructing test set containing examples with debiased terms.")
# Subset of the test data selected by getExampleSubset from the terms found above.
testX_bias, testY_bias = getExampleSubset(testX, testY, debiasedTerms)

Detecting words that had their baises altered
Constructing test set containing examples with debiased terms.


In [5]:
# Input side: one entry per word-vector key, followed by the padding token.
print("Constructing input Vocabulary")
inputVocab = Vocab(UNK_TOKEN.lower())
for wvKey in biasedWvs:
    inputVocab.add(wvKey.lower())
inputVocab.add(PAD_TOKEN.lower())
print("Constructed an input vocabulary with", len(inputVocab), "entries")

# Output side: every label seen in the training targets (lower-cased),
# followed by the padding token.
print("Constructing output vocabulary")
outputVocab = Vocab(UNK_TOKEN.lower())
for tag in (label for labelSeq in trainY for label in labelSeq):
    outputVocab.add(tag.lower())
outputVocab.add(PAD_TOKEN.lower())
print("Constructed an output vocabulary with", len(outputVocab), "entries")

Constructing input Vocabulary
Constructed an input vocabulary with 302721 entries
Constructing output vocabulary
Constructed an output vocabulary with 11 entries


In [6]:
def makeBatches(xs, ys, inVocab, outVocab, batchSize, padToken):
    """Group parallel example lists into batches and convert them for the model.

    Examples are taken in order, `batchSize` at a time (the last batch may be
    smaller). Each batch is padded with `padToken` on both inputs and targets
    and then converted through ``getNumericXY("torch", unkify=True)``.

    Args:
        xs: Input sequences.
        ys: Target sequences, parallel to `xs`.
        inVocab: Vocabulary handed to each SeqDataBatch for the inputs.
        outVocab: Vocabulary handed to each SeqDataBatch for the targets.
        batchSize: Maximum number of examples per batch.
        padToken: Token passed to ``padBatch``.

    Returns:
        A ``(batches, formatted)`` tuple: the padded SeqDataBatch objects and
        a parallel list of ``(x.long(), y.long())`` pairs.
    """
    batches = []
    for i, (x, y) in enumerate(zip(xs, ys)):
        if i % batchSize == 0:
            # Start a new, empty batch every `batchSize` examples.
            batches.append(SeqDataBatch([], [], inVocab, outVocab))
        batches[-1].addXY(x, y)

    formatted = []
    for batch in batches:
        batch.padBatch(padToken, padX=True, padY=True, padingType="left")
        # NOTE(review): both vocabularies were filled with lower-cased strings,
        # while the raw tokens/labels and padToken reach SeqDataBatch unchanged.
        # If SeqDataBatch does not lower-case internally, unkify=True would map
        # them to the unknown index -- confirm, since that would be consistent
        # with the precision/recall/F1 of exactly 1.0 recorded from epoch 1.
        numX, numY = batch.getNumericXY("torch", unkify=True)
        formatted.append((numX.long(), numY.long()))
    return batches, formatted


print("Batching Training Data")

batchedTrainData, formattedBatchedTrainData = makeBatches(
    trainX, trainY, inputVocab, outputVocab, BATCH_SIZE, PAD_TOKEN)
print("Generated", len(formattedBatchedTrainData), "Train Batches")

batchedValData, formattedBatchedValData = makeBatches(
    valX, valY, inputVocab, outputVocab, BATCH_SIZE, PAD_TOKEN)
print("Generated", len(formattedBatchedValData), "Val Batches")

batchedTestData, formattedBatchedTestData = makeBatches(
    testX, testY, inputVocab, outputVocab, BATCH_SIZE, PAD_TOKEN)
print("Generated", len(formattedBatchedTestData), "Test Batches")

# Test examples restricted to those containing terms changed by debiasing.
batchedBiasTestData, formattedBatchedBiasTestData = makeBatches(
    testX_bias, testY_bias, inputVocab, outputVocab, BATCH_SIZE, PAD_TOKEN)
print("Generated", len(formattedBatchedBiasTestData), "Test Bias Batches")

Batching Training Data
Generated 235 Train Batches
Generated 55 Val Batches
Generated 58 Test Batches
Generated 473 Test Bias Batches


# Training from biased word vectors

In [7]:
# Tagger trained on top of the biased word vectors.
# NOTE(review): the literal 20 is presumably the hidden size -- confirm against POSTagger.
vocabSize, tagCount = len(inputVocab), len(outputVocab)
posModel = POSTagger(EMBEDDING_SIZE, 20, vocabSize, tagCount)
posModel.setEmbeddings(
    constructEmbeddingTensorFromVocabAndWvs(biasedWvs, inputVocab, EMBEDDING_SIZE),
    freeze=True,
)

In [8]:
# Train posModel, keeping on disk the weights with the lowest validation loss,
# then reload those weights and report metrics on the test batches.
device = torch.device("cuda" if USE_CUDA else "cpu")
optimizer = optim.RMSprop(posModel.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM, weight_decay=L2_REG)
criterion = nn.CrossEntropyLoss()
print("Starting Training")

# Baseline: score the untrained model and checkpoint it straight away. This
# guarantees the file loaded after the loop was written by this run and always
# corresponds to bestValLoss, even if no epoch improves on the baseline.
bestValLoss = test(posModel, device, formattedBatchedValData, criterion)
torch.save(posModel.state_dict(), "models/savedModels/model.m")
for epoch in range(1, EPOCHS + 1):
    loss = train_step(posModel, device, formattedBatchedTrainData, optimizer, criterion, epoch, DEBUG_INTERVAL)
    val_loss = test(posModel, device, formattedBatchedValData, criterion)
    precision, recall, f1 = precisionRecallEval(posModel, device, formattedBatchedValData)
    print("Epoch #{} - \n\tVal Loss: {:.6f} \n\tVal Precision {:6f} \n\tVal Recall {:6f} \n\tVal Macro F1 {:6f}".format(epoch, val_loss, precision, recall, f1))
    # Every improving epoch is checkpointed, including the first one.
    if val_loss < bestValLoss:
        torch.save(posModel.state_dict(), "models/savedModels/model.m")
        bestValLoss = val_loss

print("Found best case validation loss to be " + str(bestValLoss) + "\n\t... Loading saved model and testing")
# map_location lets the checkpoint load on the selected device regardless of
# the device it was saved from.
posModel.load_state_dict(torch.load("models/savedModels/model.m", map_location=device))
test_loss = test(posModel, device, formattedBatchedTestData, criterion)
precision, recall, f1 = precisionRecallEval(posModel, device, formattedBatchedTestData)
print("TEST DATA -\n\tTest Loss: {:.6f} \n\tTest Precision {:6f} \n\tTest Recall {:6f} \n\tTest Macro F1 {:6f}".format(test_loss, precision, recall, f1))

Starting Training
Epoch #1 - 
	Val Loss: 0.000970 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #2 - 
	Val Loss: 0.000531 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #3 - 
	Val Loss: 0.000636 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #4 - 
	Val Loss: 0.000861 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #5 - 
	Val Loss: 0.000934 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #6 - 
	Val Loss: 0.000963 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #7 - 
	Val Loss: 0.000994 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #8 - 
	Val Loss: 0.001026 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #9 - 
	Val Loss: 0.001055 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #10 - 
	Val Loss: 0.001081 
	Val Precision 1.0000

In [9]:
def _evalWithEmbeddings(title, wvs):
    """Score the saved biased-trained model on the bias test batches using `wvs`.

    Reloads ``models/savedModels/model.m`` into ``posModel``, replaces its
    embeddings with a tensor built from `wvs` (frozen), evaluates on
    ``formattedBatchedBiasTestData`` and prints the metrics.

    Args:
        title: Label used in the printed section header.
        wvs: Word -> vector mapping to install as the model's embeddings.

    Returns:
        ``(loss, precision, recall, f1)`` for the bias test batches.
    """
    # map_location lets the checkpoint load on the selected device regardless
    # of the device it was saved from.
    posModel.load_state_dict(torch.load("models/savedModels/model.m", map_location=device))
    posModel.setEmbeddings(constructEmbeddingTensorFromVocabAndWvs(wvs, inputVocab, EMBEDDING_SIZE), freeze=True)
    precision, recall, f1 = precisionRecallEval(posModel, device, formattedBatchedBiasTestData)
    loss = test(posModel, device, formattedBatchedBiasTestData, criterion)
    print("============= " + title + " EMBEDDINGS TEST RESULTS =============")
    print("loss: " + str(loss))
    print("precision: " + str(precision))
    print("recall: " + str(recall))
    print("f1: " + str(f1))
    return loss, precision, recall, f1


# Same trained weights, evaluated once with each set of word vectors.
test_biased_loss, biased_precision, biased_recall, biased_f1 = _evalWithEmbeddings("BIASED", biasedWvs)
print()
test_debiased_loss, debiased_precision, debiased_recall, debiased_f1 = _evalWithEmbeddings("DEBIASED", debiasedWvs)
print()

# Positive deltas mean the value is higher with the debiased vectors.
print("============= EMBEDDINGS COMPARISION RESULTS =============")
print("delta loss: " + str(test_debiased_loss - test_biased_loss))
print("delta precision: " + str(debiased_precision - biased_precision))
print("delta recall: " + str(debiased_recall - biased_recall))
print("delta f1: " + str(debiased_f1 - biased_f1))

loss: 0.00017368417236148794
precision: 1.0
recall: 1.0
f1: 1.0
loss: 0.0011560477208451038
precision: 1.0
recall: 1.0
f1: 1.0
delta loss: 0.0009823635484836157
delta precision: 0.0
delta recall: 0.0
delta f1: 0.0


# Training from debiased word vectors

In [10]:
# Second tagger, identical in shape, trained on top of the debiased word vectors.
# NOTE(review): the literal 20 is presumably the hidden size -- confirm against POSTagger.
vocabSize, tagCount = len(inputVocab), len(outputVocab)
debias_posModel = POSTagger(EMBEDDING_SIZE, 20, vocabSize, tagCount)
debias_posModel.setEmbeddings(
    constructEmbeddingTensorFromVocabAndWvs(debiasedWvs, inputVocab, EMBEDDING_SIZE),
    freeze=True,
)

In [11]:
# Train debias_posModel, keeping on disk the weights with the lowest validation
# loss, then reload those weights and report metrics on the test batches.
device = torch.device("cuda" if USE_CUDA else "cpu")
optimizer = optim.RMSprop(debias_posModel.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM, weight_decay=L2_REG)
criterion = nn.CrossEntropyLoss()
print("Starting Training")

# Baseline: score the untrained model and checkpoint it straight away. This
# guarantees the file loaded after the loop was written by this run and always
# corresponds to bestValLoss, even if no epoch improves on the baseline.
bestValLoss = test(debias_posModel, device, formattedBatchedValData, criterion)
torch.save(debias_posModel.state_dict(), "models/savedModels/debiased_model.m")
for epoch in range(1, EPOCHS + 1):
    loss = train_step(debias_posModel, device, formattedBatchedTrainData, optimizer, criterion, epoch, DEBUG_INTERVAL)
    val_loss = test(debias_posModel, device, formattedBatchedValData, criterion)
    precision, recall, f1 = precisionRecallEval(debias_posModel, device, formattedBatchedValData)
    print("Epoch #{} - \n\tVal Loss: {:.6f} \n\tVal Precision {:6f} \n\tVal Recall {:6f} \n\tVal Macro F1 {:6f}".format(epoch, val_loss, precision, recall, f1))
    # Every improving epoch is checkpointed, including the first one.
    if val_loss < bestValLoss:
        torch.save(debias_posModel.state_dict(), "models/savedModels/debiased_model.m")
        bestValLoss = val_loss

print("Found best case validation loss to be " + str(bestValLoss) + "\n... Loading saved model and testing")
# map_location lets the checkpoint load on the selected device regardless of
# the device it was saved from.
debias_posModel.load_state_dict(torch.load("models/savedModels/debiased_model.m", map_location=device))
test_loss = test(debias_posModel, device, formattedBatchedTestData, criterion)
precision, recall, f1 = precisionRecallEval(debias_posModel, device, formattedBatchedTestData)
print("TEST DATA -\n\tTest Loss: {:.6f} \n\tTest Precision {:6f} \n\tTest Recall {:6f} \n\tTest Macro F1 {:6f}".format(test_loss, precision, recall, f1))
  

Starting Training
Epoch #1 - 
	Val Loss: 0.001901 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #2 - 
	Val Loss: 0.000793 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #3 - 
	Val Loss: 0.000681 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #4 - 
	Val Loss: 0.000928 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #5 - 
	Val Loss: 0.001075 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #6 - 
	Val Loss: 0.001103 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #7 - 
	Val Loss: 0.001134 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #8 - 
	Val Loss: 0.001173 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #9 - 
	Val Loss: 0.001212 
	Val Precision 1.000000 
	Val Recall 1.000000 
	Val Macro F1 1.000000
Epoch #10 - 
	Val Loss: 0.001246 
	Val Precision 1.0000

In [12]:
def _evalSavedModel(title, model, checkpointPath):
    """Reload `model` from `checkpointPath` and score it on the bias test batches.

    Args:
        title: Label used in the printed section header.
        model: Model instance whose weights are replaced by the checkpoint.
        checkpointPath: File holding the saved ``state_dict``.

    Returns:
        ``(loss, precision, recall, f1)`` on ``formattedBatchedBiasTestData``.
    """
    # map_location lets the checkpoint load on the selected device regardless
    # of the device it was saved from.
    model.load_state_dict(torch.load(checkpointPath, map_location=device))
    precision, recall, f1 = precisionRecallEval(model, device, formattedBatchedBiasTestData)
    loss = test(model, device, formattedBatchedBiasTestData, criterion)
    print("============= " + title + " MODEL TEST RESULTS =============")
    print("loss: " + str(loss))
    print("precision: " + str(precision))
    print("recall: " + str(recall))
    print("f1: " + str(f1))
    return loss, precision, recall, f1


# Each model is restored from its own best checkpoint and scored on the same
# bias test batches.
a_test_biased_loss, a_biased_precision, a_biased_recall, a_biased_f1 = _evalSavedModel(
    "BIASED", posModel, "models/savedModels/model.m")
print()
a_test_debiased_loss, a_debiased_precision, a_debiased_recall, a_debiased_f1 = _evalSavedModel(
    "DEBIASED", debias_posModel, "models/savedModels/debiased_model.m")
print()

# Positive deltas mean the value is higher for the model trained on debiased vectors.
print("============= MODEL COMPARISION RESULTS =============")
print("delta loss: " + str(a_test_debiased_loss - a_test_biased_loss))
print("delta precision: " + str(a_debiased_precision - a_biased_precision))
print("delta recall: " + str(a_debiased_recall - a_biased_recall))
print("delta f1: " + str(a_debiased_f1 - a_biased_f1))

loss: 0.00017368417236148794
precision: 1.0
recall: 1.0
f1: 1.0
loss: 0.00033492411393746196
precision: 1.0
recall: 1.0
f1: 1.0
delta loss: 0.00016123994157597401
delta precision: 0.0
delta recall: 0.0
delta f1: 0.0
