In [3]:
import json
import os
import random

import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import f1_score

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.autograd as autograd
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
# from torch.utils.tensorboard import SummaryWriter
# from tensorboardX import SummaryWriter

import torchtext
from torchtext.data import get_tokenizer
from torchtext import data, datasets

from tqdm.notebook import tqdm, trange

# Kept last so that names exported by the star import shadow the same names as before.
from POS_utils import *

In [4]:
# Dataset path
# Both directories are relative paths (they start with ".."); each one is later
# read for train.jsonl / valid.jsonl / test.jsonl by TabularDataset.splits.
TrustPilot_processed_dataset_path = "..//dataset//TrustPilot_processed//"
WebEng_processed_dataset_path = "..//dataset//Web_Eng//"

# Device
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Checkpoint filename template; the "{}" placeholder is filled with
# SAMPLING_INDEX+1 by the pre-training cell.
model_name = 'Models/EVP_ADV_{}.pt'

In [5]:
####################################
#         Hyper-parameters         #
####################################
BATCH_SIZE = 64  # batch_size passed to both BucketIterator.splits calls
LEARNING_RATE = 1e-3  # Adam learning rate for the tagger and both discriminators
EMBEDDING_DIM = 100  # embedding_dim of BiLSTMPOSTagger
HIDDEN_DIM = 256  # LSTM hidden size; also the discriminators' hidden_size
N_LAYERS = 2  # number of stacked LSTM layers
BIDIRECTIONAL = True  # whether the tagger's LSTM is bidirectional
DROPOUT = 0.25  # dropout probability used inside BiLSTMPOSTagger
NUM_EPOCHS = 100  # NOTE(review): the visible training loops use trange(50) and do not read this
SEED = 960925  # seed handed to the RNG seeding calls
MIN_FREQ = 2  # NOTE(review): not passed to any build_vocab call in the visible cells
SAMPLING_INDEX = 10  # SAMPLING_INDEX+1 is formatted into the checkpoint filename
LAMBDA = 1e-3  # presumably the adversarial loss weight (same value as the LAMBDA_2 given to train_ADV) -- confirm

In [6]:

class BiLSTMPOSTagger(nn.Module):
    """Embedding -> (Bi)LSTM -> linear tagger that emits one score vector per token.

    Args:
        input_dim: number of rows in the embedding table (vocabulary size).
        embedding_dim: size of each token embedding.
        hidden_dim: LSTM hidden size per direction.
        output_dim: number of tag classes scored for every token.
        n_layers: number of stacked LSTM layers.
        bidirectional: if true, the LSTM runs in both directions and the
            classifier input width doubles.
        dropout: dropout probability applied to the embeddings, to the LSTM
            outputs, and between LSTM layers (only when ``n_layers > 1``).
        pad_idx: vocabulary index used as ``padding_idx`` of the embedding.
    """

    def __init__(self,
                 input_dim,
                 embedding_dim,
                 hidden_dim,
                 output_dim,
                 n_layers,
                 bidirectional,
                 dropout,
                 pad_idx):
        super().__init__()

        # Sub-modules are created in the order embedding -> lstm -> fc so that
        # seeded parameter initialisation stays reproducible.
        self.embedding = nn.Embedding(input_dim, embedding_dim, padding_idx=pad_idx)

        # nn.LSTM only applies dropout between layers, so it is disabled for a
        # single-layer network.
        inter_layer_dropout = dropout if n_layers > 1 else 0
        self.lstm = nn.LSTM(embedding_dim,
                            hidden_dim,
                            num_layers=n_layers,
                            bidirectional=bidirectional,
                            dropout=inter_layer_dropout)

        classifier_in = hidden_dim * 2 if bidirectional else hidden_dim
        self.fc = nn.Linear(classifier_in, output_dim)

        self.dropout = nn.Dropout(dropout)

    def _encode(self, text):
        """Return the per-token LSTM outputs for ``text``.

        ``text`` is [sent len, batch size]; the result is
        [sent len, batch size, hid dim * n directions].
        """
        token_vectors = self.dropout(self.embedding(text))
        lstm_out, _ = self.lstm(token_vectors)
        return lstm_out

    def forward(self, text):
        """Score every token of ``text`` ([sent len, batch size]).

        Returns:
            Tensor of shape [sent len, batch size, output dim] with raw
            (un-normalised) tag scores.
        """
        return self.fc(self.dropout(self._encode(text)))

    def hidden_state(self, text):
        """Summarise each sentence as the mean of its LSTM outputs over positions.

        The mean is taken over dimension 0 (sentence positions) of the LSTM
        output, with no masking applied.

        Returns:
            Tensor of shape [batch size, hid dim * n directions].
        """
        return self._encode(text).mean(dim=0)


In [7]:
####################################
#      Build the Discriminator     #
####################################
class Discriminator(nn.Module):
    """Two-layer perceptron (Linear -> ReLU -> Linear) that returns raw class scores.

    Args:
        input_size: width of the feature vector given to ``forward``.
        hidden_size: width of the hidden layer.
        num_classes: number of output scores per example.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, input):
        """Map ``input`` ([..., input_size]) to un-normalised scores ([..., num_classes]).

        No softmax is applied here; the scores are returned as produced by the
        last linear layer.
        """
        return self.fc2(self.relu(self.fc1(input)))

In [8]:
def train(model, iterator, optimizer, criterion, tag_pad_idx):
    """Train ``model`` for one pass over ``iterator`` and report averaged metrics.

    Args:
        model: tagger called as ``model(batch.text)``; its output is flattened
            to [sent len * batch size, output dim] before scoring.
        iterator: sized iterable of batches exposing ``text`` and ``tag_label``.
        optimizer: optimizer stepping the model parameters.
        criterion: loss applied to the flattened predictions and tags.
        tag_pad_idx: tag index forwarded to ``categorical_accuracy`` (a helper
            defined outside this cell).

    Returns:
        ``(mean loss, mean accuracy)``, both averaged over the number of batches.
    """
    model.train()

    loss_total = 0
    acc_total = 0

    for batch in iterator:
        tokens = batch.text            # [sent len, batch size]
        gold = batch.tag_label         # [sent len, batch size]

        optimizer.zero_grad()

        scores = model(tokens)         # [sent len, batch size, output dim]

        # Flatten so that every token is one row for the loss / accuracy.
        flat_scores = scores.view(-1, scores.shape[-1])
        flat_gold = gold.view(-1)

        batch_loss = criterion(flat_scores, flat_gold)
        batch_acc = categorical_accuracy(flat_scores, flat_gold, tag_pad_idx)

        batch_loss.backward()
        optimizer.step()

        loss_total += batch_loss.item()
        acc_total += batch_acc.item()

    return loss_total / len(iterator), acc_total / len(iterator)

In [9]:

def _class_scores(discriminator, features):
    """Run ``discriminator`` and drop only a trailing singleton class axis.

    A bare ``.squeeze()`` would also remove the batch axis when a batch holds a
    single example, which breaks the loss for that batch.
    """
    scores = discriminator(features)
    if scores.dim() > 1 and scores.size(-1) == 1:
        scores = scores.squeeze(-1)
    return scores


def train_ADV(model,
        discriminator_age,
        discriminator_gender,
        iterator,
        optimizer,
        optimizer_age,
        optimizer_gender,
        criterion,
        criterion_age,
        criterion_gender,
        tag_pad_idx,
        LAMBDA_1,
        LAMBDA_2):
    """Run one epoch of adversarial training of the tagger against two discriminators.

    For every batch:
      1. the age discriminator is updated on ``LAMBDA_1 * criterion_age``;
      2. the gender discriminator is updated on ``LAMBDA_1 * criterion_gender``;
      3. the tagger is updated on
         ``criterion(tags) - LAMBDA_2 * (age_loss + gender_loss)``, i.e. it is
         rewarded when the (just updated) discriminators do badly.

    Args:
        model: tagger exposing ``model(text)`` and ``model.hidden_state(text)``.
        discriminator_age, discriminator_gender: modules mapping the sentence
            representation to class scores.
        iterator: sized iterable of batches exposing ``text``, ``tag_label``,
            ``age_label`` and ``gender_label``.
        optimizer, optimizer_age, optimizer_gender: optimizers for the tagger
            and the two discriminators respectively.
        criterion, criterion_age, criterion_gender: the matching loss functions.
        tag_pad_idx: tag index forwarded to ``categorical_accuracy`` (a helper
            defined outside this cell).
        LAMBDA_1: scale applied to the discriminator losses.
        LAMBDA_2: weight of the adversarial term in the tagger loss.

    Returns:
        ``(mean loss, mean accuracy)`` averaged over the number of batches. The
        loss is the full tagger objective, including the adversarial term.
    """
    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for batch in iterator:

        text = batch.text
        tags = batch.tag_label
        y_age = batch.age_label
        y_gender = batch.gender_label

        h = model.hidden_state(text)
        # The discriminator updates only need the values of h. Detaching stops
        # their backward passes from running through the tagger (those tagger
        # gradients were discarded anyway) and removes the need for
        # retain_graph=True.
        h_fixed = h.detach()

        # --- Update age discriminator ---
        optimizer_age.zero_grad()
        age_loss = LAMBDA_1 * criterion_age(_class_scores(discriminator_age, h_fixed), y_age)
        age_loss.backward()
        optimizer_age.step()

        # --- Update gender discriminator ---
        optimizer_gender.zero_grad()
        gender_loss = LAMBDA_1 * criterion_gender(_class_scores(discriminator_gender, h_fixed), y_gender)
        gender_loss.backward()
        optimizer_gender.step()

        # --- Update tagger ---
        # Re-score with the updated discriminators, this time on the attached h
        # so that the adversarial term reaches the tagger parameters.
        gender_loss = criterion_gender(_class_scores(discriminator_gender, h), y_gender)
        age_loss = criterion_age(_class_scores(discriminator_age, h), y_age)

        optimizer.zero_grad()

        #text = [sent len, batch size]
        predictions = model(text)

        #predictions = [sent len, batch size, output dim]
        #tags = [sent len, batch size]
        predictions = predictions.view(-1, predictions.shape[-1])
        tags = tags.view(-1)

        #predictions = [sent len * batch size, output dim]
        #tags = [sent len * batch size]
        loss = criterion(predictions, tags) - LAMBDA_2 * (age_loss + gender_loss)

        acc = categorical_accuracy(predictions, tags, tag_pad_idx)

        # This also leaves gradients on the discriminator parameters; they are
        # cleared by the discriminators' zero_grad() calls on the next batch.
        loss.backward()

        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)



In [10]:

def evaluate(model, iterator, criterion, tag_pad_idx):
    """Score ``model`` on ``iterator`` without updating any parameters.

    Args:
        model: tagger called as ``model(batch.text)``.
        iterator: sized iterable of batches exposing ``text`` and ``tag_label``.
        criterion: loss applied to the flattened predictions and tags.
        tag_pad_idx: tag index forwarded to ``categorical_accuracy`` (a helper
            defined outside this cell).

    Returns:
        ``(mean loss, mean accuracy)``, both averaged over the number of batches.
    """
    model.eval()

    loss_total = 0
    acc_total = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch in iterator:
            scores = model(batch.text)

            # One row per token for the loss / accuracy helpers.
            flat_scores = scores.view(-1, scores.shape[-1])
            flat_gold = batch.tag_label.view(-1)

            batch_loss = criterion(flat_scores, flat_gold)
            batch_acc = categorical_accuracy(flat_scores, flat_gold, tag_pad_idx)

            loss_total += batch_loss.item()
            acc_total += batch_acc.item()

    return loss_total / len(iterator), acc_total / len(iterator)


In [11]:
   
## Set random seed
torch.manual_seed(SEED)
np.random.seed(SEED)
# Python's own RNG was imported but never seeded. torchtext's legacy iterators
# presumably draw their shuffling state from it (TODO confirm against the
# installed torchtext version), so it is seeded too for repeatable batch order.
random.seed(SEED)

In [12]:
####################################
#          Preparing Data          #
####################################
def _load_splits(dataset_dir, fields):
    """Read the train/valid/test JSON-lines files found in ``dataset_dir``."""
    return data.TabularDataset.splits(path=dataset_dir,
                                      train="train.jsonl",
                                      validation="valid.jsonl",
                                      test="test.jsonl",
                                      fields=fields,
                                      format="json")


def _make_iterators(splits):
    """Wrap a (train, valid, test) tuple in bucketed iterators keyed on text length."""
    return data.BucketIterator.splits(splits,
                                      batch_size=BATCH_SIZE,
                                      device=device,
                                      sort_key=lambda x: len(x.text))


# 1. Fields: lower-cased text, tags without an <unk> entry, and two plain label fields.
TEXT = data.Field(lower=True)
TAG_LABEL = data.Field(unk_token=None)
AGE_LABEL = data.LabelField()
GENDER_LABEL = data.LabelField()

# JSON key -> (batch attribute name, field); shared by both corpora.
fields = {
    'text': ('text', TEXT),
    'tag_label': ('tag_label', TAG_LABEL),
    'age_label': ('age_label', AGE_LABEL),
    'gender_label': ('gender_label', GENDER_LABEL),
}

# 2. Datasets: TrustPilot first, then Web-Eng.
train_data, valid_data, test_data = _load_splits(TrustPilot_processed_dataset_path, fields)
we_train_data, we_valid_data, we_test_data = _load_splits(WebEng_processed_dataset_path, fields)

# 3. Iterators, built in the same order as the datasets.
train_iter, valid_iter, test_iter = _make_iterators((train_data, valid_data, test_data))
we_train_iter, we_valid_iter, we_test_iter = _make_iterators((we_train_data, we_valid_data, we_test_data))

In [13]:
# 5. Build vocab
# Tag, age and gender vocabularies are built from the TrustPilot training split.
for label_field in (TAG_LABEL, AGE_LABEL, GENDER_LABEL):
    label_field.build_vocab(train_data)

# The text vocabulary is built from the Web-Eng training split
# (we_train_data), not from train_data.
TEXT.build_vocab(we_train_data)

In [14]:
# Parameters
# Sizes derived from the vocabularies built in the previous cell.
INPUT_DIM = len(TEXT.vocab)  # size of the text vocabulary (rows of the embedding table)
TAG_OUTPUT_DIM = len(TAG_LABEL.vocab)  # size of the tag vocabulary (tagger output width)
AGE_OUTPUT_DIM = len(AGE_LABEL.vocab)  # size of the age label vocabulary
GENDER_OUTPUT_DIM = len(GENDER_LABEL.vocab)  # size of the gender label vocabulary
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]  # index of the padding token in the text vocabulary

In [15]:
# Width of model.hidden_state(): the tagger's LSTM output size, which doubles
# when the LSTM is bidirectional (previously hard-coded as HIDDEN_DIM*2).
DISCRIMINATOR_INPUT_DIM = HIDDEN_DIM * 2 if BIDIRECTIONAL else HIDDEN_DIM

# Create an Age Discriminator instance
# The class count comes from the label vocabulary instead of a hard-coded 2,
# so the discriminator always matches the labels found in the data.
discriminator_age = Discriminator(input_size=DISCRIMINATOR_INPUT_DIM,
                                hidden_size=HIDDEN_DIM,
                                num_classes=AGE_OUTPUT_DIM)
discriminator_age.to(device)
criterion_age = nn.CrossEntropyLoss().to(device)
optimizer_age = optim.Adam(discriminator_age.parameters(), lr=LEARNING_RATE)

# Create a Gender Discriminator instance
discriminator_gender = Discriminator(input_size=DISCRIMINATOR_INPUT_DIM,
                                hidden_size=HIDDEN_DIM,
                                num_classes=GENDER_OUTPUT_DIM)
discriminator_gender.to(device)
criterion_gender = nn.CrossEntropyLoss().to(device)
optimizer_gender = optim.Adam(discriminator_gender.parameters(), lr=LEARNING_RATE)

# Create a Tagger instance
# (built after the discriminators, as before, so seeded initialisation is unchanged)
model = BiLSTMPOSTagger(INPUT_DIM,
                        EMBEDDING_DIM,
                        HIDDEN_DIM,
                        TAG_OUTPUT_DIM,
                        N_LAYERS,
                        BIDIRECTIONAL,
                        DROPOUT,
                        PAD_IDX)
model.apply(init_weights)
# Reset the padding row to zeros after the custom initialisation.
model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Padding positions in the tag sequence are excluded from the tagging loss.
TAG_PAD_IDX = TAG_LABEL.vocab.stoi[TAG_LABEL.pad_token]
criterion = nn.CrossEntropyLoss(ignore_index = TAG_PAD_IDX)

model = model.to(device)
criterion = criterion.to(device)

In [22]:
best_valid_loss = float('inf')
best_epoch = -1

saved_model = model_name.format(SAMPLING_INDEX+1)

# torch.save does not create missing parent directories, so make sure the
# checkpoint folder exists before the first save (no-op when it already does).
checkpoint_dir = os.path.dirname(saved_model)
if checkpoint_dir:
    os.makedirs(checkpoint_dir, exist_ok=True)

# Plain (non-adversarial) training on the Web-Eng split; the weights with the
# lowest validation loss are checkpointed.
for epoch in trange(50):
    train_loss, train_acc = train(model, we_train_iter, optimizer, criterion, TAG_PAD_IDX)
    valid_loss, valid_acc = evaluate(model, we_valid_iter, criterion, TAG_PAD_IDX)
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        best_epoch = epoch
        torch.save(model.state_dict(), saved_model)

# Restore the best checkpoint; map_location keeps the reload working when the
# file was written on a different device than the current one.
model.load_state_dict(torch.load(saved_model, map_location=device))

100%|██████████| 50/50 [04:55<00:00,  5.91s/it]


<All keys matched successfully>

In [23]:
# Evaluate the restored tagger on the TrustPilot test iterator before any
# adversarial training. evaluate_bias is not defined in this notebook
# (presumably it comes from POS_utils); with return_value=False the cell
# output shows printed metrics rather than a metrics dict.
evaluate_bias(model = model, 
                iterator = test_iter,
                tag_pad_idx = TAG_PAD_IDX,
                return_value = False)

Overall
Accuracy : 0.866883
F Score : 0.779946
Age group
Accuracy Under35: 0.854620 V.S. Over45: 0.885081
F Score Under35: 0.765699 V.S. Over45: 0.808810
Gender group
Accuracy Female: 0.847007 V.S. Male: 0.878361
F Score Female: 0.750995 V.S. Male: 0.800990


0

In [27]:
best_valid_loss = float('inf')
best_epoch_eval_score = -1
best_epoch_eval = None

# Scale applied to the discriminator losses (LAMBDA_1 of train_ADV); this was
# an unnamed literal before.
ADV_DISCRIMINATOR_SCALE = 1e+1

# NOTE(review): the best epoch is chosen with test_iter, i.e. on the test
# split, so the kept metrics are optimistically biased -- consider selecting on
# valid_iter. Only the metrics dict of the best epoch is kept; the model itself
# ends up with the weights of the final epoch.
for epoch in trange(50):
    # The adversarial weight (LAMBDA_2) now comes from the LAMBDA
    # hyper-parameter instead of a duplicated 1e-3 literal.
    train_loss, train_acc = train_ADV(model,
                                    discriminator_age,
                                    discriminator_gender,
                                    train_iter,
                                    optimizer,
                                    optimizer_age,
                                    optimizer_gender,
                                    criterion,
                                    criterion_age,
                                    criterion_gender,
                                    TAG_PAD_IDX,
                                    ADV_DISCRIMINATOR_SCALE,
                                    LAMBDA)
    epoch_eval = evaluate_bias(model = model,
                                iterator = test_iter,
                                tag_pad_idx = TAG_PAD_IDX,
                                return_value = True)

    # Each "a + b - |a - b|" term rewards high accuracy in both groups and
    # penalises the gap between them (age groups O/U, then gender groups M/F).
    accuracy_selection = (epoch_eval["acc_O"]
                        + epoch_eval["acc_U"]
                        - abs(epoch_eval["acc_O"]- epoch_eval["acc_U"])
                        + epoch_eval["acc_M"]
                        + epoch_eval["acc_F"]
                        - abs(epoch_eval["acc_M"]-epoch_eval["acc_F"])
                        )
    # ">=" means that on a tie the later epoch replaces the earlier one.
    if accuracy_selection >= best_epoch_eval_score:
        best_epoch_eval = epoch_eval
        best_epoch_eval_score = accuracy_selection

SyntaxError: invalid syntax (<ipython-input-27-a0176ff3f113>, line 32)

In [25]:
# Display the metrics dict kept from the best-scoring adversarial epoch
# (it stays None if the selection loop never assigned it).
best_epoch_eval

{'acc_overall': 0.9204545454545454,
 'acc_O': 0.9375,
 'acc_U': 0.9089673913043478,
 'acc_M': 0.9193341869398207,
 'acc_F': 0.9223946784922394,
 'FScore_overall': 0.8662209113663772,
 'FScore_O': 0.8776014148280086,
 'FScore_U': 0.8591540707051228,
 'FScore_M': 0.8558251795134706,
 'FScore_F': 0.8755147118310305}

In [26]:
# Scratch check of the built-in min() with several arguments; the result is
# not used by any other visible cell.
min(5,2,3)

2