In [1]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import f1_score
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.autograd as autograd
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

import torchtext
from torchtext.data import get_tokenizer
from torchtext import data, datasets

import json
import random

from tqdm import tqdm, trange

from POS_utils import *

In [2]:
# Dataset path
TrustPilot_processed_dataset_path = "..//dataset//TrustPilot_processed//"

# Device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

mode_save_path = 'Models\\'
model_name = 'baseline_Adversial_{}.pt'

In [3]:
####################################
#         Hyper-parameters         #
####################################
BATCH_SIZE = 64
LEARNING_RATE = 1e-3
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
N_LAYERS = 2
BIDIRECTIONAL = True
DROUPOUT = 0.5
NUM_EPOCHS = 100
LAMBDA = 1e-2

In [4]:
####################################
#          Preparing Data          #
####################################
SEED = 960925


# 1. data.Field()
# TEXT = data.Field(include_lengths=True, pad_token='<pad>', unk_token='<unk>')
TEXT = data.Field(lower = True)
TAG_LABEL = data.Field(unk_token = None)
AGE_LABEL = data.LabelField()
GENDER_LABEL = data.LabelField()

In [5]:
fields = {'text':('text', TEXT), 
          'tag_label':('tag_label', TAG_LABEL),
          'age_label':('age_label', AGE_LABEL),
          'gender_label':('gender_label', GENDER_LABEL)}

In [6]:
# train, val, test
# 2. data.TabularDataset
train_data, valid_data, test_data = data.TabularDataset.splits(path=TrustPilot_processed_dataset_path,
                                                               train="train.jsonl",
                                                               validation = "valid.jsonl",
                                                               test="test.jsonl",
                                                               fields=fields,
                                                               format="json")

In [7]:
print("Number of train_data = {}".format(len(train_data)))
print("Number of valid_data = {}".format(len(valid_data)))
print("Number of test_data = {}\n".format(len(test_data)))

Number of train_data = 486
Number of valid_data = 54
Number of test_data = 60



In [8]:
# 4. data.BucketIterator
train_iter, valid_iter, test_iter = data.BucketIterator.splits((train_data, valid_data, test_data),
                                                               batch_size=BATCH_SIZE,
                                                               device=device,
                                                               sort_key=lambda x: len(x.text))

# 5. Build vocab
# TEXT.build_vocab(train_data)
TAG_LABEL.build_vocab(train_data)
AGE_LABEL.build_vocab(train_data)
GENDER_LABEL.build_vocab(train_data)


In [9]:
MIN_FREQ = 2

TEXT.build_vocab(train_data, 
                 min_freq = MIN_FREQ,
                 vectors = "glove.6B.100d",
                 unk_init = torch.Tensor.normal_)

In [10]:
class BiLSTMPOSTagger(nn.Module):
    def __init__(self, 
                 input_dim, 
                 embedding_dim, 
                 hidden_dim, 
                 output_dim, 
                 n_layers, 
                 bidirectional, 
                 dropout, 
                 pad_idx):
        
        super().__init__()
        
        self.embedding = nn.Embedding(input_dim, embedding_dim, padding_idx = pad_idx)
        
        self.lstm = nn.LSTM(embedding_dim, 
                            hidden_dim, 
                            num_layers = n_layers, 
                            bidirectional = bidirectional,
                            dropout = dropout if n_layers > 1 else 0)
        
        self.fc = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text):

        #text = [sent len, batch size]
        
        #pass text through embedding layer
        embedded = self.dropout(self.embedding(text))
        
        #embedded = [sent len, batch size, emb dim]
        
        #pass embeddings into LSTM
        outputs, (hidden, cell) = self.lstm(embedded)
        
        #outputs holds the backward and forward hidden states in the final layer
        #hidden and cell are the backward and forward hidden and cell states at the final time-step
        
        #output = [sent len, batch size, hid dim * n directions]
        #hidden/cell = [n layers * n directions, batch size, hid dim]
        
        #we use our outputs to make a prediction of what the tag should be
        predictions = self.fc(self.dropout(outputs))
        
        #predictions = [sent len, batch size, output dim]
        
        return predictions
    
    def hidden_state(self, text):

        #text = [sent len, batch size]
        
        #pass text through embedding layer
        embedded = self.dropout(self.embedding(text))
        
        #embedded = [sent len, batch size, emb dim]
        
        #pass embeddings into LSTM
        outputs, (hidden, cell) = self.lstm(embedded)
        
        #outputs holds the backward and forward hidden states in the final layer
        #hidden and cell are the backward and forward hidden and cell states at the final time-step
        
        #output = [sent len, batch size, hid dim * n directions]
        #hidden/cell = [n layers * n directions, batch size, hid dim]
        
        
        # hidden state = [batch size, sent len * hid dim * n directions]
        # hs = torch.stack([torch.squeeze(i.reshape((outputs.shape[0]*outputs.shape[2],-1))) for i in torch.torch.chunk(outputs, outputs.shape[1], 1)], dim=0)
        return torch.mean(outputs, 0, keepdim=False)

In [11]:
####################################
#      Build the Discriminator     #
####################################
class Discriminator(nn.Module):

    def __init__(self, input_size, hidden_size, num_classes):
        super(Discriminator, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)


    def forward(self, input):
        out = self.fc1(input)
        out = self.relu(out)
        out = self.fc2(out)
        # features = features.view(x.shape[0], -1)
        out = F.log_softmax(out, dim=1)
        return out

In [12]:
# Parameters
INPUT_DIM = len(TEXT.vocab)
TAG_OUTPUT_DIM = len(TAG_LABEL.vocab)
AGE_OUTPUT_DIM = len(AGE_LABEL.vocab)
GENDER_OUTPUT_DIM = len(GENDER_LABEL.vocab)
N_LAYERS = 2
BIDIRECTIONAL = True
DROPOUT = 0.25
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]


model = BiLSTMPOSTagger(INPUT_DIM, 
                        EMBEDDING_DIM, 
                        HIDDEN_DIM, 
                        TAG_OUTPUT_DIM, 
                        N_LAYERS, 
                        BIDIRECTIONAL, 
                        DROPOUT, 
                        PAD_IDX)

In [13]:
# Create an Age Discriminator instance
discriminator_age = Discriminator(input_size=HIDDEN_DIM*2,
                                  hidden_size=HIDDEN_DIM,
                                  num_classes=2)

discriminator_age.to(device)

criterion_age = nn.CrossEntropyLoss().to(device)
optimizer_age = optim.Adam(discriminator_age.parameters(), lr=LEARNING_RATE)

In [14]:
# Create an Gender Discriminator instance
discriminator_gender = Discriminator(input_size=HIDDEN_DIM*2,
                                  hidden_size=HIDDEN_DIM,
                                  num_classes=2)

discriminator_gender.to(device)

criterion_gender = nn.CrossEntropyLoss().to(device)
optimizer_gender = optim.Adam(discriminator_gender.parameters(), lr=LEARNING_RATE)

In [15]:
model.apply(init_weights)

BiLSTMPOSTagger(
  (embedding): Embedding(766, 100, padding_idx=1)
  (lstm): LSTM(100, 256, num_layers=2, dropout=0.25, bidirectional=True)
  (fc): Linear(in_features=512, out_features=13, bias=True)
  (dropout): Dropout(p=0.25, inplace=False)
)

In [16]:
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 2,393,413 trainable parameters


In [17]:
pretrained_embeddings = TEXT.vocab.vectors

print(pretrained_embeddings.shape)

torch.Size([766, 100])


In [18]:
model.embedding.weight.data.copy_(pretrained_embeddings)

tensor([[ 0.9130, -0.5882, -1.1043,  ..., -0.5646, -1.2453,  2.2959],
        [ 1.4369,  1.5932, -0.4207,  ..., -0.2081, -0.3758, -1.7615],
        [-0.3398,  0.2094,  0.4635,  ..., -0.2339,  0.4730, -0.0288],
        ...,
        [-0.1194,  0.5419,  0.6217,  ..., -0.3805, -0.0422,  0.3516],
        [ 0.3576,  0.0981, -0.1268,  ...,  0.2066, -0.1067, -0.4127],
        [-0.0263,  0.0179, -0.5016,  ..., -0.8688,  0.9409, -0.2882]])

In [19]:

model.embedding.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

print(model.embedding.weight.data)

tensor([[ 0.9130, -0.5882, -1.1043,  ..., -0.5646, -1.2453,  2.2959],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.3398,  0.2094,  0.4635,  ..., -0.2339,  0.4730, -0.0288],
        ...,
        [-0.1194,  0.5419,  0.6217,  ..., -0.3805, -0.0422,  0.3516],
        [ 0.3576,  0.0981, -0.1268,  ...,  0.2066, -0.1067, -0.4127],
        [-0.0263,  0.0179, -0.5016,  ..., -0.8688,  0.9409, -0.2882]])


In [20]:
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [21]:
TAG_PAD_IDX = TAG_LABEL.vocab.stoi[TAG_LABEL.pad_token]

criterion = nn.CrossEntropyLoss(ignore_index = TAG_PAD_IDX)

In [22]:
model = model.to(device)
criterion = criterion.to(device)

In [23]:
def train(model, iterator, optimizer, criterion, tag_pad_idx, LAMBDA):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.train()
    
    for batch in iterator:
        
        text = batch.text
        tags = batch.tag_label
        
        
        h = model.hidden_state(text)
        
        """
        Update age discriminitor
        """
        y_age = batch.age_label
        
        y_age_pred = discriminator_age(h).squeeze()
        
        age_loss = criterion_age(y_age_pred, y_age)
        
        optimizer_age.zero_grad()
        age_loss.backward(retain_graph=True)
        optimizer_age.step()
        
        """
        Update gender discriminitor
        """
        y_gender = batch.gender_label
        y_gender_pred = discriminator_gender(h).squeeze()
        
        gender_loss = criterion_gender(y_gender_pred, y_gender)
        
        optimizer_gender.zero_grad()
        gender_loss.backward(retain_graph=True)
        optimizer_gender.step()
        
        
        """
        Update Tagger
        """
        optimizer.zero_grad()
        
        #text = [sent len, batch size]
        
        predictions = model(text)
        
        #predictions = [sent len, batch size, output dim]
        #tags = [sent len, batch size]
        
        predictions = predictions.view(-1, predictions.shape[-1])
        tags = tags.view(-1)
        
        #predictions = [sent len * batch size, output dim]
        #tags = [sent len * batch size]
        
        loss = criterion(predictions, tags) - LAMBDA * age_loss - LAMBDA * gender_loss
                
        acc = categorical_accuracy(predictions, tags, tag_pad_idx)
        
        loss.backward()
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [24]:

def evaluate(model, iterator, criterion, tag_pad_idx):
    
    epoch_loss = 0
    epoch_acc = 0
    
    model.eval()
    
    with torch.no_grad():
    
        for batch in iterator:

            text = batch.text
            tags = batch.tag_label
            
            h = model.hidden_state(text)

            y_age = batch.age_label

            y_age_pred = discriminator_age(h).squeeze()

            age_loss = criterion_age(y_age_pred, y_age)

            y_gender = batch.gender_label
            y_gender_pred = discriminator_gender(h).squeeze()

            gender_loss = criterion_gender(y_gender_pred, y_gender)

            predictions = model(text)

            predictions = predictions.view(-1, predictions.shape[-1])
            tags = tags.view(-1)

            loss = criterion(predictions, tags) - LAMBDA * age_loss - LAMBDA * gender_loss
            
            acc = categorical_accuracy(predictions, tags, tag_pad_idx)

            epoch_loss += loss.item()
            epoch_acc += acc.item()
        
    return epoch_loss / len(iterator), epoch_acc / len(iterator)

In [25]:
for repeated_time in range(5):
    best_valid_loss = float('inf')
    best_epoch = -1
    
    saved_model = mode_save_path+model_name.format(repeated_time+1)
    
    for epoch in range(NUM_EPOCHS):    
        train_loss, train_acc = train(model, train_iter, optimizer, criterion, TAG_PAD_IDX, LAMBDA)
        valid_loss, valid_acc = evaluate(model, valid_iter, criterion, TAG_PAD_IDX)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            best_epoch = epoch
            torch.save(model.state_dict(), saved_model)

    model.load_state_dict(torch.load(saved_model))
    print("Bset epoch: {:3f}".format(best_epoch))
    evaluate_bias(model, test_iter, TAG_PAD_IDX)

Overall
Accuracy : 0.883117
F Score : 0.859736
Age group
Accuracy Under35: 0.870924 V.S. Over45: 0.901210
F Score Under35: 0.851976 V.S. Over45: 0.874331
Gender group
Accuracy Female: 0.880266 V.S. Male: 0.884763
F Score Female: 0.859096 V.S. Male: 0.851337
Overall
Accuracy : 0.883117
F Score : 0.861036
Age group
Accuracy Under35: 0.870924 V.S. Over45: 0.901210
F Score Under35: 0.852669 V.S. Over45: 0.876015
Gender group
Accuracy Female: 0.893570 V.S. Male: 0.877081
F Score Female: 0.877868 V.S. Male: 0.843049
Overall
Accuracy : 0.887175
F Score : 0.865991
Age group
Accuracy Under35: 0.879076 V.S. Over45: 0.899194
F Score Under35: 0.857208 V.S. Over45: 0.881130
Gender group
Accuracy Female: 0.904656 V.S. Male: 0.877081
F Score Female: 0.889040 V.S. Male: 0.846648
Overall
Accuracy : 0.888799
F Score : 0.865449
Age group
Accuracy Under35: 0.880435 V.S. Over45: 0.901210
F Score Under35: 0.856380 V.S. Over45: 0.880961
Gender group
Accuracy Female: 0.902439 V.S. Male: 0.880922
F Score Femal