In [None]:
import matplotlib.pyplot as plt
import numpy as np

import pandas as pd
import seaborn as sns
import time
import torch
from sklearn.metrics import confusion_matrix
import random
import torch.nn as nn
import torch.optim as optim
from IPython import display
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

from dataset import InstagramDataset, VineDataset
from dataloader import MyDataLoader
from model import HierarchicalAttentionNetwork
from utils import get_pretrained_weights
from utils import MetricTracker


# set device
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print('Device name = %s' %( torch.cuda.get_device_name(device) if device != 'cpu' else 'CPU' ))

torch.manual_seed(1)
np.random.seed(7)
sns.set(style="white", palette="muted", color_codes=True, context="talk")

%matplotlib inline

## Configuration

In [None]:
# Hyper-parameters and paths shared by the cells of this notebook.
class Config:
    """Plain container for the notebook's tunable settings.

    Takes no arguments; every value is assigned as an instance attribute
    in ``__init__`` and read by later cells through the ``config`` instance.
    """
    def __init__(self):
        # Optimisation settings.
        # batch_size is handed to both MyDataLoader instances; lr is the Adam
        # learning rate used for classifier pre-training; max_grad_norm is the
        # clipping threshold for clip_grad_norm_ (None skips clipping).
        # NOTE(review): num_epochs is not read by any cell visible here -- the
        # pre-training loop uses its own N_CLF_EPOCHS constant.
        self.batch_size = 64
        self.num_epochs = 25
        self.lr = 3e-3
        self.max_grad_norm = 5

        # Architecture sizes, forwarded unchanged to the
        # HierarchicalAttentionNetwork constructor. embed_dim is also passed
        # to get_pretrained_weights.
        self.embed_dim = 100
        self.word_gru_hidden_dim = 100
        self.sent_gru_hidden_dim = 100
        self.word_gru_num_layers = 1
        self.sent_gru_num_layers = 1
        self.word_att_dim = 200
        self.sent_att_dim = 200

        # NOTE(review): vocab_path is not read by any cell visible here; the
        # embedding loader is called with the literal "data/glove" instead.
        self.vocab_path = 'data/glove/glove.6B.100d.txt'

        # pretrain: load pretrained embedding weights into the model when True.
        # freeze: value passed to freeze_embeddings() after loading.
        self.pretrain = True
        self.freeze = False

        # Both forwarded to the model constructor.
        self.use_layer_norm = True
        self.dropout = 0.1

# Single shared settings instance used by the rest of the notebook.
config = Config()

## Load the dataset

In [None]:
# Load the session dataset. The Vine variant is kept below as a commented-out
# alternative; exactly one of the two should be active.
dataset = InstagramDataset('./data/instagram/instagram_text.tsv')
# dataset = VineDataset('./data/vine/vine_full_sessions_pos_970.json', './data/vine/vine_bully.cls')

# NOTE(review): presumably prepares the per-comment labels that Env.reset
# later reads from each item -- confirm in dataset.py.
dataset.create_comment_labels()

# Count how many items fall into each group. Only the keys 0 and 1 exist,
# so any other value in dataset.group raises KeyError here.
groups = {0:0, 1:0}
for i in range(len(dataset)):
    groups[dataset.group[i]] += 1
print('Groups Dist:', groups)

# 80/20 train/test split. test_size is truncated by int(), and train_size
# takes the remainder so the two always sum to len(dataset).
test_size = int(len(dataset) * 0.2)
train_size = len(dataset) - test_size

# No generator is passed, so the split is drawn from torch's global RNG.
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

# Batch loaders over the two splits, both using config.batch_size.
dataloader = MyDataLoader(train_dataset, config.batch_size)
testloader = MyDataLoader(test_dataset, config.batch_size)

## Define the model

In [None]:
# Build the classifier from the sizes in `config` and the dataset's class
# count / vocabulary size, then move it to the selected device.
model = HierarchicalAttentionNetwork(
    num_classes=dataset.num_classes,
    vocab_size=dataset.vocab_size,
    embed_dim=config.embed_dim,
    word_gru_hidden_dim=config.word_gru_hidden_dim,
    sent_gru_hidden_dim=config.sent_gru_hidden_dim,
    word_gru_num_layers=config.word_gru_num_layers,
    sent_gru_num_layers=config.sent_gru_num_layers,
    word_att_dim=config.word_att_dim,
    sent_att_dim=config.sent_att_dim,
    use_layer_norm=config.use_layer_norm,
    dropout=config.dropout).to(device)

# Optionally initialise the word embeddings from pretrained weights and apply
# the freeze setting.
# NOTE(review): the directory "data/glove" is hard-coded here; config.vocab_path
# is not used -- confirm which one get_pretrained_weights expects.
if config.pretrain:
    weights = get_pretrained_weights("data/glove", dataset.vocab, config.embed_dim, device)
    model.sent_attention.word_attention.init_embeddings(weights)
    model.sent_attention.word_attention.freeze_embeddings(config.freeze)


# Supervised warm-up of the classifier before the RL phase.
N_CLF_EPOCHS = 5

# Only parameters with requires_grad=True are optimised. The loss is summed
# over the batch (reduction='sum'), not averaged.
optimizer = optim.Adam(params=filter(lambda p: p.requires_grad, model.parameters()), lr=config.lr)
criterion = nn.CrossEntropyLoss(reduction='sum').to(device)

# Running statistics, reset at the start of every epoch.
losses = MetricTracker()
accs = MetricTracker()

for epoch_idx in range(N_CLF_EPOCHS):
    # Training mode plus fresh per-epoch trackers.
    model.train()
    losses.reset()
    accs.reset()

    # The fifth element of each batch is unused during pre-training.
    for batch_idx, (docs, labels, doc_lengths, sent_lengths, _) in enumerate(dataloader):
        batch_size = labels.size(0)

        docs = docs.to(device)
        labels = labels.to(device)
        sent_lengths = sent_lengths.to(device)
        doc_lengths = doc_lengths.to(device)

        # Forward pass; the attention weights are returned but not used here.
        scores, word_att_weights, sentence_att_weights = model(docs, doc_lengths, sent_lengths)
        # Gradients are cleared after the forward pass but before backward,
        # so nothing from the previous step is accumulated.
        optimizer.zero_grad()
        loss = criterion(scores, labels)
        loss.backward()

        # Clip the global gradient norm unless clipping is disabled (None).
        if config.max_grad_norm is not None:
            torch.nn.utils.clip_grad_norm_(model.parameters(), config.max_grad_norm)
        optimizer.step()

        # Predicted class = index of the highest score along dim 1.
        # `acc` is the raw COUNT of correct predictions in the batch.
        predictions = scores.max(dim=1)[1]
        correct_predictions = torch.eq(predictions, labels).sum().item()
        acc = correct_predictions

        # Both trackers receive a per-batch sum together with the batch size.
        # NOTE(review): presumably MetricTracker divides by the count to get
        # per-sample averages -- confirm in utils.py.
        losses.update(loss.item(), batch_size)
        accs.update(acc, batch_size)

        # Progress line every 10 batches.
        if batch_idx % 10 == 0:
            print('\tEpoch: [{0}][{1}/{2}]\t Loss {loss.val:.4f}(avg: {loss.avg:.4f})\t Acc {acc.val:.3f} (avg: {acc.avg:.3f})'.format(
                epoch_idx, batch_idx, len(dataloader), loss=losses, acc=accs))

    # End-of-epoch summary.
    print('Epoch: [{0}]\t Avg Loss {loss:.4f}\t Avg Accuracy {acc:.3f}'.format(epoch_idx, loss=losses.avg, acc=accs.avg))

## Defining the RL agent

In [None]:
class PolicyAgent(nn.Module):
    """Policy that turns the wrapped classifier's scores into action probabilities.

    Args:
        classifier: callable taking ``(docs, doc_lengths, sent_lengths)`` and
            returning a sequence whose first element is the class-score tensor.
    """

    def __init__(self, classifier):
        super(PolicyAgent, self).__init__()
        self.classifier = classifier
        # Scores are indexed as (batch, class) everywhere in this notebook, so
        # normalise over dim 1 explicitly. This is the dimension the implicit,
        # deprecated nn.Softmax() default resolves to for 2-D input.
        self.softmax_func = nn.Softmax(dim=1)

    def get_action(self, batchloader):
        """Return the class-probability distribution for the first batch.

        Args:
            batchloader: iterable of batches; elements 0, 2 and 3 of a batch
                are moved to the notebook-level ``device`` and passed to the
                classifier. Only the first batch is consumed.

        Returns:
            Softmax over dim 1 of the classifier's score tensor.

        Raises:
            ValueError: if ``batchloader`` yields no batch at all.
        """
        batch = next(iter(batchloader), None)
        if batch is None:
            raise ValueError('batchloader yielded no batches')

        scores = self.classifier(batch[0].to(device), batch[2].to(device), batch[3].to(device))[0]
        return self.softmax_func(scores)

## Define the environment

In [None]:
class Env:
    """Episode environment that reveals one session comment by comment.

    Each episode picks a random session from ``dataset`` and exposes it as a
    sequence of growing prefixes (first comment, first two comments, ...),
    each wrapped in a single-item ``MyDataLoader``. The reward combines a
    utility loss on the current prediction with a fairness term measured over
    ``testloader``.

    Relies on the notebook-level ``model`` and ``device`` when computing the
    fairness term.

    Args:
        dataset: source of sessions; may be a ``torch.utils.data.Subset``
            (as returned by ``random_split``) or the underlying dataset.
        testloader: loader iterated in full on every reward computation.
        utility_criterion: callable ``(pred_scores, true_labels) -> loss``.
    """

    def __init__(self, dataset, testloader, utility_criterion):
        self.dataset = dataset
        self.testloader = testloader
        self.utility_criterion = utility_criterion
        self.current_time = 0
        self.session_dataset = []

        # Start with a freshly sampled episode.
        self.reset()

    def get_state(self):
        """Return ``(loader for the current prefix, session label)``."""
        return self.session_dataset[self.current_time], self.label

    def _sample_session(self):
        """Draw one random item, with comment labels, from ``self.dataset``."""
        source = self.dataset
        index = random.randint(0, len(source) - 1)

        # random_split() returns Subset views whose __getitem__ accepts only an
        # index. Map the position through to the wrapped dataset so the extra
        # flag can still be passed, while staying inside this split.
        while isinstance(source, torch.utils.data.Subset):
            index = int(source.indices[index])
            source = source.dataset

        return source.__getitem__(index, True)

    def reset(self):
        """Sample a new session and rebuild the list of prefix loaders."""
        self.current_time = 0

        random_session, self.label, doc_lengths, sent_lengths, comments_labels = self._sample_session()

        # One loader per prefix length t = 1 .. len(session).
        self.session_dataset = []
        for t in range(1, len(random_session) + 1):
            prefix = (
                [random_session[i] for i in range(t)],
                self.label,
                t,
                [sent_lengths[i] for i in range(t)],
                [comments_labels[i] for i in range(t)],
            )
            self.session_dataset.append(MyDataLoader([prefix], batch_size=1))

    @staticmethod
    def _error_rates(true_labels, pred_labels):
        """Return ``(false-positive rate, false-negative rate)`` for binary labels."""
        # Pin the label set so the matrix is always 2x2; without it a slice
        # containing a single class yields a 1x1 matrix and the unpack fails.
        tn, fp, fn, tp = confusion_matrix(true_labels, pred_labels, labels=[0, 1]).ravel()
        return fp / (fp + tn + 1e-10), fn / (fn + tp + 1e-10)

    def calc_reward(self, pred_scores, true_labels, alpha=0.5):
        """Utility loss plus ``alpha`` times a fairness penalty.

        The fairness penalty is the harmonic mean of FNED and FPED, where each
        is the summed absolute gap between a group's error rate (z == 1 vs.
        everything else) and the overall error rate over ``self.testloader``.

        Args:
            pred_scores: predictions passed to ``utility_criterion``.
            true_labels: targets passed to ``utility_criterion``.
            alpha: weight of the fairness penalty.

        Returns:
            ``utility_criterion(pred_scores, true_labels) + alpha * penalty``.
        """
        loss = self.utility_criterion(pred_scores, true_labels)

        # Index 1 collects samples with z == 1, index 0 all the others.
        true_by_group = {0: [], 1: []}
        pred_by_group = {0: [], 1: []}

        # Evaluate without dropout noise, then restore whatever mode the model
        # was in so the surrounding training loop is unaffected.
        was_training = model.training
        model.eval()
        try:
            with torch.no_grad():
                for docs, labels, doc_lengths, sent_lengths, z in self.testloader:
                    docs = docs.to(device)
                    labels = labels.to(device)
                    sent_lengths = sent_lengths.to(device)
                    doc_lengths = doc_lengths.to(device)

                    scores, _, _ = model(docs, doc_lengths, sent_lengths)
                    predictions = scores.max(dim=1)[1]

                    for i, pred in enumerate(predictions):
                        group = 1 if z[i] == 1 else 0
                        pred_by_group[group].append(pred.item())
                        true_by_group[group].append(labels[i].item())
        finally:
            model.train(was_training)

        # Overall rates use every sample; a confusion matrix ignores order.
        FPR_overall, FNR_overall = self._error_rates(
            true_by_group[1] + true_by_group[0],
            pred_by_group[1] + pred_by_group[0],
        )

        FPED = 0
        FNED = 0
        for group in (1, 0):
            group_fpr, group_fnr = self._error_rates(true_by_group[group], pred_by_group[group])
            FPED += abs(group_fpr - FPR_overall)
            FNED += abs(group_fnr - FNR_overall)

        # Harmonic mean of the two gaps; the epsilons guard against division
        # by zero when a gap is exactly 0.
        return loss + alpha * (2 / ((1 / (FNED + 1e-10)) + (1 / (FPED + 1e-10))))

    def perform_action(self, pred_score, true_label):
        """Advance one step and return ``(reward, done)``.

        ``done`` becomes True once every prefix of the session has been used.
        """
        self.current_time += 1
        done = self.current_time >= len(self.session_dataset)
        return self.calc_reward(pred_score, true_label), done


## helper functions
def calc_discounted_rewards(rewards, gamma):
    """Compute the discounted return for every time step.

    ``returns[t] = rewards[t] + gamma * rewards[t+1] + gamma**2 * rewards[t+2] + ...``

    Args:
        rewards: sequence of per-step rewards, in time order.
        gamma: discount factor.

    Returns:
        List of the same length as ``rewards``, where entry ``t`` is the
        return obtained from step ``t`` onwards. Empty input gives ``[]``.
    """
    returns = [0] * len(rewards)
    running = 0

    # Walk backwards so each return reuses the one after it:
    # G_t = r_t + gamma * G_{t+1}. Writing by index keeps the output in
    # time order, aligned with the input.
    for t in range(len(rewards) - 1, -1, -1):
        running = rewards[t] + gamma * running
        returns[t] = running

    return returns

def calc_discounted_rewards_better(rewards, gamma):
    """Compute per-step discounted returns in linear time, as a NumPy array.

    ``returns[t] = rewards[t] + gamma * rewards[t+1] + gamma**2 * rewards[t+2] + ...``

    Args:
        rewards: sequence of per-step numeric rewards, in time order.
        gamma: discount factor.

    Returns:
        1-D float ``numpy.ndarray`` of the same length as ``rewards``, where
        entry ``t`` is the return from step ``t`` onwards.
    """
    returns = np.zeros(len(rewards), dtype=float)
    running = 0.0

    # Backward recursion G_t = r_t + gamma * G_{t+1}. This avoids dividing a
    # cumulative sum by gamma**t, which breaks down for gamma == 0 and loses
    # precision for small gamma.
    for t in range(len(rewards) - 1, -1, -1):
        running = rewards[t] + gamma * running
        returns[t] = running

    return returns

## Run the RL

In [None]:
GAMMA           = 0.1
MAX_EPISODES    = 500
BASELINE_REWARD = 'mean'

# The environment samples sessions from the training split. The training
# loader is also what it iterates for the fairness term of the reward.
env = Env(train_dataset, dataloader, nn.CrossEntropyLoss(reduction='sum').to(device))
agent = PolicyAgent(model).to(device)
optimizer = optim.Adam(params=filter(lambda p: p.requires_grad, model.parameters()), lr=1e-5)
episode_rewards = []

agent.train()
softmax_func = nn.Softmax()  # not used in this cell

for episode_no in range(MAX_EPISODES):
    rewards = []
    action_probs = []
    actions = []

    done = False
    env.reset()
    current_state, true_label = env.get_state()

    # Roll out one episode: one step per revealed comment.
    while not done:
        action_dist = agent.get_action(current_state)

        # Sample a class index from the predicted distribution.
        # multinomial() returns a one-hot COUNT vector, so the sampled index
        # is its argmax; taking element [0] would give the count for class 0.
        # The probabilities are renormalised in float64 because float32
        # softmax output can sum to slightly more than 1, which numpy rejects.
        p = action_dist.detach().cpu().flatten().numpy().astype(np.float64)
        p /= p.sum()
        action = int(np.argmax(np.random.multinomial(1, p)))

        # NOTE(review): action_dist is already softmax-normalised, while
        # nn.CrossEntropyLoss expects raw logits -- confirm that applying the
        # normalisation twice is intended.
        reward, done = env.perform_action(action_dist, torch.tensor([true_label]).to(device))

        # The environment returns a cost (loss + unfairness); negate it so
        # that larger is better.
        rewards.append(-1 * reward.item())
        actions.append(action)
        action_probs.append(action_dist)

        if done:
            break
        current_state, true_label = env.get_state()

    # Monte Carlo update once the episode has finished.
    returns = calc_discounted_rewards(rewards, GAMMA)

    # NOTE(review): this weights the action PROBABILITY by the return; the
    # standard REINFORCE objective is -log_prob * return. Confirm the intended
    # objective and sign before changing it.
    loss = 0
    for i in range(len(rewards)):
        loss += action_probs[i][0, actions[i]] * returns[i]

    # NOTE(review): the baseline is subtracted from the summed loss as a
    # constant, so it shifts the reported value but contributes no gradient.
    if BASELINE_REWARD == 'mean':
        loss = (loss - np.mean(returns)) / len(rewards)
    else:
        loss = (loss - BASELINE_REWARD) / len(rewards)

    # Gradient step with optional norm clipping.
    optimizer.zero_grad()
    loss.backward()
    if config.max_grad_norm is not None:
        torch.nn.utils.clip_grad_norm_(model.parameters(), config.max_grad_norm)
    optimizer.step()

    # Progress report over the last 50 episodes.
    episode_rewards.append(np.sum(rewards))
    if episode_no % 10 == 0:
        print('[%d/%d] Mean Reward = %0.4f   Max Reward = %0.4f\t\t\t' %(episode_no, MAX_EPISODES, np.mean(episode_rewards[-50:]), np.max(episode_rewards[-50:])))
    if episode_no % 100 == 0:
        # Periodic checkpoint. Record the actual episode number instead of a
        # fixed literal so checkpoints can be told apart.
        # NOTE(review): the whole model/optimizer objects are pickled, so
        # loading needs the same class definitions to be importable.
        torch.save({
            'epoch': episode_no,
            'model': model,
            'optimizer': optimizer,
        }, 'best_model/vine_rl_model_it_%d.pth.tar' %(episode_no))

## Save the model

In [None]:
# Save the final model together with the optimizer that was last bound to the
# name `optimizer`.
# NOTE(review): 'epoch' is a hard-coded literal, not derived from either
# training loop. The model and optimizer are stored as whole pickled objects,
# so loading this file needs the same class definitions to be importable.
# The target directory 'best_model/' must already exist.
torch.save({
    'epoch': 5,
    'model': model,
    'optimizer': optimizer,
}, 'best_model/model.pth.tar')
