In [1]:
import numpy as np
import pickle
import random
import torch

from sklearn.metrics import classification_report
from torch import nn
from torch import optim
from torch.nn import functional as F
from torch.nn.utils import clip_grad_norm_ as clip_grad_norm
from torch.utils.data import DataLoader

In [2]:
# Single seed shared by every random number generator used in this notebook.
seed = 1

# Seed Python's `random`, NumPy, and PyTorch (CPU, then all CUDA devices).
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    _seed_fn(seed)

# Ask cuDNN for deterministic kernels and turn off its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Sanity check: this draw should be identical on every fresh run.
print(torch.randn(5))

tensor([ 0.6614,  0.2669,  0.0617,  0.6213, -0.4519])


# Common functions

In [3]:
def train(model, trn_dataloader, optimizer, criterion, device=torch.device('cpu')):
    """Train ``model`` for one pass over ``trn_dataloader`` and return it.

    Every 50 batches a progress line is printed with the loss of the current
    batch and the running accuracy over all samples seen so far in this pass.

    Args:
        model: module to train; it is switched to training mode.
        trn_dataloader: iterable yielding ``(batch, label)`` pairs of tensors.
        optimizer: optimizer that owns ``model``'s parameters.
        criterion: callable ``criterion(output, label)`` returning a scalar
            loss tensor.
        device: device the batches and labels are moved to before the
            forward pass.

    Returns:
        The same ``model`` object, updated in place.
    """
    total_batch, correct_pred, total_sample = 0, 0, 0
    model.train()

    for batch, label in trn_dataloader:
        total_batch += 1

        batch = batch.to(device)
        label = label.to(device)

        optimizer.zero_grad()
        output = model(batch)
        loss = criterion(output, label)
        loss.backward()
        # Clip the total gradient norm to 1.0 before the parameter update.
        clip_grad_norm(model.parameters(), 1.0)
        optimizer.step()

        trn_loss = loss.item()
        # assumes output is (batch, classes): the predicted class is the
        # arg-max along dim 1.
        pred_label = output.argmax(dim=1)
        # Accumulate plain Python ints so the running accuracy below is an
        # ordinary float division, independent of tensor division semantics.
        correct_pred += (pred_label == label).sum().item()
        total_sample += label.size(0)

        if total_batch % 50 == 0:
            print(f"#{total_batch}: trn loss = {trn_loss:.4f} | trn acc = {(correct_pred/total_sample):.4f}")

    return model

In [4]:
def eval(model, dataloader, criterion, device=torch.device('cpu')):
    """Compute the average loss and the accuracy of ``model`` on ``dataloader``.

    The model is switched to evaluation mode and the forward passes run with
    autograd disabled, so no computation graph is kept during evaluation.

    Note: this function shadows the Python builtin ``eval`` in the notebook
    namespace; the name is kept because later cells call it.

    Args:
        model: module to evaluate.
        dataloader: iterable yielding ``(batch, label)`` pairs of tensors.
        criterion: callable ``criterion(output, label)`` returning a scalar
            loss tensor.
        device: device the batches and labels are moved to before the
            forward pass.

    Returns:
        ``(avg_loss, acc)`` as Python floats. ``avg_loss`` is the mean of the
        per-batch losses (each batch weighs the same, whatever its size) and
        ``acc`` is the fraction of samples whose arg-max matches the label.

    Raises:
        ValueError: if ``dataloader`` yields no batches or no samples.
    """
    total_loss, total_batch = 0.0, 0
    correct_pred, total_sample = 0, 0
    model.eval()

    # Evaluation never calls backward(), so skip autograd bookkeeping.
    with torch.no_grad():
        for batch, label in dataloader:
            batch = batch.to(device)
            label = label.to(device)

            output = model(batch)

            total_batch += 1
            total_loss += criterion(output, label).item()

            # assumes output is (batch, classes): the predicted class is the
            # arg-max along dim 1.
            pred_label = output.argmax(dim=1)
            correct_pred += (pred_label == label).sum().item()
            total_sample += label.size(0)

    if total_batch == 0 or total_sample == 0:
        raise ValueError("eval() received an empty dataloader; there is nothing to evaluate")

    avg_loss = total_loss / total_batch
    acc = correct_pred / total_sample

    return avg_loss, acc

# Load data

In [5]:
# Load the labels of the train / validation / test splits.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only point these paths at files produced by a trusted preprocessing step.
# Each file is opened in a `with` block so its handle is closed right away.
with open("../preprocessed_embeddings/elmo_trn_title_labels.pkl", "rb") as label_file:
    y_trn = pickle.load(label_file)
with open("../preprocessed_embeddings/elmo_val_title_labels.pkl", "rb") as label_file:
    y_val = pickle.load(label_file)
with open("../preprocessed_embeddings/elmo_tst_title_labels.pkl", "rb") as label_file:
    y_tst = pickle.load(label_file)

In [6]:
# Load the title embeddings of the train / validation / test splits and
# convert each unpickled object with its `.tolist()` method
# (presumably NumPy arrays of ELMo embeddings - confirm against the
# preprocessing step).
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only point these paths at files produced by a trusted preprocessing step.
# Each file is opened in a `with` block so its handle is closed right away.
with open("../preprocessed_embeddings/elmo_trn_title.pkl", "rb") as embedding_file:
    x_trn = pickle.load(embedding_file).tolist()
with open("../preprocessed_embeddings/elmo_val_title.pkl", "rb") as embedding_file:
    x_val = pickle.load(embedding_file).tolist()
with open("../preprocessed_embeddings/elmo_tst_title.pkl", "rb") as embedding_file:
    x_tst = pickle.load(embedding_file).tolist()

In [7]:
# Number of samples per batch, shared by all three loaders.
batch_size = 128

# Each dataset is a plain list of (embedding tensor, label) pairs; the label is
# fetched with the same index as its embedding. The raw inputs are deleted as
# soon as they are wrapped so they stop occupying kernel memory.
# NOTE: no `shuffle` argument is passed to any DataLoader below.

### Training set
trn_dataset = [
    (torch.tensor(embedding), y_trn[idx])
    for idx, embedding in enumerate(x_trn)
]
del x_trn, y_trn
trn_dataloader = DataLoader(trn_dataset, batch_size=batch_size)

### Validation set
val_dataset = [
    (torch.tensor(embedding), y_val[idx])
    for idx, embedding in enumerate(x_val)
]
del x_val, y_val
val_dataloader = DataLoader(val_dataset, batch_size=batch_size)

### Test set
tst_dataset = [
    (torch.tensor(embedding), y_tst[idx])
    for idx, embedding in enumerate(x_tst)
]
del x_tst, y_tst
tst_dataloader = DataLoader(tst_dataset, batch_size=batch_size)

# Convolutional Neural Network

In [8]:
class CNN(nn.Module):
    """One-layer 1-D convolutional classifier with max pooling over positions.

    Args:
        drop_rate: dropout probability applied to the input.
        kernel_size: width of the convolution window along the length axis.
        embed_size: embedding dimension; used as both the input and the output
            channel count of the convolution.
        class_size: number of output scores per sample.
    """

    def __init__(self, drop_rate=0.0, kernel_size=4, embed_size=1024, class_size=2):
        super().__init__()

        # Layers. The attribute names double as state_dict keys and the
        # creation order fixes the seeded initialisation, so both are kept.
        self.dropout = nn.Dropout(p=drop_rate)
        self.filter = nn.Conv1d(
            in_channels=embed_size,
            out_channels=embed_size,
            kernel_size=kernel_size,
        )
        self.fc = nn.Linear(in_features=embed_size, out_features=class_size)

    def forward(self, batch):
        """Map a ``B x L x E`` batch to ``B x class_size`` scores."""
        # Conv1d wants channels before length: B x L x E -> B x E x L.
        channels_first = self.dropout(batch.transpose(1, 2))

        activated = torch.sigmoid(self.filter(channels_first))
        # Keep, for every filter, its largest response across all positions.
        pooled, _ = activated.max(dim=2)

        return self.fc(pooled)

In [9]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# CNN with its default hyper-parameters, placed on the chosen device.
model = CNN().to(device)

# Adam over all model parameters with a learning rate of 0.001.
optimizer = optim.Adam(model.parameters(), lr=0.001)

criterion = nn.CrossEntropyLoss()

In [10]:
# Train for five epochs; after each one, report loss and accuracy on both the
# training and the validation split.
for e in range(5):
    # Every epoch header except the first is preceded by a blank line.
    if e == 0:
        print(f'Epoch {e}\n')
    else:
        print(f'\nEpoch {e}\n')

    model = train(model, trn_dataloader, optimizer, criterion, device)

    trn_loss, trn_acc = eval(model, trn_dataloader, criterion, device)
    val_loss, val_acc = eval(model, val_dataloader, criterion, device)
    print(f"\ntrn loss = {trn_loss} trn acc = {trn_acc} val loss = {val_loss} val acc = {val_acc}")

Epoch 0

#50: trn loss = 0.0015 | trn acc = 0.9567
#100: trn loss = 0.0004 | trn acc = 0.9776
#150: trn loss = 0.0019 | trn acc = 0.9831
#200: trn loss = 0.0377 | trn acc = 0.9867

trn loss = 0.05926284155394296 trn acc = 0.9781514232233912 val loss = 0.16791883727649162 val acc = 0.9443126623743364

Epoch 1

#50: trn loss = 0.0012 | trn acc = 0.9955
#100: trn loss = 0.0003 | trn acc = 0.9977
#150: trn loss = 0.0002 | trn acc = 0.9982
#200: trn loss = 0.0004 | trn acc = 0.9985

trn loss = 0.012550897891626327 trn acc = 0.9948363777189698 val loss = 0.047145395779391815 val acc = 0.9818140743250875

Epoch 2

#50: trn loss = 0.0000 | trn acc = 0.9991
#100: trn loss = 0.0001 | trn acc = 0.9995
#150: trn loss = 0.0000 | trn acc = 0.9996
#200: trn loss = 0.0000 | trn acc = 0.9997

trn loss = 0.008424548098660375 trn acc = 0.9964177370425353 val loss = 0.042913020557567245 val acc = 0.9871230091494408

Epoch 3

#50: trn loss = 0.0000 | trn acc = 0.9992
#100: trn loss = 0.0000 | trn acc = 0.9

In [11]:
# Loss and accuracy of the current model on the validation split.
val_loss, val_acc = eval(model, val_dataloader, criterion, device)
print(
    f"Validation Loss: {val_loss}",
    f"Validation Acc: {val_acc}",
    sep="\n",
)

Validation Loss: 0.018285365798471634
Validation Acc: 0.9951428894160171


In [12]:
# Loss and accuracy of the current model on the held-out test split.
tst_loss, tst_acc = eval(model, tst_dataloader, criterion, device)
print(
    f"Test Loss: {tst_loss}",
    f"Test Acc: {tst_acc}",
    sep="\n",
)

Test Loss: 0.032086542630161474
Test Acc: 0.989159891598916


In [51]:
# Collect the predicted and the true label of every test sample, then print a
# per-class precision / recall / F1 report.
predictions = []
true_labels = []

# Put the model in evaluation mode here instead of relying on an earlier cell
# having done so; this cell must give the same result in any execution order.
model.eval()

# Inference only: no gradients are needed, so skip autograd bookkeeping.
with torch.no_grad():
    for batch, labels in tst_dataloader:
        batch_predictions = model(batch.to(device)).argmax(dim=1).cpu().tolist()
        predictions.extend(batch_predictions)
        true_labels.extend(labels.tolist())

print(classification_report(true_labels, predictions))

              precision    recall  f1-score   support

           0       0.98      1.00      0.99      2286
           1       1.00      0.98      0.99      2142

    accuracy                           0.99      4428
   macro avg       0.99      0.99      0.99      4428
weighted avg       0.99      0.99      0.99      4428

