In [2]:
# Libraries
import math
import matplotlib.pyplot as plt
import pandas as pd
import torch

# Preliminaries
from sklearn.model_selection import train_test_split
from torchtext.data import Field, TabularDataset, BucketIterator, Iterator

# Models

import torch.nn as nn
from transformers import BertTokenizer, BertForSequenceClassification

# Training

import torch.optim as optim

# Evaluation

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns

In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [11]:
source_folder = '../Data/Processed'
raw_data_path = '../Data/train.csv'
destination_folder = '../Data/Processed'

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Model parameter
MAX_SEQ_LEN = 128
PAD_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
UNK_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.unk_token)

# Fields

label_field = Field(sequential=False, use_vocab=False, batch_first=True, dtype=torch.float)
id_field = Field(sequential=False, use_vocab=False, batch_first=True,dtype=torch.int)
text_field = Field(use_vocab=False, tokenize=tokenizer.encode, lower=False, include_lengths=False, batch_first=True,
                   fix_length=MAX_SEQ_LEN, pad_token=PAD_INDEX, unk_token=UNK_INDEX)
fields = [('target', label_field),('text', text_field),('id', id_field)]

# TabularDataset

train, valid, test = TabularDataset.splits(path=source_folder, train='train.csv', validation='valid.csv',
                                           test='test.csv', format='CSV', fields=fields, skip_header=True)

# Iterators

train_iter = BucketIterator(train, batch_size=16, sort_key=lambda x: len(x.text),
                            device=device, train=True, sort=True, sort_within_batch=True)
valid_iter = BucketIterator(valid, batch_size=16, sort_key=lambda x: len(x.text),
                            device=device, train=True, sort=True, sort_within_batch=True)

In [13]:
class BERT(nn.Module):

    def __init__(self):
        super(BERT, self).__init__()

        options_name = "bert-base-uncased"
        self.encoder = BertForSequenceClassification.from_pretrained(options_name)

    def forward(self, text, target):
        loss, text_fea = self.encoder(text, labels=target)[:2]

        return loss, text_fea

In [18]:
def save_checkpoint(save_path, model, valid_loss):

    if save_path == None:
        return
    
    state_dict = {'model_state_dict': model.state_dict(),
                  'valid_loss': valid_loss}
    
    torch.save(state_dict, save_path)
    print(f'Model saved to ==> {save_path}')


def save_metrics(save_path, train_loss_list, valid_loss_list, global_steps_list):

    if save_path == None:
        return
    
    state_dict = {'train_loss_list': train_loss_list,
                  'valid_loss_list': valid_loss_list,
                  'global_steps_list': global_steps_list}
    
    torch.save(state_dict, save_path)
    print(f'Model saved to ==> {save_path}')


def load_metrics(load_path):

    if load_path==None:
        return
    
    state_dict = torch.load(load_path, map_location=device)
    print(f'Model loaded from <== {load_path}')
    
    return state_dict['train_loss_list'], state_dict['valid_loss_list'], state_dict['global_steps_list']

In [16]:
def train(model,
          optimizer,
          criterion = nn.CrossEntropyLoss(),
          train_loader = train_iter,
          valid_loader = valid_iter,
          num_epochs = 8,
          eval_every = len(train_iter) // 2,
          file_path = destination_folder,
          best_valid_loss = float("Inf")):
    
    # initialize running values
    running_loss = 0.0
    valid_running_loss = 0.0
    global_step = 0
    train_loss_list = []
    valid_loss_list = []
    global_steps_list = []

    # training loop
    model.train()
    for epoch in range(num_epochs):
        for (target, text, ids), _ in train_loader:
            target = target.type(torch.LongTensor)           
            target = target.to(device)
            text = text.type(torch.LongTensor)  
            text = text.to(device)
            output = model(text, target)
            loss, _ = output

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # update running values
            running_loss += loss.item()
            global_step += 1

            # evaluation step
            if global_step % eval_every == 0:
                model.eval()
                with torch.no_grad():                    

                    # validation loop
                    for (target, text, ids), _ in valid_loader:
                        target = target.type(torch.LongTensor)           
                        target = target.to(device)
                        text = text.type(torch.LongTensor)  
                        text = text.to(device)
                        output = model(text, target)
                        loss, _ = output
                        
                        valid_running_loss += loss.item()

                # evaluation
                average_train_loss = running_loss / eval_every
                average_valid_loss = valid_running_loss / len(valid_loader)
                train_loss_list.append(average_train_loss)
                valid_loss_list.append(average_valid_loss)
                global_steps_list.append(global_step)

                # resetting running values
                running_loss = 0.0                
                valid_running_loss = 0.0
                model.train()

                # print progress
                print('Epoch [{}/{}], Step [{}/{}], Train Loss: {:.4f}, Valid Loss: {:.4f}'
                      .format(epoch+1, num_epochs, global_step, num_epochs*len(train_loader),
                              average_train_loss, average_valid_loss))
                
                # checkpoint
                if best_valid_loss > average_valid_loss:
                    best_valid_loss = average_valid_loss
                    save_checkpoint(file_path + '/' + 'model.pt', model, best_valid_loss)
                    save_metrics(file_path + '/' + 'metrics.pt', train_loss_list, valid_loss_list, global_steps_list)
    
    save_metrics(file_path + '/' + 'metrics.pt', train_loss_list, valid_loss_list, global_steps_list)
    print('Finished Training!')

model = BERT().to(device)
optimizer = optim.Adam(model.parameters(), lr=2e-5)

train(model=model, optimizer=optimizer)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch [1/8], Step [166/2664], Train Loss: 0.6580, Valid Loss: 0.6879
Model saved to ==> ../Data/Processed/model.pt
Model saved to ==> ../Data/Processed/metrics.pt
Epoch [1/8], Step [332/2664], Train Loss: 0.6952, Valid Loss: 0.6923
Epoch [2/8], Step [498/2664], Train Loss: 0.5881, Valid Loss: 0.4854
Model saved to ==> ../Data/Processed/model.pt
Model saved to ==> ../Data/Processed/metrics.pt
Epoch [2/8], Step [664/2664], Train Loss: 0.4487, Valid Loss: 0.4042
Model saved to ==> ../Data/Processed/model.pt
Model saved to ==> ../Data/Processed/metrics.pt
Epoch [3/8], Step [830/2664], Train Loss: 0.4115, Valid Loss: 0.4250
Epoch [3/8], Step [996/2664], Train Loss: 0.3186, Valid Loss: 0.4440
Epoch [4/8], Step [1162/2664], Train Loss: 0.2993, Valid Loss: 0.4376
Epoch [4/8], Step [1328/2664], Train Loss: 0.2468, Valid Loss: 0.4621
Epoch [5/8], Step [1494/2664], Train Loss: 0.2283, Valid Loss: 0.4441
Epoch [5/8], Step [1660/2664], Train Loss: 0.1800, Valid Loss: 0.5845
Epoch [6/8], Step [1826/

In [17]:
model = BERT().to(device)
optimizer = optim.Adam(model.parameters(), lr=2e-5)

train(model=model, optimizer=optimizer)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Epoch [1/8], Step [166/2664], Train Loss: 0.6398, Valid Loss: 0.5111
Model saved to ==> ../Data/Processed/model.pt
Model saved to ==> ../Data/Processed/metrics.pt
Epoch [1/8], Step [332/2664], Train Loss: 0.4595, Valid Loss: 0.4190
Model saved to ==> ../Data/Processed/model.pt
Model saved to ==> ../Data/Processed/metrics.pt
Epoch [2/8], Step [498/2664], Train Loss: 0.4227, Valid Loss: 0.4225
Epoch [2/8], Step [664/2664], Train Loss: 0.3016, Valid Loss: 0.4530
Epoch [3/8], Step [830/2664], Train Loss: 0.3068, Valid Loss: 0.4244
Epoch [3/8], Step [996/2664], Train Loss: 0.2215, Valid Loss: 0.5184
Epoch [4/8], Step [1162/2664], Train Loss: 0.2044, Valid Loss: 0.4905
Epoch [4/8], Step [1328/2664], Train Loss: 0.1616, Valid Loss: 0.5757
Epoch [5/8], Step [1494/2664], Train Loss: 0.1344, Valid Loss: 0.6022
Epoch [5/8], Step [1660/2664], Train Loss: 0.1320, Valid Loss: 0.6244
Epoch [6/8], Step [1826/2664], Train Loss: 0.1051, Valid Loss: 0.8336
Epoch [6/8], Step [1992/2664], Train Loss: 0.095