In [1]:
# !pip install transformers

In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

# torchtext libraries
from torchtext.legacy.data import Field, TabularDataset, BucketIterator, Iterator, Dataset

# huggingface libraries
from transformers import BertTokenizer, BertForSequenceClassification


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

### Data Loading

In [5]:
MAX_SEQ_LEN = 128
PAD_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
UNK_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.unk_token)
label_field = Field(sequential=False, use_vocab=False, batch_first=True, dtype=torch.long)
text_field = Field(use_vocab=False, tokenize=tokenizer.encode, lower=False, include_lengths=False, batch_first=True,
                   fix_length=MAX_SEQ_LEN, pad_token=PAD_INDEX, unk_token=UNK_INDEX)
fields = [('text', text_field), ('label', label_field)]
train, valid, test = TabularDataset.splits(path='preprocessed_data/', train='trn.csv', validation='val.csv',
                                           test='tst.csv', format='CSV', fields=fields, skip_header=True)

In [6]:
train_iter = BucketIterator(train, batch_size=8, sort_key=lambda x: len(x.text),
                            device=device, train=True, sort=True, sort_within_batch=True)
valid_iter = BucketIterator(valid, batch_size=8, sort_key=lambda x: len(x.text),
                            device=device, train=True, sort=True, sort_within_batch=True)
test_iter = Iterator(test, batch_size=8, device=device, train=False, shuffle=False, sort=False)

In [7]:
class BERT(nn.Module):

    def __init__(self):
        super(BERT, self).__init__()

        self.encoder = BertForSequenceClassification.from_pretrained('bert-base-uncased')

    def forward(self, text, label):
        loss, pred = self.encoder(text, labels=label)[:2]
        pred = torch.argmax(pred, dim=1)
        return loss, pred

In [8]:
def train(model,
          optimizer,
          criterion = nn.BCELoss(),
          train_loader = train_iter,
          valid_loader = valid_iter,
          num_epochs = 10,):
    
    # initialize values
    running_loss = 0.0
    total_acc = 0
    # training loop
    # model.train()
    total_acc = 0
    best_val_acc = 0
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        test_acc = 0
        val_acc = 0
        test_seen = 0
        val_seen = 0
        model.train()
        for batch in train_loader:   
            text = batch.text
            labels = batch.label
            labels = labels.to(device)
            text = text.to(device)
            output = model(text, labels)
            loss, pred = output
            test_acc += torch.eq(pred, labels).sum().item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            epoch_loss += loss.item()
            test_seen += len(batch)
        test_acc = test_acc/test_seen
        print('The training accuracy for epoch {epoch} is {test_acc}'.format(epoch=epoch+1, test_acc=test_acc))
        print('The cumulative loss for epoch {epoch} is {epoch_loss}'.format(epoch=epoch+1, epoch_loss=epoch_loss))
        # validation 
        model.eval()
        for batch in valid_loader:   
            text = batch.text
            labels = batch.label
            labels = labels.to(device)
            text = text.to(device)
            output = model(text, labels)
            loss, pred = output
            val_acc += torch.eq(pred, labels).sum().item()
            val_seen += len(batch)
        val_acc = val_acc/val_seen
        print('The validation accuracy for epoch {epoch} is {val_acc}'.format(epoch=epoch+1, val_acc=val_acc))
        if(val_acc > best_val_acc):
          best_val_acc = val_acc
          torch.save(model.state_dict(), 'weights.pt')

In [9]:
model = BERT()
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=2e-5)
train(model, optimizer=optimizer)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

The training accuracy for epoch 1 is 0.9678291860243111
The cumulative loss for epoch 1 is 344.6585331477472
The validation accuracy for epoch 1 is 0.757295611494765
The training accuracy for epoch 2 is 0.9853942595303252
The cumulative loss for epoch 2 is 174.31880762305082
The validation accuracy for epoch 2 is 0.8841612831365561
The training accuracy for epoch 3 is 0.9922039075924394
The cumulative loss for epoch 3 is 104.5520895757436
The validation accuracy for epoch 3 is 0.8713521942526176
The training accuracy for epoch 4 is 0.9950995990581047
The cumulative loss for epoch 4 is 63.46034166841855
The validation accuracy for epoch 4 is 0.8586544887502785
The training accuracy for epoch 5 is 0.9971997708903456
The cumulative loss for epoch 5 is 38.07068478666588
The validation accuracy for epoch 5 is 0.9253731343283582
The training accuracy for epoch 6 is 0.9976452618850633
The cumulative loss for epoch 6 is 30.28521613227167
The validation accuracy for epoch 6 is 0.935509022053909

In [10]:
# TODO ADD TIMER

In [14]:
model = BERT()
model = model.to(device)
model.load_state_dict(torch.load('weights.pt'))

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

<All keys matched successfully>

In [15]:
# TODO ADD PREDICT FUNCTION 
def predict(text, label, model):
  model.eval()
  text = tokenizer.encode(text)
  text=torch.tensor(text)
  text = text.unsqueeze(0)
  label = torch.tensor(label)
  label = label.unsqueeze(0)
  text = text.to(device)
  label = label.to(device)
  model=model.to(device)
  loss, pred = model(text, label)
  return pred.item()
  

In [16]:
text = 'Maine GOP Governor’s Statement On Drug Overdoses Proves ‘Pro-Life Republicans’ Don’t Exist'
label = 0
output = predict(text, label, model)
print(output)

0
