In [1]:
# !pip install transformers

In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim

# torchtext libraries
from torchtext.legacy.data import Field, TabularDataset, BucketIterator, Iterator, Dataset

# huggingface libraries
from transformers import BertTokenizer, BertForSequenceClassification


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

### Data Loading

In [5]:
MAX_SEQ_LEN = 128
PAD_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
UNK_INDEX = tokenizer.convert_tokens_to_ids(tokenizer.unk_token)
label_field = Field(sequential=False, use_vocab=False, batch_first=True, dtype=torch.long)
text_field = Field(use_vocab=False, tokenize=tokenizer.encode, lower=False, include_lengths=False, batch_first=True,
                   fix_length=MAX_SEQ_LEN, pad_token=PAD_INDEX, unk_token=UNK_INDEX)
fields = [('text', text_field), ('label', label_field)]
train, valid, test = TabularDataset.splits(path='preprocessed_data/', train='trn.csv', validation='val.csv',
                                           test='tst.csv', format='CSV', fields=fields, skip_header=True)

In [6]:
train_iter = BucketIterator(train, batch_size=8, sort_key=lambda x: len(x.text),
                            device=device, train=True, sort=True, sort_within_batch=True)
valid_iter = BucketIterator(valid, batch_size=8, sort_key=lambda x: len(x.text),
                            device=device, train=True, sort=True, sort_within_batch=True)
test_iter = Iterator(test, batch_size=8, device=device, train=False, shuffle=False, sort=False)

In [7]:
class BERT(nn.Module):

    def __init__(self):
        super(BERT, self).__init__()

        self.encoder = BertForSequenceClassification.from_pretrained('bert-base-uncased')

    def forward(self, text, label):
        loss, pred = self.encoder(text, labels=label)[:2]
        pred = torch.argmax(pred, dim=1)
        return loss, pred

In [10]:
# Training Function

def train(model,
          optimizer,
          criterion = nn.BCELoss(),
          train_loader = train_iter,
          valid_loader = valid_iter,
          num_epochs = 10,):
    
    # initialize running values
    running_loss = 0.0
#     valid_running_loss = 0.0
    global_step = 0
#     train_loss_list = []
#     valid_loss_list = []

    total_acc = 0
    # training loop
    model.train()
    total_acc = 0
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        acc = 0
        seen = 0
        for batch in train_loader:   
            text = batch.text
            labels = batch.label
            labels = labels.to(device)
            text = text.to(device)
            # text = text.type(torch.LongTensor)
            # labels = labels.type(torch.LongTensor)
            output = model(text, labels)
            loss, pred = output
            acc += torch.eq(pred, labels).sum().item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # update running values
            running_loss += loss.item()
            epoch_loss += loss.item()
            global_step += 1
            seen += 8
        acc = acc/seen
        print(acc)
    torch.save(model.state_dict(), 'weights.pt')

In [11]:
model = BERT()
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=2e-5)
train(model, optimizer=optimizer)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

0.9676126240773734
0.9859697123950114
0.9929689488419445
0.9954823110206159
0.9972321201323492
0.9976457113769407
0.9981547467548995
0.9987592262662255
0.9984410791550012
0.9984728938661237


In [12]:
# TODO ADD TIMER

In [13]:
# TODO ADD PREDICT FUNCTION 
def predict(text, label, model):
  text = tokenizer.encode(text)
  text=torch.tensor(text)
  text = text.unsqueeze(0)
  label = torch.tensor(label)
  label = label.unsqueeze(0)
  text = text.to(device)
  label = label.to(device)
  model=model.to(device)
  loss, pred = model(text, label)
  return pred.item()
  

In [19]:
text = 'Leading Republican Corker says Trump immigration order poorly implemented'
label = 1
output = predict(text, label, model)
print(output)

1
