In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from datasets import load_dataset
import pandas as pd

import torch
from torch import nn
from tqdm import tqdm

from data_processing import get_datafrom_tokenizer
from baseline import BERTClass

In [3]:
# Load the "dyda_da" configuration of the SILICONE benchmark via Hugging Face
# `datasets` (served from the local cache when already downloaded).
dataset = load_dataset(path="silicone", name="dyda_da")

Found cached dataset silicone (C:/Users/jerem/.cache/huggingface/datasets/silicone/dyda_da/1.0.0/af617406c94e3f78da85f7ea74ebfbd3f297a9665cb54adbae305b03bc4442a5)


  0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
# Keep only the raw utterance text and its label for each split; the other
# dataset columns are not used by this notebook.
simple_train, simple_test = (
    pd.DataFrame(dataset[split_name])[["Utterance", "Label"]]
    for split_name in ("train", "test")
)
simple_train

Unnamed: 0,Utterance,Label
0,"say , jim , how about going for a few beers af...",1
1,you know that is tempting but is really not go...,0
2,what do you mean ? it will help us to relax .,3
3,do you really think so ? i don't . it will jus...,3
4,i guess you are right.but what shall we do ? i...,3
...,...,...
87165,i want a pair of locus .,1
87166,"take a look at the ones on display , please .",0
87167,i need size 41 .,1
87168,"could i have the check , please ?",1


In [5]:
# Turn the two (Utterance, Label) frames into train/test loaders using the
# project helper from data_processing.
# NOTE(review): the training cells index every batch by 'ids', 'mask',
# 'token_type_ids' and 'targets' -- presumably the helper tokenises the text and
# yields dict batches with those keys; confirm in data_processing.
train_loader, test_loader = get_datafrom_tokenizer(simple_train, simple_test)

In [6]:
# Prefer the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# nn.Module.to() moves the module in place and returns it, so the call can be
# chained directly onto construction.
model = BERTClass().to(device)
model

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BERTClass(
  (l1): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    

In [7]:
# Loss applied to the classifier outputs and the integer class targets.
criterion = nn.CrossEntropyLoss()
# Adam over every parameter of the model, with a small fine-tuning learning rate.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

In [8]:
def compute_test_loss(net, testloader, criterion):
    """Evaluate ``net`` on ``testloader`` and return ``(mean_loss, error_rate)``.

    The network passed in (not a notebook global) is switched to eval mode and
    run without gradient tracking. Tensors are moved to the device holding the
    network's first parameter (CPU if the network has no parameters).

    Two batch layouts are accepted:

    * a mapping with the keys ``'ids'``, ``'mask'``, ``'token_type_ids'`` and
      ``'targets'`` -- the layout the training cells of this notebook read from
      their loader; the network is called as ``net(ids, mask, token_type_ids)``;
    * an ``(inputs, targets)`` pair; the network is called as ``net(inputs)``.

    Args:
        net: ``torch.nn.Module`` producing one score per class for every sample.
        testloader: iterable of batches in one of the layouts above.
        criterion: callable ``criterion(outputs, targets)`` returning a scalar
            tensor. The per-batch value is weighted by the batch size, so the
            result is a per-sample mean only if the criterion itself averages
            over the batch (assumed -- true for the default ``CrossEntropyLoss``).

    Returns:
        Tuple ``(loss, error)``: the sample-weighted mean loss and the fraction
        of misclassified samples (``1 - accuracy``).

    Raises:
        ValueError: if ``testloader`` yields no samples.
    """
    from collections.abc import Mapping

    # Follow the network's own device instead of relying on a global.
    first_param = next(net.parameters(), None)
    eval_device = first_param.device if first_param is not None else torch.device("cpu")

    net.eval()  # disable dropout etc. on the network that is actually evaluated
    test_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in testloader:
            if isinstance(batch, Mapping):
                targets = batch['targets'].to(eval_device)
                outputs = net(
                    batch['ids'].to(eval_device),
                    batch['mask'].to(eval_device),
                    batch['token_type_ids'].to(eval_device),
                )
            else:
                inputs, targets = batch
                inputs, targets = inputs.to(eval_device), targets.to(eval_device)
                outputs = net(inputs)

            batch_size = targets.size(0)
            # Undo the batch averaging so that a smaller final batch is not over-weighted.
            test_loss += criterion(outputs, targets).item() * batch_size
            _, predicted = outputs.max(1)
            correct += predicted.eq(targets).sum().item()
            total += batch_size

    if total == 0:
        raise ValueError("testloader produced no samples; cannot compute test metrics")
    return test_loss / total, 1 - correct / total

In [9]:
def train(epoch):
    """Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``train_loader``, ``optimizer``,
    ``criterion`` and ``device``. Each batch is read as a mapping with the keys
    ``'ids'``, ``'mask'``, ``'token_type_ids'`` and ``'targets'``.

    Args:
        epoch: value shown in the progress message; it does not influence the
            training itself.

    Returns:
        None. The model parameters are updated in place.
    """
    model.train()
    for step, data in enumerate(train_loader):
        ids = data['ids'].to(device)
        mask = data['mask'].to(device)
        token_type_ids = data['token_type_ids'].to(device)
        targets = data['targets'].to(device)

        # Clear the gradients once per step, before the new backward pass.
        optimizer.zero_grad()

        outputs = model(ids, mask, token_type_ids)
        loss = criterion(outputs, targets)

        # Sparse logging: only every 5000th batch (including the first) is printed.
        if step % 5000 == 0:
            print(f'Epoch: {epoch}, Loss:  {loss.item()}')

        loss.backward()
        optimizer.step()

In [10]:
# Collect and list the parameters that will actually receive gradient updates
# (those with requires_grad set); frozen parameters are skipped.
print("Params to learn:")
params_to_update = []
for name, param in model.named_parameters():
    if param.requires_grad:
        params_to_update.append(param)
        print("\t", name)

Params to learn:
	 l3.weight
	 l3.bias


In [11]:
# Length of the training loader -- the number of batches per epoch, assuming it
# is a torch DataLoader (TODO confirm in data_processing).
len(train_loader)

1363

In [13]:
# Fine-tuning loop: one pass over train_loader per epoch, followed by an
# evaluation on test_loader. Progress bars are context-managed so each one is
# closed (also on interruption) instead of piling up epoch after epoch.
train_epoch = 10
with tqdm(total=train_epoch, leave=False) as epoch_pbar:
    for epoch in range(train_epoch):
        # compute_test_loss() leaves the model in eval mode, so training mode
        # has to be re-enabled at the start of every epoch.
        model.train()
        train_loss = 0.0
        correct = 0
        seen = 0  # samples actually processed this epoch

        with tqdm(total=len(train_loader), leave=False) as pbar:
            for i, data in enumerate(train_loader, 0):
                ids = data['ids'].to(device)
                mask = data['mask'].to(device)
                token_type_ids = data['token_type_ids'].to(device)
                targets = data['targets'].to(device)

                optimizer.zero_grad()

                outputs = model(ids, mask, token_type_ids)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

                # Running statistics, weighted by batch size so a shorter last
                # batch does not distort the epoch average.
                batch_size = len(targets)
                _, predicted = outputs.max(1)
                correct += predicted.eq(targets).sum().item()
                train_loss += loss.item() * batch_size
                seen += batch_size

                pbar.set_description(f"Train Loss : {loss.item(): .4f}")
                pbar.update()

            test_loss, errors = compute_test_loss(model, test_loader, criterion)

            # Normalise by the samples seen rather than len(simple_train): the
            # two only agree when the loader yields every row exactly once.
            denom = max(seen, 1)
            summary = (
                f"Train Loss : {train_loss/denom: .4f} | Test Loss : {test_loss: .4f} | "
                f"Train Error : {1 - correct/denom: .2%} | Test Error : {errors: .2%}"
            )
            pbar.set_description(summary)

        # Epochs are shown 1-based so the last one reads "10/10", not "9/10".
        epoch_pbar.set_description(f"Epoch : {epoch + 1}/{train_epoch} | {summary}")
        epoch_pbar.update()


Train Loss :  1.6077:   0%|          | 5/1363 [00:16<1:13:46,  3.26s/it]
Train Loss :  1.6986:   1%|▏         | 19/1363 [00:31<36:58,  1.65s/it]