## Imports

In [1]:
from transformers import BertTokenizer

# from torch.utils.data import Dataset
from models.CustomDatasetBinary import CustomDatasetBinary
from torch.utils.data import DataLoader

from tqdm import tqdm

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Datasets & loaders

In [2]:
# Tokenizer for the same 'bert-base-uncased' checkpoint that the encoder is loaded from.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Column holding the target label, and the batch size shared by all loaders.
LABEL_COL = 'disagreement_lev'
BATCH_SIZE = 16

# starting w/ MD dataset
df = pd.read_csv('../data/MD_LoD.csv', index_col = 0)

# exclude labels == 2 (slight agreement) so that only the remaining label
# values are passed to the binary dataset wrapper
df = df[df[LABEL_COL] != 2]

# One dataset per value of the 'split' column: train / dev / test.
train_set = CustomDatasetBinary(df[df['split'] == 'train'], tokenizer, label_col = LABEL_COL)
val_set = CustomDatasetBinary(df[df['split'] == 'dev'], tokenizer, label_col = LABEL_COL)
test_set = CustomDatasetBinary(df[df['split'] == 'test'], tokenizer, label_col = LABEL_COL)

print(f'len: \n\tTrain: {len(train_set)}\n\tVal: {len(val_set)}\n\tTest: {len(test_set)}')

# Only the training loader is shuffled. Validation and test batches keep the
# dataset order so evaluation runs are reproducible and per-sample predictions
# can be matched back to the dataframe rows.
train_loader = DataLoader(train_set, batch_size = BATCH_SIZE, shuffle = True)
val_loader = DataLoader(val_set, batch_size = BATCH_SIZE, shuffle = False)
test_loader = DataLoader(test_set, batch_size = BATCH_SIZE, shuffle = False)

len: 
	Train: 20
	Val: 10
	Test: 10


## Model

In [3]:
from models.TransformerBinaryClassifier import TransformerBinaryClassifier

from transformers import BertModel
# Load the pretrained 'bert-base-uncased' checkpoint as a bare BertModel.
# transformers warns (see the cell output) that the checkpoint's `cls.*`
# weights are not used by BertModel; the message itself states this is
# expected when the checkpoint comes from a different architecture.
bert_model = BertModel.from_pretrained('bert-base-uncased')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
# Build the classifier around the pretrained BERT encoder, then push a single
# training batch through it as a quick shape check.
# The forward call receives two positional inputs taken from the batch dict:
#   1. input_ids
#   2. attention_mask
model = TransformerBinaryClassifier(model = bert_model, dropout_rate = .2)

# Each item yielded by the loader unpacks into (inputs, labels).
inp, lbl = next(iter(train_loader))
out = model(inp['input_ids'], inp['attention_mask'])

print(out.shape)
# Flatten to one value per sample; left as the last expression so the
# notebook displays the resulting shape.
out.reshape(len(out)).shape


torch.Size([16, 1])


torch.Size([16])

In [5]:
from torch import nn
from torch.optim import Adam

from models.utils import train_loop

# Binary cross-entropy loss, and Adam over every parameter of `model`.
criterion = nn.BCELoss()
optimizer = Adam(model.parameters(), lr = 1e-4)

# Run 5 epochs on CPU. The value returned by train_loop is kept in `hystory`
# so it can be stored afterwards.
# NOTE(review): the exact contents of the returned object are defined in
# models.utils.train_loop — confirm there before relying on its structure.
hystory = train_loop(
    model = model,
    train_loader = train_loader,
    val_loader = val_loader,
    criterion = criterion,
    optimizer = optimizer,
    epochs = 5,
    device = 'cpu',
    log_freq = 50,
    name = 'test0',
)

-- Epoch 01 --
   [Recap 01 epoch] - train_loss: 0.770, train_acc: 0.3750 | eval_loss: 0.582, eval_acc: 0.9000 | elapsed time: 17.2s	 <-- Best epoch so far, val	 <-- Best epoch so far, acc

-- Epoch 02 --
   [Recap 02 epoch] - train_loss: 0.627, train_acc: 0.7188 | eval_loss: 0.542, eval_acc: 0.9000 | elapsed time: 17.3s	 <-- Best epoch so far, val	 <-- Best epoch so far, acc

-- Epoch 03 --
   [Recap 03 epoch] - train_loss: 0.613, train_acc: 0.6562 | eval_loss: 0.477, eval_acc: 0.9000 | elapsed time: 17.0s	 <-- Best epoch so far, val	 <-- Best epoch so far, acc

-- Epoch 04 --
   [Recap 04 epoch] - train_loss: 0.479, train_acc: 0.7812 | eval_loss: 0.394, eval_acc: 0.9000 | elapsed time: 17.0s	 <-- Best epoch so far, val	 <-- Best epoch so far, acc

-- Epoch 05 --
   [Recap 05 epoch] - train_loss: 0.374, train_acc: 0.7812 | eval_loss: 0.336, eval_acc: 0.9000 | elapsed time: 16.9s	 <-- Best epoch so far, val	 <-- Best epoch so far, acc

Done


In [None]:
# tmp store: persist the object returned by train_loop to 'objs.pkl'.
import pickle

# pickle.dump writes bytes, so the file must be opened in binary mode ('wb');
# text mode ('w') raises "TypeError: write() argument must be str, not bytes".
with open('objs.pkl', 'wb') as f:
    pickle.dump(hystory, f)