## Imports

In [None]:
# Uncomment only when running on Google Colab
# %pip install transformers
# !git clone https://github.com/DanielSc4/Approach-LWD.git
# %cd Approach-LWD/src

In [38]:
from transformers import AutoTokenizer, AutoModelForMaskedLM
import torch

# from torch.utils.data import Dataset
from models.CustomDatasetBinary import CustomDatasetBinary
from torch.utils.data import DataLoader

from tqdm import tqdm

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Hugging Face checkpoint identifier used for both the tokenizer and the encoder.
# Swap the active assignment to try another backbone:
#   model_name = 'bert-base-uncased'    # BERT
model_name = "xlm-roberta-base"         # XLM-RoBERTa

In [39]:
# Select the compute device, in order of preference: CUDA GPU, Apple MPS, CPU.
# The result is stored in `device` as one of the strings 'cuda', 'mps' or 'cpu'.
#
# `torch.backends.mps` is looked up with getattr because it is only present in
# PyTorch builds that ship the MPS backend; accessing it directly on a build
# without it raises AttributeError instead of falling back to the CPU.
mps_backend = getattr(torch.backends, 'mps', None)

if torch.cuda.is_available():
    print('Using GPU')
    device = 'cuda'
elif mps_backend is not None and mps_backend.is_available():
    print('Using MPS (Apple Silicon GPU)')
    device = 'mps'
else:
    print('Using CPU, :(')
    device = 'cpu'

Using MPS


## Datasets & loaders

In [29]:
# Build the tokenizer, the three dataset splits and their DataLoaders.
# Names defined here and available to later cells: tokenizer, df, BATCH_SIZE,
# train_set / val_set / test_set, train_loader / val_loader / test_loader.
tokenizer = AutoTokenizer.from_pretrained(model_name)

BATCH_SIZE = 16
# Seed for the DataFrame shuffle, so the row order (and therefore the batch used
# in the overfit check below) is the same on every run.
SEED = 42
# True  -> every split is replaced by one batch repeated 7 times; a correct model
#          should be able to overfit it (sanity check of the training code).
# False -> use the real 'train' / 'dev' / 'test' splits of the data.
OVERFIT_SINGLE_BATCH = True

# starting w/ MD dataset
df = pd.read_csv('../data/MD_LoD.csv', index_col = 0)

# exclude labels == 2 (noted by the author as "slightly agreement")
df = df[df['disagreement_lev'] != 2]
# shuffle dataset (seeded)
df = df.sample(frac = 1, random_state = SEED)

if OVERFIT_SINGLE_BATCH:
    ############ only to overfit a batch (test to check model's correctness)
    fake_df = pd.concat(
        [df[:BATCH_SIZE]] * 7   # concat the same batch multiple times
    )
    train_set = CustomDatasetBinary(fake_df, tokenizer, label_col = 'disagreement_lev')
    val_set = CustomDatasetBinary(fake_df, tokenizer, label_col = 'disagreement_lev')
    test_set = CustomDatasetBinary(fake_df, tokenizer, label_col = 'disagreement_lev')
    print(f'----------\n|-> Using fake_df: size {len(fake_df)}\n----------')
else:
    # rows are routed to a split by the value of the 'split' column
    train_set = CustomDatasetBinary(df[df['split'] == 'train'], tokenizer, label_col = 'disagreement_lev')
    val_set = CustomDatasetBinary(df[df['split'] == 'dev'], tokenizer, label_col = 'disagreement_lev')
    test_set = CustomDatasetBinary(df[df['split'] == 'test'], tokenizer, label_col = 'disagreement_lev')

print(f'len: \n\tTrain: {len(train_set)}\n\tVal: {len(val_set)}\n\tTest: {len(test_set)}')

# shuffle = False everywhere: batches are served in the (already shuffled) row
# order of df, identical at every epoch.
train_loader = DataLoader(train_set, batch_size = BATCH_SIZE, shuffle = False)
val_loader = DataLoader(val_set, batch_size = BATCH_SIZE, shuffle = False)
test_loader = DataLoader(test_set, batch_size = BATCH_SIZE, shuffle = False)

----------
|-> Using fake_df: size 112
----------
len: 
	Train: 112
	Val: 112
	Test: 112


## Model

In [30]:
from transformers import AutoModel

from models.TransformerBinaryClassifier import TransformerBinaryClassifier

# Load the pretrained transformer selected by `model_name`.
# The variable keeps the name `bert_model` whatever checkpoint is chosen.
bert_model = AutoModel.from_pretrained(model_name)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
# Wrap the pretrained encoder in the binary classifier and run one batch through
# it as a shape sanity check.
# The classifier's forward pass takes, in order:
#   input_ids
#   attention_mask
model = TransformerBinaryClassifier(model = bert_model, dropout_rate = 0.2)

first_batch = next(iter(train_loader))
inp, lbl = first_batch

out = model(inp['input_ids'], inp['attention_mask'])

# raw output shape, then the shape once flattened to one value per sample
print(out.shape)
out.reshape(len(out)).shape


torch.Size([16, 1])


torch.Size([16])

In [5]:
import torch.nn as nn
from torch.optim import Adam

from models.utils import train_loop

# Loss and optimizer handed to the training loop.
# NOTE(review): nn.BCELoss expects probabilities in [0, 1], so this presumes the
# classifier already applies a sigmoid - confirm in TransformerBinaryClassifier.
criterion = nn.BCELoss()
optimizer = Adam(model.parameters(), lr = 1e-4)

# Everything train_loop needs, gathered in one place.
train_config = {
    'epochs': 5,
    'model': model,
    'device': device,
    'train_loader': train_loader,
    'val_loader': val_loader,
    'optimizer': optimizer,
    'criterion': criterion,
    'log_freq': 50,
    'name': 'test0',
}

# `history` is indexed later with 'train_losses', 'val_losses', 'train_accs',
# 'val_accs' and 'final_model'.
history = train_loop(**train_config)

-- Epoch 01 --
   [Recap 01 epoch] - train_loss: 0.770, train_acc: 0.3750 | eval_loss: 0.582, eval_acc: 0.9000 | elapsed time: 17.2s	 <-- Best epoch so far, val	 <-- Best epoch so far, acc

-- Epoch 02 --
   [Recap 02 epoch] - train_loss: 0.627, train_acc: 0.7188 | eval_loss: 0.542, eval_acc: 0.9000 | elapsed time: 17.3s	 <-- Best epoch so far, val	 <-- Best epoch so far, acc

-- Epoch 03 --
   [Recap 03 epoch] - train_loss: 0.613, train_acc: 0.6562 | eval_loss: 0.477, eval_acc: 0.9000 | elapsed time: 17.0s	 <-- Best epoch so far, val	 <-- Best epoch so far, acc

-- Epoch 04 --
   [Recap 04 epoch] - train_loss: 0.479, train_acc: 0.7812 | eval_loss: 0.394, eval_acc: 0.9000 | elapsed time: 17.0s	 <-- Best epoch so far, val	 <-- Best epoch so far, acc

-- Epoch 05 --
   [Recap 05 epoch] - train_loss: 0.374, train_acc: 0.7812 | eval_loss: 0.336, eval_acc: 0.9000 | elapsed time: 16.9s	 <-- Best epoch so far, val	 <-- Best epoch so far, acc

Done


### Plot

In [None]:
# Colour palette; only entries 1 (train curves) and 3 (validation curves) are used below.
palette = {
    0: '#ACBEA3',
    1: '#40476D',
    2: '#826754',
    3: '#EB6534',
    4: '#AD5D4E',
}

# Two stacked panels: losses on top, accuracies below.
fig, (loss_ax, acc_ax) = plt.subplots(2, 1, figsize = (17, 11), dpi = 200)

# (axis, train key, train label, val key, val label, title, y label)
panels = (
    (loss_ax, 'train_losses', 'Train loss', 'val_losses', 'Validation loss', 'Training loss', 'Loss'),
    (acc_ax, 'train_accs', 'Train accuracy', 'val_accs', 'Validation accuracy', 'Training accuracy', 'Accuracy'),
)

for ax, train_key, train_label, val_key, val_label, panel_title, y_label in panels:
    ax.plot(history[train_key], label = train_label, color = palette[1])
    ax.plot(history[val_key], label = val_label, color = palette[3])
    ax.set_title(panel_title)
    ax.set_xlabel('Epoch(s)')
    ax.set_ylabel(y_label)
    ax.grid(alpha = .5)
    ax.legend()

plt.show()

In [None]:
# tmp store: serialize the training history to disk.
# `history` also holds the trained model under 'final_model', so the model
# object is pickled together with the metric lists.
import pickle

# pickle produces bytes, so the file has to be opened in binary mode ('wb');
# with text mode ('w') pickle.dump raises TypeError.
with open('objs.pkl', 'wb') as f:
    pickle.dump(history, f)

## Test

In [None]:
from models.utils import compute_acc

# Evaluate the model returned by the training loop on the test loader and print
# the accuracy averaged over batches.
test_model = history['final_model'].to(device)
test_model.eval()

test_accs = .0

# Inference only: no gradients are needed.
with torch.no_grad():
    # A DataLoader is iterated directly; calling it (`test_loader()`) raises
    # TypeError because DataLoader objects are not callable.
    for inputs, labels in test_loader:
        # move every tensor of the batch to the same device as the model
        inputs = {key: inputs[key].to(device) for key in inputs}
        labels = labels.to(device)

        # flatten the output to one value per sample, to match `labels`
        outputs = test_model(
            inputs['input_ids'],
            inputs['attention_mask'],
        ).reshape(len(labels))

        # NOTE(review): presumes compute_acc returns the accuracy of a single
        # batch as a number - confirm in models.utils.
        test_accs += compute_acc(outputs, labels)

# Mean of the per-batch accuracies (each batch weighs the same, whatever its size).
print(f'Final accuracy: {test_accs / len(test_loader)}')