In [1]:
!jupyter nbextension enable --py widgetsnbextension

Enabling notebook extension jupyter-js-widgets/extension...
Paths used for configuration of notebook: 
    	/root/.jupyter/nbconfig/notebook.json
Paths used for configuration of notebook: 
    	
      - Validating: [32mOK[0m
Paths used for configuration of notebook: 
    	/root/.jupyter/nbconfig/notebook.json


In [2]:
!pip freeze

absl-py==1.3.0
aeppl==0.0.33
aesara==2.7.9
aiohttp==3.8.3
aiosignal==1.3.1
alabaster==0.7.12
albumentations==1.2.1
altair==4.2.0
appdirs==1.4.4
arviz==0.12.1
astor==0.8.1
astropy==4.3.1
astunparse==1.6.3
async-timeout==4.0.2
atari-py==0.2.9
atomicwrites==1.4.1
attrs==22.1.0
audioread==3.0.0
autograd==1.5
Babel==2.11.0
backcall==0.2.0
beautifulsoup4==4.6.3
bleach==5.0.1
blis==0.7.9
bokeh==2.3.3
branca==0.6.0
bs4==0.0.1
CacheControl==0.12.11
cachetools==5.2.0
catalogue==2.0.8
certifi==2022.9.24
cffi==1.15.1
cftime==1.6.2
chardet==3.0.4
charset-normalizer==2.1.1
click==7.1.2
clikit==0.6.2
cloudpickle==1.5.0
cmake==3.22.6
cmdstanpy==1.0.8
colorcet==3.0.1
colorlover==0.3.0
community==1.0.0b1
confection==0.0.3
cons==0.4.5
contextlib2==0.5.5
convertdate==2.4.0
crashtest==0.3.1
crcmod==1.7
cufflinks==0.17.3
cupy-cuda11x==11.0.0
cvxopt==1.3.0
cvxpy==1.2.2
cycler==0.11.0
cymem==2.0.7
Cython==0.29.32
daft==0.0.4
dask==2022.2.1
datascience==0.17.5
datasets==2.7.1
db-dtypes==1.0.4
debugpy==1.0.0
de

In [3]:
import pandas as pd
import os

# Load Arguments Dataset
data_folder = './data/'

train_arguments_file = 'arguments-training.tsv'
train_labels_file = 'labels-training.tsv'

validation_arguments_file = 'arguments-validation.tsv'
validation_labels_file = 'labels-validation.tsv'


arguments_train_df = pd.read_csv(os.path.join(data_folder, train_arguments_file), encoding='utf-8', sep='\t', header=0)
labels_train_df = pd.read_csv(os.path.join(data_folder, train_labels_file), encoding='utf-8', sep='\t', header=0)

arguments_validation_df = pd.read_csv(os.path.join(data_folder, validation_arguments_file), encoding='utf-8', sep='\t', header=0)
labels_validation_df = pd.read_csv(os.path.join(data_folder, validation_labels_file), encoding='utf-8', sep='\t', header=0)

#print(arguments_train_df)
#print(labels_train_df)
#print(arguments_validation_df)
#print(labels_validation_df)

In [4]:
from datasets import Dataset
import datasets
from sklearn.model_selection import train_test_split

# Combine the columsn in arguments to be a single field to give to bert

# Inputs: 
# an argument df from the source data (ArgumentId, Conclusion, Stance, Premise). 
# Labels df from file. 
# Name of label that will be trained on.

# Returns: df with a single column of arguments that is Conclusion: Conclusion, Stance: stance, Premise: Premise 
# along with the labels

from sklearn.utils import resample

def setup_df(arguments_df, labels_df, target_label):
    arguments_df['text'] = 'Conclusion: ' + arguments_df['Conclusion'] + ', Stance: ' + arguments_df['Stance'] + ', Premise: ' + arguments_df['Premise']
    resp = arguments_df.filter(['text'], axis=1)
    resp['label'] = labels_df[target_label]
    return resp

# This is where the specific value label is selected.
target_label = 'Universalism: concern'

train = setup_df(arguments_train_df, labels_train_df, target_label)
validation = setup_df(arguments_validation_df, labels_validation_df, target_label)

train, test = train_test_split(train, test_size=0.2)

dataset = datasets.DatasetDict({'train': Dataset.from_pandas(train), 'validation': Dataset.from_pandas(validation), 'test': Dataset.from_pandas(test)})
print(type(Dataset.from_pandas(train)))
print(type(Dataset.from_pandas(test)))

<class 'datasets.arrow_dataset.Dataset'>
<class 'datasets.arrow_dataset.Dataset'>


In [5]:
from typing import List
from transformers import AutoTokenizer


class BatchTokenizer:

  def __init__(self) -> None:
     self.tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    
  def get_sep_token(self,):
    return self.tokenizer.sep_token
  
  def __call__(self, batch: List[str]):

    enc = self.tokenizer(
        batch,
        padding=True,
        return_token_type_ids=False,
        return_tensors='pt',
        max_length=60,
        truncation=True
    )

    return enc 

In [6]:
from torch.utils.data.dataset import TensorDataset
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
import torch 

t = BatchTokenizer()

train_text = dataset['train']['text']
train_labels = torch.tensor(dataset['train']['label'])

train_encoded = t(*[train_text])
train_masks = train_encoded['attention_mask']
train_inp_ids = train_encoded['input_ids']


validation_text = dataset['validation']['text']
validation_labels = torch.tensor(dataset['validation']['label'])

validation_encoded = t(*[validation_text])
validation_masks = validation_encoded['attention_mask']
validation_inp_ids = validation_encoded['input_ids']



test_text = dataset['test']['text']
test_labels = torch.tensor(dataset['test']['label'])

test_encoded = t(*[test_text])
test_masks = test_encoded['attention_mask']
test_inp_ids = test_encoded['input_ids']





validation_set = TensorDataset(validation_inp_ids, validation_masks, validation_labels)
train_set = TensorDataset(train_inp_ids, train_masks, train_labels)
test_set = TensorDataset(test_inp_ids, test_masks, test_labels)


train_dataloader = DataLoader(
    train_set,
    sampler=RandomSampler(train_set),
    batch_size=2
)

validation_dataloader = DataLoader(
    validation_set,
    sampler=RandomSampler(validation_set),
    batch_size=2
)

test_dataloader = DataLoader(
    test_set,
    sampler=RandomSampler(test_set),
    batch_size=2
)



In [7]:
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
    torch.cuda.empty_cache()

Tesla T4


In [8]:
import random
from typing import Dict
import numpy as np 

device = torch.device("cuda")

def predict(model: torch.nn.Module, map: Dict) -> List:
    with torch.no_grad():
        out = model(**map)
        logits = out[0]
        logits = logits.detach().cpu()
        return list(torch.argmax(logits, axis=1).squeeze().numpy())

def precision(predicted_labels, true_labels, which_label=1):
  pred_which = np.array(predicted_labels) == which_label
  true_which = np.array(true_labels) == which_label
  denominator = np.sum(pred_which)
  if denominator:
    return np.sum(np.logical_and(pred_which, true_which))/denominator
  else:
    return 0. 

def recall(predicted_labels, true_labels, which_label=1):
  pred_which = np.array(predicted_labels) == which_label
  true_which = np.array(true_labels) == which_label
  denominator = np.sum(true_which)
  if denominator:
    return np.sum(np.logical_and(pred_which, true_which))/denominator
  else:
    return 0. 

def f1_score(predicted_labels: List[int], true_labels: List[int], which_label: int):
  P = precision(predicted_labels, true_labels, which_label=which_label)
  R = recall(predicted_labels, true_labels, which_label=which_label)
  if P and R:
    return  2*P*R/(P+R)
  else:
    return 0. 

def macro_f1(predicted_labels: List[int], true_labels: List[int], possible_labels: List[int]):
  scores = [f1_score(predicted_labels, true_labels, l) for l in possible_labels]
  return sum(scores) / len(scores)

In [9]:
def training_loop(num_epochs, train_features, dev_features, optimizer, model):
  print("Training...")
  for i in range(num_epochs):
    losses = []
    model.train()
    loss = 0 
    for n, features in enumerate(train_features):

      map = {
          'input_ids': features[0].to(device),
          'attention_mask': features[1].to(device),
          'labels': features[2].to(device)
      }

      model.zero_grad()

      out = model(**map)

      loss = out[0]

      losses.append(loss.item())

      loss.backward()

      optimizer.step()

    print(f"epoch {i}, loss: {sum(losses)/len(losses)}")
    print("Evaluating dev...")

    all_preds = []
    all_labels = [] 

    for features in dev_features:

      features = tuple(t.to(device) for t in features)

      input_ids, attention_mask, labels = features

      map = {
          'input_ids': features[0],
          'attention_mask': features[1]
      }

      pred = predict(model, map)

      if len(pred) != 2:
        break

      all_preds.append(pred)
      labels = labels.cpu()
      all_labels.append(list(labels.numpy()))

    dev_f1 = macro_f1(all_preds, all_labels, [0, 1])
    print(f"Dev F1 {dev_f1}")
    print("-------------------------------------------------------")
  return model

In [10]:
from transformers import BertForSequenceClassification
epochs = 3

LR = 0.00001

model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2).to(device)

optimizer = torch.optim.Adam(model.parameters(), LR)

training_loop(
    epochs, 
    train_dataloader,
    validation_dataloader, 
    optimizer,
    model,
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

Training...
epoch 0, loss: 0.5743785216026164
Evaluating dev...
Dev F1 0.70077453865221
-------------------------------------------------------
epoch 1, loss: 0.42651624013192685
Evaluating dev...
Dev F1 0.7005889517023249
-------------------------------------------------------
epoch 2, loss: 0.24677392924709637
Evaluating dev...
Dev F1 0.7104770999454908
-------------------------------------------------------


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, element

In [11]:
all_preds = []
all_labels = []
torch.cuda.empty_cache()

for features in test_dataloader:

      features = tuple(t.to(device) for t in features)

      input_ids, attention_mask, labels = features

      if len(labels) != 2:
        continue

      map = {
          'input_ids': features[0],
          'attention_mask': features[1]
      }

      pred = predict(model, map)

      if len(pred) != 2:
        break

      all_preds.append(pred)
      labels = labels.cpu()
      all_labels.append(list(labels.numpy()))

test_f1 = macro_f1(all_preds, all_labels, [0, 1])
print(f"Test F1 {test_f1}")



Test F1 0.7200444516634044
