In [28]:
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from transformers import (
    RobertaTokenizerFast,
    RobertaModel,
    TrainingArguments,
    Trainer,
    AutoConfig,
)
from tqdm import tqdm
from sklearn.metrics import f1_score

In [2]:
id2label_cert = {0: "Certain", 1: "Somewhat certain", 2: "Somewhat uncertain", 3: "Uncertain"}
label2id_cert = {"Certain": 0, "Somewhat certain": 1, "Somewhat uncertain": 2, "Uncertain": 3}

id2label_caus = {0: "Explicitly states: no relation", 1: "Causation", 2: "Correlation", 3: "No mention of a relation"}
label2id_caus = {"Explicitly states: no relation": 0, "Causation": 1, "Correlation": 2, "No mention of a relation": 3}

In [3]:
model_id = "roberta-base"

In [4]:
train_dataset = pd.read_csv('../data/causality_cert_train.csv', index_col=0)
train_dataset["certainty"] = train_dataset.certainty.map(label2id_cert)
train_dataset["causality"] = train_dataset.causality.map(label2id_caus)

test_dataset = pd.read_csv('../data/causality_cert_test.csv', index_col=0)
test_dataset["certainty"] = test_dataset.certainty.map(label2id_cert)
test_dataset["causality"] = test_dataset.causality.map(label2id_caus)

In [5]:
from datasets import Dataset

tokenizer = RobertaTokenizerFast.from_pretrained(model_id)

In [6]:
class ClassificationData(Dataset):
    def __init__(self, dataframe, tokenizer, max_len):
        self.tokenizer = tokenizer
        self.dataframe = dataframe.reset_index(drop=True)
        self.max_len = max_len

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, index):
        if isinstance(index, (list, np.ndarray)):
            # idx is a list of indices -> return a batch dict
            batch = [self._getitem_single(i) for i in index]
            # batch is list of dicts; collate into dict of tensors:
            return {
                key: torch.stack([item[key] for item in batch])
                for key in batch[0]
            }
        else:
            # single idx
            return self._getitem_single(index)

    def _getitem_single(self, index):
        row = self.dataframe.iloc[index]
        text = str(row.finding)
        text = " ".join(text.split())

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            padding='max_length',
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        token_type_ids = inputs["token_type_ids"]

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'causality': torch.tensor(row["causality"], dtype=torch.float),
            'certainty': torch.tensor(row["certainty"], dtype=torch.float)
        }

In [7]:
train_dataset = train_dataset.reset_index(drop=True)
test_dataset = test_dataset.reset_index(drop=True)

training_set = ClassificationData(train_dataset, tokenizer, 512)
testing_set = ClassificationData(test_dataset, tokenizer, 512)

In [8]:
train_params = {'batch_size': 4,
                'shuffle': True,
                'num_workers': 0
                }

test_params = {'batch_size': 4,
                'shuffle': True,
                'num_workers': 0
                }

training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

In [9]:
class RobertaClass(torch.nn.Module):
    def __init__(self):
        super(RobertaClass, self).__init__()
        self.l1 = RobertaModel.from_pretrained("roberta-base")
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.3)
        self.classifier = torch.nn.Linear(768, 8)
        self.unflatten = torch.nn.Unflatten(1, (2, 4))

    def forward(self, input_ids, attention_mask, token_type_ids):
        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
        hidden_state = output_1[0]
        pooler = hidden_state[:, 0]
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.ReLU()(pooler)
        pooler = self.dropout(pooler)
        pooler = self.classifier(pooler)
        output = self.unflatten(pooler)
        return output

In [10]:
# Setting up the device for GPU usage

from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'

In [11]:
model = RobertaClass()
model.to(device)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RobertaClass(
  (l1): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((

In [12]:
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=2e-5, weight_decay=0.01)

In [13]:
def calcuate_accuracy(preds, targets):
    n_correct = (preds==targets).sum().item()
    return n_correct

In [14]:
def train(epoch):
    tr_loss = 0
    n_correct_caus = 0
    n_correct_cert = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    model.train()
    for _,data in tqdm(enumerate(training_loader, 0)):
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)

        token_type_ids = data['token_type_ids'].to(device, dtype = torch.long)
        certainty = data['certainty'].to(device, dtype = torch.long)
        causality = data['causality'].to(device, dtype = torch.long)

        outputs = model(ids, mask, token_type_ids)
        loss_cert = loss_function(torch.squeeze(outputs[:, 0, :]), certainty)
        loss_caus = loss_function(torch.squeeze(outputs[:, 1, :]), causality)
        loss = loss_cert + loss_caus
        tr_loss += loss.item()
        big_val, big_idx = torch.max(outputs.data, dim=2)
        n_correct_cert += calcuate_accuracy(torch.squeeze(big_idx[:, 0]), certainty)
        n_correct_caus += calcuate_accuracy(torch.squeeze(big_idx[:, 1]), causality)

        nb_tr_steps += 1
        nb_tr_examples+=certainty.size(0)

        if _%5000==0:
            loss_step = tr_loss/nb_tr_steps
            accu_step_caus = (n_correct_caus*100)/nb_tr_examples
            accu_step_cert = (n_correct_cert*100)/nb_tr_examples
            print(f"Training Loss per 5000 steps: {loss_step}")
            print(f"Training Accuracy per 5000 steps: causality - {accu_step_caus}, certainty - {accu_step_cert}")

        optimizer.zero_grad()
        loss.backward()
        # # When using GPU
        optimizer.step()

    print(f'The Total Accuracy for Epoch {epoch}: causality - {(n_correct_caus*100)/nb_tr_examples} certainty - {(n_correct_cert*100)/nb_tr_examples}')
    epoch_loss = tr_loss/nb_tr_steps
    print(f"Training Loss Epoch: {epoch_loss}")
    print(f"Training Accuracy Epoch: causality - {(n_correct_caus*100)/nb_tr_examples} certainty - {(n_correct_cert*100)/nb_tr_examples}")

    return

In [15]:
EPOCHS = 5
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
for epoch in range(EPOCHS):
    train(epoch)

0it [00:00, ?it/s]

Training Loss per 5000 steps: 2.6448864936828613
Training Accuracy per 5000 steps: causality - 50.0, certainty - 0.0


335it [02:42,  2.06it/s]


The Total Accuracy for Epoch 0: causality - 81.71641791044776 certainty - 0.0
Training Loss Epoch: 2.4426621807155326
Training Accuracy Epoch: causality - 81.71641791044776 certainty - 0.0


1it [00:00,  2.08it/s]

Training Loss per 5000 steps: 2.6892056465148926
Training Accuracy per 5000 steps: causality - 100.0, certainty - 0.0


335it [02:44,  2.04it/s]


The Total Accuracy for Epoch 1: causality - 105.3731343283582 certainty - 0.0
Training Loss Epoch: 2.1799345030713435
Training Accuracy Epoch: causality - 105.3731343283582 certainty - 0.0


1it [00:00,  2.11it/s]

Training Loss per 5000 steps: 1.8130110502243042
Training Accuracy per 5000 steps: causality - 125.0, certainty - 0.0


335it [02:44,  2.04it/s]


The Total Accuracy for Epoch 2: causality - 122.76119402985074 certainty - 0.0
Training Loss Epoch: 1.8945176437719544
Training Accuracy Epoch: causality - 122.76119402985074 certainty - 0.0


1it [00:00,  2.04it/s]

Training Loss per 5000 steps: 1.1493356227874756
Training Accuracy per 5000 steps: causality - 175.0, certainty - 0.0


335it [02:44,  2.04it/s]


The Total Accuracy for Epoch 3: causality - 136.26865671641792 certainty - 0.0
Training Loss Epoch: 1.580358878801118
Training Accuracy Epoch: causality - 136.26865671641792 certainty - 0.0


1it [00:00,  2.07it/s]

Training Loss per 5000 steps: 1.2888275384902954
Training Accuracy per 5000 steps: causality - 125.0, certainty - 0.0


335it [02:44,  2.04it/s]

The Total Accuracy for Epoch 4: causality - 149.17910447761193 certainty - 0.0
Training Loss Epoch: 1.2997220259993825
Training Accuracy Epoch: causality - 149.17910447761193 certainty - 0.0





In [30]:
def valid(model, testing_loader):
    model.eval()
    n_correct_caus = 0; n_wrong_caus = 0; total = 0; tr_loss=0; nb_tr_steps=0; nb_tr_examples=0
    n_correct_cert = 0; n_wrong_cert = 0
    
    targets_caus = []
    outputs_caus = []
    targets_cert = []
    outputs_cert = []
    
    with torch.no_grad():
        for _, data in tqdm(enumerate(testing_loader, 0)):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            token_type_ids = data['token_type_ids'].to(device, dtype=torch.long)
            certainty = data['certainty'].to(device, dtype = torch.long)
            causality = data['causality'].to(device, dtype = torch.long)
            
            outputs = model(ids, mask, token_type_ids).squeeze()
            loss_cert = loss_function(torch.squeeze(outputs[:, 0, :]), certainty)
            loss_caus = loss_function(torch.squeeze(outputs[:, 1, :]), causality)
            loss = loss_cert + loss_caus
            tr_loss += loss.item()
            big_val, big_idx = torch.max(outputs.data, dim=2)
            n_correct_cert += calcuate_accuracy(torch.squeeze(big_idx[:, 0]), certainty)
            n_correct_caus += calcuate_accuracy(torch.squeeze(big_idx[:, 1]), causality)

            outputs_cert.append(big_idx[:, 0].cpu().detach().numpy())
            targets_cert.append(certainty.cpu().detach().numpy())
            outputs_caus.append(big_idx[:, 1].cpu().detach().numpy())
            targets_caus.append(causality.cpu().detach().numpy())
            
            nb_tr_steps += 1
            nb_tr_examples+=big_idx[:, 0].size(0)
            
            if _%5000==0:
                loss_step = tr_loss/nb_tr_steps
                # accu_step = (n_correct*100)/nb_tr_examples
                print(f"Validation Loss per 100 steps: {loss_step}")
                # print(f"Validation Accuracy per 100 steps: {accu_step}")
    epoch_loss = tr_loss/nb_tr_steps
    epoch_accu_caus = (n_correct_caus*100)/nb_tr_examples
    epoch_accu_cert = (n_correct_cert*100)/nb_tr_examples
    print(f"Validation Loss Epoch: {epoch_loss}")
    print(f"Validation Accuracy Epoch: caus: {epoch_accu_caus} cert: {epoch_accu_cert}")
    
    outputs_caus = np.concatenate(outputs_caus)
    targets_caus = np.concatenate(targets_caus)
    outputs_cert = np.concatenate(outputs_cert)
    targets_cert = np.concatenate(targets_cert)
    
    print("causality")
    print(f1_score(outputs_caus, targets_caus, average=None))
    
    print("certainty")
    print(f1_score(outputs_cert, targets_cert, average=None))
    
    return epoch_accu_caus, epoch_accu_cert

In [31]:
acc_caus, acc_cert = valid(model, testing_loader)
print("Accuracy on test data: causality = %0.2f%%, certainty = %0.2f%%" % (acc_caus, acc_cert))

2it [00:00,  5.95it/s]

Validation Loss per 100 steps: 2.6681876182556152


84it [00:14,  5.92it/s]

Validation Loss Epoch: 2.657751499896958
Validation Accuracy Epoch: caus: 52.395209580838326 cert: 54.49101796407186
causality
[0.4        0.54008439 0.52132701 0.52      ]
certainty
[0.67785235 0.5042735  0.36190476 0.19354839]
Accuracy on test data: causality = 52.40%, certainty = 54.49%



