In [1]:
!pip install -q transformers
!pip install -q datasets
!pip install -q evaluate
import pandas as pd
import numpy as np
import torch
import evaluate
from transformers import AutoModelForSequenceClassification
from train import get_dataloaders, train_model, calculate_f1

[0m

In [2]:
# Directory prefix (trailing slash included) that the CSV file names are appended to.
data_dir = "./datasets/"

In [3]:
# Resolve the three CSV locations first, then read them.
# Alternative augmentation file: data_dir + 'augmented_data_label_1_pegasus.csv'
original_train_path = data_dir + 'train_data.csv'
augmented_train_path = data_dir + 'augmented_data_label_1_parrot.csv'
val_path = data_dir + 'val_data.csv'

original_train_data = pd.read_csv(original_train_path)
augmented_train_data = pd.read_csv(augmented_train_path)
val_data = pd.read_csv(val_path)

# Stack the augmented rows underneath the original ones (row-wise concat;
# each frame keeps its own index values).
train_data = pd.concat([original_train_data, augmented_train_data])

In [4]:
# Build `num_sub_datasets` balanced sub-datasets for bagging.
# Every sub-dataset contains ALL label-1 rows plus an equally sized random
# draw of label-0 rows; the draw is seeded with the sub-dataset index so each
# bag sees a different (but reproducible) set of label-0 rows.
num_sub_datasets = 2

ones = train_data[train_data['label'].eq(1)]
zeros = train_data[train_data['label'].eq(0)]
num = len(ones)

train_data_list = []
for i in range(num_sub_datasets):
    sampled_zeros = zeros.sample(n=num, random_state=i)
    balanced_subset = pd.concat([ones, sampled_zeros], axis=0)
    train_data_list.append(balanced_subset)

In [None]:
# print(train_data_list[0]['label'].value_counts())
# print('Original train data shape: ', original_train_data.shape)
# print('Concatenated train data shape: ', train_data.shape)
# print(train_data_list[0])
# print(original_train_data)
# train_dataloader, val_dataloader = get_dataloaders(args, train_data_list[0], val_data)
# o_train_dataloader, o_val_dataloader = get_dataloaders(args, original_train_data, val_data)
# print(next(iter(train_dataloader)).keys())
# print(next(iter(o_train_dataloader)).keys())

In [None]:
'''
TODO
1. train the same model on each of the `num_sub_datasets` balanced sub-datasets.
2. predict labels with every trained model and combine them by majority vote.
3. calculate the accuracy and F1 score of the voting result.
'''

In [6]:
# Training configuration shared by every model in the ensemble.
PRETRAINED_MODEL_NAME = "roberta-base"
NUM_EPOCHS = 1
BATCH_SIZE = 16
LEARNING_RATE = 2e-5
USE_LR_SCHEDULER = False

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Bundle the settings into the dict that is handed to the helpers from `train`.
args = dict([
    ("NUM_EPOCHS", NUM_EPOCHS),
    ("BATCH_SIZE", BATCH_SIZE),
    ("LEARNING_RATE", LEARNING_RATE),
    ("USE_LR_SCHEDULER", USE_LR_SCHEDULER),
    ("PRETRAINED_MODEL_NAME", PRETRAINED_MODEL_NAME),
])

cuda


In [18]:
# Predict labels by majority vote over every model in the ensemble
def predict(model_list, test_dataloader):
    """Return majority-vote class predictions of an ensemble of models.

    The dataloader is traversed exactly once and every model is run on each
    batch, so all votes for a given example come from the same batch even if
    the loader would yield a different order on a second pass.

    Args:
        model_list: Non-empty sequence of models. Each one is put in eval
            mode, called as ``model(**batch)`` and must return an object with
            a ``logits`` attribute whose last axis indexes the classes.
        test_dataloader: Iterable of batches; each batch is a dict of tensors
            that is forwarded to the model as keyword arguments.

    Returns:
        1-D numpy array holding one voted label per example, in the order the
        dataloader yielded the examples. Ties are resolved in favour of the
        smallest label (behaviour of ``np.bincount(...).argmax()``), so with
        two binary models a 1 is only returned when both models predict 1.
        An empty dataloader yields an empty integer array.

    Raises:
        ValueError: If ``model_list`` is empty.
    """
    if len(model_list) == 0:
        raise ValueError("model_list must contain at least one model")

    # Remember where each model lives so batches can follow the model instead
    # of assuming a single global device.
    model_devices = []
    for model in model_list:
        model.eval()
        first_param = next(model.parameters(), None)
        model_devices.append(None if first_param is None else first_param.device)

    # One list of per-example predictions for each model
    votes_per_model = [[] for _ in model_list]
    with torch.no_grad():
        for batch in test_dataloader:
            for model_idx, model in enumerate(model_list):
                target = model_devices[model_idx]
                if target is None:
                    inputs = batch
                else:
                    inputs = {k: v.to(target) for k, v in batch.items()}
                logits = model(**inputs).logits
                # Extend with this batch's predicted class ids
                votes_per_model[model_idx].extend(logits.argmax(-1).cpu().numpy())

    # Rows = examples, columns = models
    votes = np.array(votes_per_model).T
    if votes.size == 0:
        return np.array([], dtype=np.int64)
    # Voting: most frequent label in each row
    return np.apply_along_axis(lambda x: np.bincount(x).argmax(), axis=1, arr=votes)


def calculate_bagging_f1(model_list, val_dataloader):
    """Return the F1 score of the majority-vote ensemble on a validation set.

    Args:
        model_list: Models whose votes are combined by ``predict``.
        val_dataloader: Iterable of dict batches; every batch must contain a
            ``'labels'`` tensor. It is iterated once by ``predict`` and once
            more here to gather the labels, so it must yield the examples in
            the same order on both passes.

    Returns:
        The ``"f1"`` entry of the combined accuracy/f1/precision/recall
        metric computed on the voted predictions.
    """
    # Get the predictions
    y_pred = predict(model_list, val_dataloader)
    # Get the labels: collect per-batch arrays and join them once, instead of
    # re-copying a growing array on every batch. Cast to int so the class ids
    # are not silently promoted to float.
    label_chunks = [batch['labels'].cpu().numpy() for batch in val_dataloader]
    y_true = np.concatenate(label_chunks).astype(int)
    # Calculate the f1 score
    metric = evaluate.combine(["accuracy", "f1", "precision", "recall"])
    result_dict = metric.compute(predictions=y_pred, references=y_true)
    return result_dict["f1"]

# If you need to load the models from disk here's your helper
def load_models(num_models: int):
    """Load ``num_models`` sequence-classification models and place them on ``device``.

    Model ``i`` is loaded with
    ``AutoModelForSequenceClassification.from_pretrained(f"bagging_model_{i}", num_labels=2)``
    (presumably the locations written during training; verify against
    ``train_model``).

    Each model is moved to the notebook-level ``device`` so that it sits on the
    same device the inference batches are sent to; freshly loaded models would
    otherwise stay on the CPU.

    Args:
        num_models: Number of models to load, indexed ``0 .. num_models - 1``.

    Returns:
        List of the loaded models in index order.
    """
    return [
        AutoModelForSequenceClassification.from_pretrained(f"bagging_model_{i}", num_labels=2).to(device)
        for i in range(num_models)
    ]

In [9]:
# Train one model per balanced sub-dataset; together they form the ensemble.
model_list = []
for i in range(num_sub_datasets):
    print('Training model {}...'.format(i))
    bag_train_data = train_data_list[i]
    bag_name = "bagging_model_{}".format(i)
    # The validation loader is rebuilt on every pass; the one from the final
    # iteration is what remains in scope for the cells that follow.
    train_dataloader, val_dataloader = get_dataloaders(args, bag_train_data, val_data)
    model = train_model(args, device, train_dataloader, model_name=bag_name)
    model_list.append(model)

Training model 0...


  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifi

Epoch: 0


train loss: 0.12058817595243454: 100%|██████████| 263/263 [06:22<00:00,  1.45s/it] 


Training model 1...


  0%|          | 0/5 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.bias', 'classifi

Epoch: 0


train loss: 0.09739437699317932: 100%|██████████| 263/263 [06:02<00:00,  1.38s/it]


In [20]:
# F1 of each individual model, looked up by its "bagging_model_<i>" name
print("Separate f1 score")
list_of_metrics = [
    calculate_f1(f"bagging_model_{i}", device, val_dataloader)
    for i in range(num_sub_datasets)
]
print(list_of_metrics)

# F1 of the majority-vote ensemble built from the in-memory models
print("Bagging f1 score")
metric = calculate_bagging_f1(model_list, val_dataloader)
print(metric)

Separate f1 score
[0.5352112676056339, 0.5016949152542373]
Bagging f1 score
0.5690376569037657
