In [1]:
!pip install -qq torchflare
!pip install -qq transformers

[K     |████████████████████████████████| 85 kB 3.1 MB/s 
[K     |████████████████████████████████| 17.3 MB 42 kB/s 
[K     |████████████████████████████████| 10.3 MB 52.9 MB/s 
[K     |████████████████████████████████| 98 kB 9.4 MB/s 
[K     |████████████████████████████████| 15.3 MB 289 kB/s 
[K     |████████████████████████████████| 3.0 MB 51.2 MB/s 
[K     |████████████████████████████████| 22.3 MB 1.3 MB/s 
[K     |████████████████████████████████| 9.9 MB 57.1 MB/s 
[K     |████████████████████████████████| 61 kB 502 kB/s 
[K     |████████████████████████████████| 735.5 MB 13 kB/s 
[K     |████████████████████████████████| 51.0 MB 84 kB/s 
[K     |████████████████████████████████| 37.1 MB 1.2 MB/s 
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchtext 0.10.0 requires torch==1.9.0, but you have torch 1.8.0 which is incompatible.
tensorf

In [2]:
from torchflare.experiments import Experiment, ModelConfig
import torch
from torchflare.metrics import MetricMeter, _BaseMetric
from functools import partial
import torch.nn.functional as F
import sklearn.metrics as skm
import torch.nn as nn
import transformers
import pandas as pd
from enum import Enum
import torchflare.callbacks as cbs
import pickle

import warnings
warnings.filterwarnings("ignore")

In [3]:
#Change Backbone name here.
BACKBONE_NAME = "prajjwal1/bert-small"

In [4]:
train_path = "/content/drive/MyDrive/Hahakathon/train.csv"
dev_path = "/content/drive/MyDrive/Hahakathon/dev.csv"


In [5]:
class TASKS(Enum):
    IS_HUMOR = "is_humor"
    HUMOR_CONTROVERSY = "humor_controversy"
    HUMOR_RATING = "humor_rating"
    OFFENSE_RATING = "offense_rating"

In [6]:

class MultiTaskDataset:
    def __init__(self, csv_path, backbone_name, max_len):
        self.data = self.read_df(path=csv_path)
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(backbone_name)
        self.max_len = max_len

    @staticmethod
    def read_df(path):
        df = pd.read_csv(path)
        df = df.fillna(0)
        df[TASKS.HUMOR_CONTROVERSY.value] = df[TASKS.HUMOR_CONTROVERSY.value].astype('int')
        return df

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        inps = self.tokenizer(
            self.data.iloc[idx]["text"],
            max_length=self.max_len,
            padding="max_length",
            return_tensors="pt",
            truncation=True,
        )
        inps = {k: v.squeeze(0) for k, v in inps.items()}
        label = {
            TASKS.IS_HUMOR.value: torch.tensor(self.data.iloc[idx][TASKS.IS_HUMOR.value]),
            TASKS.HUMOR_CONTROVERSY.value: torch.tensor(self.data.iloc[idx][TASKS.HUMOR_CONTROVERSY.value]),
            TASKS.HUMOR_RATING.value: torch.tensor(self.data.iloc[idx][TASKS.HUMOR_RATING.value], dtype = torch.float),
        }

        return inps, label

In [7]:
class SklearnF1(MetricMeter, _BaseMetric):
    def __init__(self, target_name: str):
        super(SklearnF1, self).__init__(multilabel=False)
        self.f1 = skm.f1_score
        self._outputs = None
        self._targets = None
        self.target_name = target_name
        self.reset()

    def handle(self):
        return f"{self.target_name}_{self.f1.__name__.lower()}"

    def accumulate(self, outputs: torch.Tensor, targets: torch.Tensor):
        """Method to accumulate the outputs and targets.
        Args:
            outputs(torch.Tensor) : raw logits from the network.
            targets(torch.Tensor) : Ground truth targets
        """
        outputs, targets = self.detach_tensor(outputs[self.target_name]), self.detach_tensor(targets[self.target_name])
        outputs = torch.argmax(outputs, dim=1)
        self._outputs.append(outputs)
        self._targets.append(targets)

    def reset(self):
        """Resets the accumulation lists."""
        self._outputs = []
        self._targets = []

    @property
    def value(self):
        outputs = torch.cat(self._outputs)
        targets = torch.cat(self._targets)
        f1_score = self.f1(targets.numpy(), outputs.numpy())
        return torch.tensor(f1_score)


In [8]:

class SklearnMSE(MetricMeter, _BaseMetric):
    def __init__(self, target_name: str):
        super(SklearnMSE, self).__init__(multilabel=False)
        self.mse = partial(skm.mean_squared_error, squared=False)
        self._outputs = None
        self._targets = None
        self.target_name = target_name
        self.reset()

    def handle(self):
        return f"{self.target_name}_{self.mse.func.__name__.lower()}"

    def accumulate(self, outputs: torch.Tensor, targets: torch.Tensor):
        """Method to accumulate the outputs and targets.
        Args:
            outputs(torch.Tensor) : raw logits from the network.
            targets(torch.Tensor) : Ground truth targets
        """
        outputs, targets = self.detach_tensor(outputs[self.target_name]), self.detach_tensor(targets[self.target_name])
        self._outputs.append(outputs)
        self._targets.append(targets)

    def reset(self):
        """Resets the accumulation lists."""
        self._outputs = []
        self._targets = []

    @property
    def value(self):
        outputs = torch.cat(self._outputs)
        targets = torch.cat(self._targets)
        mse_score = self.mse(targets.numpy(), outputs.numpy())
        return torch.tensor(mse_score)

In [9]:

def multitaskloss(op, y):
    ids = y[TASKS.IS_HUMOR.value] == 1
    is_humor_loss = F.cross_entropy(op[TASKS.IS_HUMOR.value], y[TASKS.IS_HUMOR.value].long())
    humor_controvery_loss = F.cross_entropy(op[TASKS.HUMOR_CONTROVERSY.value][ids],
                                            y[TASKS.HUMOR_CONTROVERSY.value][ids].long())
    humor_rating_loss = F.mse_loss(op[TASKS.HUMOR_RATING.value][ids], y[TASKS.HUMOR_RATING.value][ids].view(-1,1))


    return (is_humor_loss + humor_controvery_loss + humor_rating_loss).float()


In [10]:

class HydraNetwork(nn.Module):
    def __init__(self, model_path):
        super(HydraNetwork, self).__init__()

        self.model = transformers.AutoModel.from_pretrained(
            model_path, return_dict=False
        )
        in_features = self.model.pooler.dense.out_features
        self.is_humor_head = torch.nn.Linear(in_features=in_features, out_features=2)
        self.humor_controvery_head = torch.nn.Linear(in_features=in_features, out_features=2)
        self.humor_rating_head = torch.nn.Linear(in_features=in_features, out_features=1)


    def forward(self, x):
        _, o_2 = self.model(**x)
        return {TASKS.IS_HUMOR.value: self.is_humor_head(o_2),
                TASKS.HUMOR_CONTROVERSY.value: self.humor_controvery_head(o_2),
                TASKS.HUMOR_RATING.value: self.humor_rating_head(o_2)}



In [11]:
@cbs.on_experiment_end(order=cbs.CallbackOrder.EXTERNAL)
def save_pickle_experiment(experiment: "Experiment"):
    if "/" in BACKBONE_NAME:
        prefix = BACKBONE_NAME.split("/")[1]
    else:
        prefix = BACKBONE_NAME
    pickle.dump(experiment.history, open(f"{prefix}-experiment.pkl", "wb"))


In [12]:

class MultiTaskTrainer(Experiment):

    def get_model_params(self, optimizer):
        param_optimizer = list(self.state.model.named_parameters())

        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {
                "params": [
                    p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.001,
            },
            {
                "params": [
                    p for n, p in param_optimizer if any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.0,
            },
        ]
        return optimizer_parameters

In [14]:
train_ds = MultiTaskDataset(csv_path = train_path, backbone_name = BACKBONE_NAME, max_len = 256)
val_ds = MultiTaskDataset(csv_path = dev_path, backbone_name = BACKBONE_NAME, max_len = 256)

train_dl = torch.utils.data.DataLoader(train_ds, batch_size = 32, shuffle = True)
val_dl = torch.utils.data.DataLoader(val_ds, batch_size = 32, shuffle = False)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=286.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




In [15]:
config = ModelConfig(
    nn_module=HydraNetwork,
    module_params={"model_path": BACKBONE_NAME},
    optimizer="Adam",
    optimizer_params={"lr": 3e-5},
    criterion=multitaskloss,
)
callbacks = [
    save_pickle_experiment
]
metric_list = [SklearnF1(target_name = TASKS.IS_HUMOR.value) , SklearnF1(target_name = TASKS.HUMOR_CONTROVERSY.value),
               SklearnMSE(target_name = TASKS.HUMOR_RATING.value)]

In [16]:
multitask_exp = MultiTaskTrainer(num_epochs=5, seed=42, fp16=True, device="cuda")

In [17]:
multitask_exp.compile_experiment(model_config=config, callbacks=callbacks, metrics=metric_list)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=116270890.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at prajjwal1/bert-small were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [18]:
multitask_exp.fit_loader(train_dl, val_dl)


Epoch: 1/5

Epoch: 2/5

Epoch: 3/5

Epoch: 4/5

Epoch: 5/5


In [19]:
test_path = "/content/drive/MyDrive/Hahakathon/gold-test-27446.csv"
test_ds = MultiTaskDataset(csv_path = test_path, backbone_name = BACKBONE_NAME, max_len = 256)
test_dl = torch.utils.data.DataLoader(test_ds, batch_size = 64, shuffle = False)

In [20]:
from torchflare.experiments import to_device

In [21]:
is_humor = []
humor_contro = []
humor_rating = []
for batch in test_dl:
    batch = to_device(batch[0] , device = "cuda")
    with torch.no_grad():
        op = multitask_exp.state.model(batch)
    is_humor.extend(torch.argmax(op[TASKS.IS_HUMOR.value] , dim = 1).cpu().numpy())
    humor_contro.extend(torch.argmax(op[TASKS.HUMOR_CONTROVERSY.value] , dim = 1).cpu().numpy())
    humor_rating.extend(op[TASKS.HUMOR_RATING.value].cpu().numpy())

    


## Scores on Gold-Test Data 

In [22]:
print(f"Task 1A(F1-Score) : {skm.f1_score(test_ds.data.loc[: , 'is_humor'].values , is_humor)}")
print(f"Task 1B(RMSE) : {skm.mean_squared_error(test_ds.data.loc[: , 'humor_rating'].values , humor_rating , squared = False)}")
print(f"Task 1C(F1-Score) : {skm.f1_score(test_ds.data.loc[: , 'humor_controversy'].values , humor_contro)}")


Task 1A(F1-Score) : 0.9129720853858785
Task 1B(RMSE) : 1.1139141666977794
Task 1C(F1-Score) : 0.4918851435705368


In [23]:
torch.save(multitask_exp.state.model.state_dict(), "bert_small_multitask.bin")