In [1]:
!pip install nlpaug
!pip install nltk
!pip install sacremoses
!pip install datasets
!pip install accelerate -U
!pip install transformers[torch]

Collecting nlpaug
  Downloading nlpaug-1.1.11-py3-none-any.whl (410 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.5/410.5 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: nlpaug
Successfully installed nlpaug-1.1.11
Collecting sacremoses
  Downloading sacremoses-0.1.1-py3-none-any.whl (897 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m897.5/897.5 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: sacremoses
Successfully installed sacremoses-0.1.1
Collecting datasets
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from 

In [2]:
import torch
import torch.nn as nn
from torchtext.data.utils import get_tokenizer
import pandas as pd
import accelerate
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from transformers import MarianMTModel, MarianTokenizer
from datasets import Dataset
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from collections import Counter, defaultdict
import math
import copy
import random
import operator
import nlpaug.augmenter.char as nac
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.sentence as nas
import nlpaug.flow as naf
import time
import torch.nn as nn
from transformers import RobertaTokenizer, RobertaForSequenceClassification, TrainingArguments, Trainer
from transformers.modeling_outputs import TokenClassifierOutput
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from datasets import Dataset


nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [17]:
class BaseModel(nn.Module):
    def __init__(self, num_epochs=1):
        super(BaseModel, self).__init__()

        self.id2label = {0: "NEGATIVE", 1: "POSITIVE"}
        self.label2id = {"NEGATIVE": 0, "POSITIVE": 1}
        self.num_labels = 2

        self.tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
        self.model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=self.num_labels, id2label=self.id2label, label2id=self.label2id)
        self.loss_fn = nn.CrossEntropyLoss()
        self.classifier = nn.Linear(768, self.num_labels)
        self.dropout = nn.Dropout(0.1)
        self.trainer = None

        self.train_args = TrainingArguments(
            output_dir='./results',
            num_train_epochs=num_epochs,
            per_device_train_batch_size=32,
            per_device_eval_batch_size=64,
            warmup_steps=500,
            weight_decay=0.01,
            logging_strategy='steps',
            logging_steps=10,
            evaluation_strategy="epoch",
            logging_dir='./logs',
        )

    def compute_metrics(self, pred):
        labels = pred.label_ids
        preds = pred.predictions.argmax(-1)
        precision, recall, f1, _ = precision_recall_fscore_support(
            labels, preds, average="binary"
        )
        acc = accuracy_score(labels, preds)
        return {
            "accuracy": acc,
            "f1": f1,
            "precision": precision,
            "recall": recall,
        }

    def apply_tokenizer(self, batch):
        return self.tokenizer(
            batch["text"],
            truncation=True,
            padding=True,
            max_length=100,
            add_special_tokens=True,
        )


    def forward(self, input_ids, attention_mask=None, labels=None):
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        cls_outputs = outputs.last_hidden_state[:, 0, :]
        dropout_output = self.dropout(cls_outputs)
        logits = self.classifier(dropout_output)

        loss = None

        if labels is not None:
            loss = self.loss_fn(logits, labels)

        return TokenClassifierOutput(loss=loss, logits=logits)


    def train(self, train_df, dev_df):
        train_hf = Dataset.from_pandas(train_df)
        dev_hf = Dataset.from_pandas(dev_df)

        tokenized_train = train_hf.map(self.apply_tokenizer, batched=True)
        tokenized_dev = dev_hf.map(self.apply_tokenizer, batched=True)

        self.trainer = Trainer(
            model=self.model,
            args=self.train_args,
            tokenizer=self.tokenizer,
            train_dataset=tokenized_train,
            eval_dataset=tokenized_dev,
            compute_metrics=self.compute_metrics
        )

        self.trainer.train()

    def evaluate_train(self, train_df):
        input_hf = Dataset.from_pandas(train_df)
        tokenized_input = input_hf.map(self.apply_tokenizer, batched=True)
        return self.trainer.evaluate(tokenized_input)

    def evaluate_dev(self):
        return self.trainer.evaluate()

In [4]:
columns = ['id', 'identifier', 'category', 'country_code', 'text', 'multi_label']

try:
    data_df = pd.read_csv("/content/dontpatronizeme_pcl.tsv", sep='\t', header=None, names=columns, skiprows=3, index_col='id')
except pd.errors.ParserError as e:
    print("ParserError:", e)

data_df['label'] = data_df['multi_label'].apply(lambda x: 0 if x == 0 or x == 1 else 1)
data_df = data_df.dropna()
data_df["text_length"] = data_df["text"].apply(lambda x: len(x))

In [5]:
train_ids = pd.read_csv("/content/train_semeval_parids-labels.csv")
dev_ids = pd.read_csv("/content/dev_semeval_parids-labels.csv")

In [6]:
train_df = pd.read_csv("/content/bestprocaug.csv")
dev_df = data_df.loc[data_df.index.isin(dev_ids['par_id'])]

In [9]:
train_df_x = data_df.loc[data_df.index.isin(train_ids['par_id'])]

Model with BCE

In [20]:
model = BaseModel(num_epochs=5)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [21]:
model.train(train_df, dev_df)

Map:   0%|          | 0/11551 [00:00<?, ? examples/s]

Map:   0%|          | 0/2093 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1476,0.294045,0.897277,0.027149,0.136364,0.015075
2,0.1589,0.231907,0.919732,0.432432,0.659794,0.321608
3,0.1096,0.246461,0.920688,0.517442,0.613793,0.447236
4,0.0577,0.361441,0.926421,0.524691,0.68,0.427136
5,0.0096,0.422522,0.927377,0.509677,0.711712,0.396985


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-1000 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-1500 already exists and is non-empty. Saving will proceed but saved results may be invalid.


In [22]:
model.evaluate_train(train_df)

Map:   0%|          | 0/11551 [00:00<?, ? examples/s]

{'eval_loss': 0.0075494637712836266,
 'eval_accuracy': 0.9983551207687646,
 'eval_f1': 0.9976031285480005,
 'eval_precision': 0.9992418498862775,
 'eval_recall': 0.9959697732997481,
 'eval_runtime': 20.4124,
 'eval_samples_per_second': 565.882,
 'eval_steps_per_second': 17.685,
 'epoch': 5.0}

In [23]:
model.evaluate_dev()

{'eval_loss': 0.42252156138420105,
 'eval_accuracy': 0.9273769708552317,
 'eval_f1': 0.5096774193548387,
 'eval_precision': 0.7117117117117117,
 'eval_recall': 0.3969849246231156,
 'eval_runtime': 4.2235,
 'eval_samples_per_second': 495.566,
 'eval_steps_per_second': 15.627,
 'epoch': 5.0}

In [19]:
del model

## Multi-Modal Model

In [24]:
train_df["country_num"] = pd.Categorical(train_df['country_code']).codes
dev_df["country_num"] = pd.Categorical(dev_df['country_code']).codes
train_df['category_num'] = pd.Categorical(train_df['category']).codes
dev_df['category_num'] = pd.Categorical(dev_df['category']).codes


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dev_df["country_num"] = pd.Categorical(dev_df['country_code']).codes
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dev_df['category_num'] = pd.Categorical(dev_df['category']).codes


In [25]:
train_df.head()

Unnamed: 0,id,identifier,category,country_code,text,multi_label,label,text_length,country_num,category_num
0,1,@@24942188,hopeless,ph,"We 're living in time of absolute insanity , a...",0,0,620,14,2
1,2,@@21968160,migrant,gh,"In Libya today , there are countless number of...",0,0,237,4,5
2,3,@@16584954,immigrant,ie,White House press secretary Sean Spicer said t...,0,0,158,6,3
3,4,@@7811231,disabled,nz,Council customer only sign would be displayed ...,0,0,162,13,0
4,5,@@1494111,refugee,ca,`` Just like we received migrant fleeing El Sa...,0,0,273,2,7


In [26]:
dev_df.head()

Unnamed: 0_level_0,identifier,category,country_code,text,multi_label,label,text_length,country_num,category_num
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
107,@@16900972,homeless,ke,"His present "" chambers "" may be quite humble ,...",3,1,394,9,1
149,@@1387882,disabled,us,Krueger recently harnessed that creativity to ...,2,1,296,18,0
151,@@19974860,poor-families,in,10:41am - Parents of children who died must ge...,3,1,138,7,6
154,@@20663936,disabled,ng,When some people feel causing problem for some...,4,1,496,12,0
157,@@21712008,poor-families,ca,We are alarmed to learn of your recently circu...,4,1,601,2,6


In [48]:
import torch
import torch.nn as nn
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
from sklearn.metrics import precision_recall_fscore_support, accuracy_score

class MultimodalModel(nn.Module):
    def __init__(self, num_categories=10, num_countries=20, num_epochs=1):
        super(MultimodalModel, self).__init__()
        self.num_labels = 2  # Assuming binary classification
        self.id2label = {0: "NEGATIVE", 1: "POSITIVE"}
        self.label2id = {"NEGATIVE": 0, "POSITIVE": 1}

        # Initialize tokenizer and base RoBERTa model
        self.tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
        self.model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=self.num_labels, id2label=self.id2label, label2id=self.label2id)

        self.category_embedding = nn.Embedding(num_embeddings=num_categories, embedding_dim=4)
        self.country_embedding = nn.Embedding(num_embeddings=num_countries, embedding_dim=4)

        self.text_length_linear = nn.Linear(1, 4)

        self.combined_fc = nn.Linear(768 + 4 + 4 + 4, 512)
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(512, self.num_labels)
        self.loss_fn = nn.CrossEntropyLoss()

        # Training arguments
        self.train_args = TrainingArguments(
            output_dir='./results',
            num_train_epochs=num_epochs,
            per_device_train_batch_size=32,
            per_device_eval_batch_size=128,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=10,
            evaluation_strategy='epoch'
        )

    def apply_tokenizer(self, batch):
        # Tokenize text data
        tokenized_inputs = self.tokenizer(
            batch["text"],
            truncation=True,
            padding=True,
            max_length=100,
            add_special_tokens=True,
        )

        # Add additional features directly to the tokenized inputs
        # Assuming these columns are present in your DataFrame and thus in your batch
        tokenized_inputs['category_num'] = batch['category_num']
        tokenized_inputs['country_num'] = batch['country_num']
        tokenized_inputs['text_length'] = [[length] for length in batch['text_length']]  # Ensuring it is a list of lists for proper batching

        return tokenized_inputs


    def compute_metrics(self, pred):
        labels = pred.label_ids
        preds = pred.predictions.argmax(-1)
        precision, recall, f1, _ = precision_recall_fscore_support(
            labels, preds, average="binary"
        )
        acc = accuracy_score(labels, preds)
        return {
            "accuracy": acc,
            "f1": f1,
            "precision": precision,
            "recall": recall,
        }

    def forward(self, input_ids, attention_mask=None, categories=None, countries=None, text_lengths=None, labels=None):

        # Process text input through RoBERTa
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output

        # Embeddings for categorical features
        category_features = self.category_embedding(categories)
        country_features = self.country_embedding(countries)

        # Process numerical feature (text_length)
        text_length_features = self.text_length_linear(text_lengths.view(-1, 1))
        # Combine all features
        combined_features = torch.cat((pooled_output, category_features, country_features, text_length_features), dim=1)
        combined_features = self.dropout(combined_features)

        # Final classifier
        logits = self.classifier(combined_features)

        # Calculate loss
        loss = None
        if labels is not None:
            loss = self.loss_fn(logits, labels.view(-1))

        return logits, loss

    def train(self, train_df, dev_df):
        train_hf = Dataset.from_pandas(train_df)
        dev_hf = Dataset.from_pandas(dev_df)

        tokenized_train = train_hf.map(self.apply_tokenizer, batched=True)
        tokenized_dev = dev_hf.map(self.apply_tokenizer, batched=True)

        self.trainer = Trainer(
            model=self.model,
            args=self.train_args,
            tokenizer=self.tokenizer,
            train_dataset=tokenized_train,
            eval_dataset=tokenized_dev,
            compute_metrics=self.compute_metrics
        )

        self.trainer.train()

    def evaluate_train(self, train_df):
        input_hf = Dataset.from_pandas(train_df)
        tokenized_input = input_hf.map(self.apply_tokenizer, batched=True)
        return self.trainer.evaluate(tokenized_input)

    def evaluate_dev(self):
        return self.trainer.evaluate()




In [50]:
model = MultimodalModel(num_epochs=5)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [51]:
model.train(train_df, dev_df)

Map:   0%|          | 0/11551 [00:00<?, ? examples/s]

Map:   0%|          | 0/2093 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1414,0.286308,0.905877,0.066351,0.583333,0.035176
2,0.1838,0.237244,0.917344,0.440129,0.618182,0.341709
3,0.0877,0.319071,0.903966,0.548315,0.495935,0.613065
4,0.0553,0.337489,0.922121,0.563003,0.603448,0.527638
5,0.0345,0.429014,0.926421,0.557471,0.651007,0.487437


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-1000 already exists and is non-empty. Saving will proceed but saved results may be invalid.
Checkpoint destination directory ./results/checkpoint-1500 already exists and is non-empty. Saving will proceed but saved results may be invalid.


In [52]:
model.evaluate_dev()

{'eval_loss': 0.42901358008384705,
 'eval_accuracy': 0.9264214046822743,
 'eval_f1': 0.5574712643678161,
 'eval_precision': 0.6510067114093959,
 'eval_recall': 0.48743718592964824,
 'eval_runtime': 3.4673,
 'eval_samples_per_second': 603.644,
 'eval_steps_per_second': 4.903,
 'epoch': 5.0}

In [53]:
torch.save(model.state_dict(), '/content/model55.71_state_dict.pth')

In [49]:
del model

## Hyperparameter Tuning

In [54]:
internal_train_df, internal_dev_df = train_test_split(train_df, test_size=0.2, random_state=42)

In [59]:
class MultimodalModel(nn.Module):
    def __init__(self, train_batch, eval_batch, lr, embedding_dim, num_categories=10, num_countries=20, num_epochs=1):
        super(MultimodalModel, self).__init__()
        self.num_labels = 2  # Assuming binary classification
        self.id2label = {0: "NEGATIVE", 1: "POSITIVE"}
        self.label2id = {"NEGATIVE": 0, "POSITIVE": 1}

        self.tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
        self.model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=self.num_labels, id2label=self.id2label, label2id=self.label2id)

        self.category_embedding = nn.Embedding(num_embeddings=num_categories, embedding_dim=embedding_dim)
        self.country_embedding = nn.Embedding(num_embeddings=num_countries, embedding_dim=embedding_dim)

        self.text_length_linear = nn.Linear(1, embedding_dim)

        self.combined_fc = nn.Linear(768 + embedding_dim + embedding_dim + embedding_dim, 512)
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(512, self.num_labels)
        self.loss_fn = nn.CrossEntropyLoss()

        self.train_args = TrainingArguments(
            output_dir='./results',
            num_train_epochs=num_epochs,
            per_device_train_batch_size=train_batch,
            per_device_eval_batch_size=eval_batch,
            warmup_steps=500,
            weight_decay=0.01,
            logging_dir='./logs',
            logging_steps=10,
            evaluation_strategy='epoch',
            learning_rate=lr
        )

    def apply_tokenizer(self, batch):
        tokenized_inputs = self.tokenizer(
            batch["text"],
            truncation=True,
            padding=True,
            max_length=100,
            add_special_tokens=True,
        )

        tokenized_inputs['category_num'] = batch['category_num']
        tokenized_inputs['country_num'] = batch['country_num']
        tokenized_inputs['text_length'] = [[length] for length in batch['text_length']]

        return tokenized_inputs


    def compute_metrics(self, pred):
        labels = pred.label_ids
        preds = pred.predictions.argmax(-1)
        precision, recall, f1, _ = precision_recall_fscore_support(
            labels, preds, average="binary"
        )
        acc = accuracy_score(labels, preds)
        return {
            "accuracy": acc,
            "f1": f1,
            "precision": precision,
            "recall": recall,
        }

    def forward(self, input_ids, attention_mask=None, categories=None, countries=None, text_lengths=None, labels=None):

        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.pooler_output

        category_features = self.category_embedding(categories)
        country_features = self.country_embedding(countries)

        text_length_features = self.text_length_linear(text_lengths.view(-1, 1))
        combined_features = torch.cat((pooled_output, category_features, country_features, text_length_features), dim=1)
        combined_features = self.dropout(combined_features)

        logits = self.classifier(combined_features)

        loss = None
        if labels is not None:
            loss = self.loss_fn(logits, labels.view(-1))

        return logits, loss

    def train(self, train_df, dev_df):
        train_hf = Dataset.from_pandas(train_df)
        dev_hf = Dataset.from_pandas(dev_df)

        tokenized_train = train_hf.map(self.apply_tokenizer, batched=True)
        tokenized_dev = dev_hf.map(self.apply_tokenizer, batched=True)

        self.trainer = Trainer(
            model=self.model,
            args=self.train_args,
            tokenizer=self.tokenizer,
            train_dataset=tokenized_train,
            eval_dataset=tokenized_dev,
            compute_metrics=self.compute_metrics,
        )

        self.trainer.train()

    def evaluate_train(self, train_df):
        input_hf = Dataset.from_pandas(train_df)
        tokenized_input = input_hf.map(self.apply_tokenizer, batched=True)
        return self.trainer.evaluate(tokenized_input)

    def evaluate_dev(self):
        return self.trainer.evaluate()




In [60]:
lrs = [5e-5,5e-4,1e-5]
b_sizes = [32,64,128]
embedding_dim = [2,4]

In [62]:
model_0 =  MultimodalModel(b_sizes[2], b_sizes[2], lrs[1], embedding_dim[0], num_epochs=5)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [63]:
model_0.train(internal_train_df, internal_dev_df)

Map:   0%|          | 0/9240 [00:00<?, ? examples/s]

Map:   0%|          | 0/2311 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.1916,0.257119,0.921679,0.872085,0.998382,0.774153
2,0.1812,0.243533,0.932497,0.894452,0.970631,0.82936
3,0.2842,0.183856,0.936824,0.902406,0.965665,0.846926
4,0.1765,0.268725,0.928602,0.891376,0.937673,0.849435
5,0.1763,0.363588,0.919083,0.878334,0.912162,0.846926


In [66]:
del model_0

In [64]:
model_1 =  MultimodalModel(b_sizes[2], b_sizes[2], lrs[2], embedding_dim[1], num_epochs=5)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [65]:
model_1.train(internal_train_df, internal_dev_df)

Map:   0%|          | 0/9240 [00:00<?, ? examples/s]

Map:   0%|          | 0/2311 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.6599,0.647902,0.655128,0.0,0.0,0.0
2,0.3587,0.282484,0.91865,0.867232,0.991922,0.770389
3,0.2065,0.215247,0.926439,0.882434,0.983051,0.800502
4,0.153,0.190937,0.929035,0.888738,0.967504,0.821832
5,0.1461,0.194964,0.931199,0.893645,0.95702,0.838143


  _warn_prf(average, modifier, msg_start, len(result))


In [67]:
del model_1

In [68]:
model_2 =  MultimodalModel(b_sizes[1], b_sizes[1], lrs[0], embedding_dim[1], num_epochs=5)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [70]:
model_2.train(internal_train_df, internal_dev_df)

Map:   0%|          | 0/9240 [00:00<?, ? examples/s]

Map:   0%|          | 0/2311 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.2081,0.263408,0.922977,0.874824,0.9952,0.780427
2,0.1615,0.219879,0.925141,0.87961,0.9875,0.792974
3,0.1385,0.177993,0.940286,0.908123,0.967376,0.855709
4,0.0818,0.236598,0.932497,0.898305,0.934871,0.864492
5,0.0355,0.30517,0.937689,0.903872,0.965763,0.849435


Checkpoint destination directory ./results/checkpoint-500 already exists and is non-empty. Saving will proceed but saved results may be invalid.


In [72]:
del model_2

In [73]:
model_3 =  MultimodalModel(b_sizes[2], b_sizes[2], lrs[0], embedding_dim[1], num_epochs=5)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [74]:
model_3.train(internal_train_df, internal_dev_df)

Map:   0%|          | 0/9240 [00:00<?, ? examples/s]

Map:   0%|          | 0/2311 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.3388,0.29352,0.916919,0.863442,0.996716,0.761606
2,0.2035,0.23636,0.922544,0.874386,0.992038,0.781681
3,0.1566,0.191355,0.931199,0.892785,0.965015,0.830615
4,0.1342,0.32569,0.921679,0.872265,0.996774,0.775408
5,0.0862,0.197498,0.933362,0.899609,0.936228,0.865747


In [75]:
del model_3

In [76]:
model_4 =  MultimodalModel(b_sizes[2], b_sizes[2], lrs[0], embedding_dim[0], num_epochs=5)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [77]:
model_4.train(internal_train_df, internal_dev_df)

Map:   0%|          | 0/9240 [00:00<?, ? examples/s]

Map:   0%|          | 0/2311 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.3388,0.29352,0.916919,0.863442,0.996716,0.761606
2,0.2035,0.23636,0.922544,0.874386,0.992038,0.781681
3,0.1566,0.191355,0.931199,0.892785,0.965015,0.830615
4,0.1342,0.32569,0.921679,0.872265,0.996774,0.775408
5,0.0862,0.197498,0.933362,0.899609,0.936228,0.865747


In [78]:
del model_4