# Using **RoBERTa** for DOS

### Setup

In [None]:
!unzip Project6_Group2.zip

Archive:  Project6_Group2.zip
  inflating: Group_2/test.csv        
  inflating: Group_2/train.csv       


In [None]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
!pip install -qqq transformers

[K     |████████████████████████████████| 5.5 MB 34.0 MB/s 
[K     |████████████████████████████████| 7.6 MB 54.2 MB/s 
[K     |████████████████████████████████| 163 kB 68.5 MB/s 
[?25h

In [None]:
import pandas as pd
import numpy as np
import torch
import torchvision

### Pre-processing of Dataset

In [None]:
train_data = pd.read_csv('/content/Group_2/train.csv')
train_data

Unnamed: 0,text,label_sexist,label_category,label_vector
0,"Who does Jenner think she is, Hillary Clinton?...",not sexist,none,none
1,these women were pretty dumb to think a big ri...,not sexist,none,none
2,This. Also with the girls who have the potenti...,not sexist,none,none
3,We actually agree to a point. The problem is t...,not sexist,none,none
4,Plenty of women so it's still a minority.,not sexist,none,none
...,...,...,...,...
7995,There are direct racial parallels that I won't...,not sexist,none,none
7996,[USER] Isn't she the one that openly said her ...,not sexist,none,none
7997,"Unfortunately, we know some men are ready to s...",sexist,2. derogation,2.2 aggressive and emotive attacks
7998,"So, your mother and father are nazis? Do you l...",not sexist,none,none


In [None]:
from sklearn.model_selection import train_test_split
train_x,val_x, train_y, val_y = train_test_split(train_data["text"].to_numpy(), train_data["label_sexist"].to_numpy(), test_size=0.33, random_state=0)

In [None]:
from transformers import RobertaTokenizer

tokenizer = RobertaTokenizer.from_pretrained('roberta-base')


Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/481 [00:00<?, ?B/s]

In [None]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self,data,labels):

        self.labels = [0 if label=="not sexist"  else 1 for label in labels]
        self.data = [tokenizer(x, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for x in data]
    
    def __len__(self):
        return len(self.labels)
    
    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.data[idx]

    def __getitem__(self, idx):

        batch_data = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_data, batch_y

In [None]:
train_dataset = Dataset(train_x,train_y)
val_dataset = Dataset(val_x,val_y)


### Model (uses RoBERTa)

In [None]:
from torch import nn
from transformers import RobertaModel

class BertClassifier(nn.Module):

    def __init__(self, dropout=0.5, num_classes=2):

        super(BertClassifier, self).__init__()

        self.bert = RobertaModel.from_pretrained('roberta-base')
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, num_classes)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):

        _, pooled_output = self.bert(input_ids= input_id, attention_mask=mask,return_dict=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        final_output = self.relu(linear_output)

        return final_output

In [None]:
from tqdm import tqdm

def train(model, train_dataset, val_dataset, learning_rate, epochs, model_name="2class"):

    best_eval_loss = 1e9


    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=8, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=8)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr= learning_rate)

    if use_cuda:

            model = model.cuda()
            criterion = criterion.cuda()

    for epoch_num in range(epochs):

            total_acc_train = 0
            total_loss_train = 0

            for train_input, train_label in tqdm(train_dataloader):

                train_label = train_label.to(device)
                mask = train_input['attention_mask'].to(device)
                input_id = train_input['input_ids'].squeeze(1).to(device)

                output = model(input_id, mask)
                
                batch_loss = criterion(output, train_label.long())
                total_loss_train += batch_loss.item()
                
                acc = (output.argmax(dim=1) == train_label).sum().item()
                total_acc_train += acc

                model.zero_grad()
                batch_loss.backward()
                optimizer.step()
            
            total_acc_val = 0
            total_loss_val = 0

            with torch.no_grad():

                for val_input, val_label in tqdm(val_dataloader):

                    val_label = val_label.to(device)
                    mask = val_input['attention_mask'].to(device)
                    input_id = val_input['input_ids'].squeeze(1).to(device)

                    output = model(input_id, mask)

                    batch_loss = criterion(output, val_label.long())
                    total_loss_val += batch_loss.item()
                    
                    acc = (output.argmax(dim=1) == val_label).sum().item()
                    total_acc_val += acc
            
            print(
                f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_dataset): .3f} \
                | Train Accuracy: {total_acc_train / len(train_dataset): .3f} \
                | Val Loss: {total_loss_val / len(val_dataset): .3f} \
                | Val Accuracy: {total_acc_val / len(val_dataset): .3f}')
            
            if total_loss_val / len(val_dataset) < best_eval_loss:
                best_eval_loss = total_loss_val / len(val_dataset)
                torch.save(model.state_dict(), f"/content/drive/MyDrive/NLP Project/models/{model_name}_model_roberta.pth")
                  


### 2-Class classification

In [None]:
num_epochs = 10
model = RobertaClassifier(num_classes=2)
lr = 1e-6
              
train(model, train_dataset, val_dataset, lr, num_epochs)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.bias', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 670/670 [07:34<00:00,  1.47it/s]
100%|██████████| 330/330 [01:20<00:00,  4.12it/s]


Epochs: 1 | Train Loss:  0.073                 | Train Accuracy:  0.711                 | Val Loss:  0.062                 | Val Accuracy:  0.754


100%|██████████| 670/670 [07:40<00:00,  1.46it/s]
100%|██████████| 330/330 [01:20<00:00,  4.12it/s]


Epochs: 2 | Train Loss:  0.057                 | Train Accuracy:  0.766                 | Val Loss:  0.050                 | Val Accuracy:  0.824


100%|██████████| 670/670 [07:39<00:00,  1.46it/s]
100%|██████████| 330/330 [01:20<00:00,  4.12it/s]


Epochs: 3 | Train Loss:  0.044                 | Train Accuracy:  0.856                 | Val Loss:  0.044                 | Val Accuracy:  0.848


100%|██████████| 670/670 [07:38<00:00,  1.46it/s]
100%|██████████| 330/330 [01:20<00:00,  4.12it/s]


Epochs: 4 | Train Loss:  0.034                 | Train Accuracy:  0.895                 | Val Loss:  0.044                 | Val Accuracy:  0.848


100%|██████████| 670/670 [07:38<00:00,  1.46it/s]
100%|██████████| 330/330 [01:20<00:00,  4.12it/s]


Epochs: 5 | Train Loss:  0.026                 | Train Accuracy:  0.926                 | Val Loss:  0.046                 | Val Accuracy:  0.857


100%|██████████| 670/670 [07:38<00:00,  1.46it/s]
100%|██████████| 330/330 [01:20<00:00,  4.12it/s]


Epochs: 6 | Train Loss:  0.018                 | Train Accuracy:  0.959                 | Val Loss:  0.054                 | Val Accuracy:  0.822


100%|██████████| 670/670 [07:38<00:00,  1.46it/s]
100%|██████████| 330/330 [01:20<00:00,  4.12it/s]


Epochs: 7 | Train Loss:  0.012                 | Train Accuracy:  0.977                 | Val Loss:  0.056                 | Val Accuracy:  0.846


 86%|████████▌ | 576/670 [06:35<01:04,  1.45it/s]


KeyboardInterrupt: ignored

### 5-Class classification

In [None]:

from sklearn.model_selection import train_test_split
train_x,val_x, train_y, val_y = train_test_split(train_data["text"].to_numpy(), train_data["label_category"].to_numpy(), test_size=0.33, random_state=0)

In [None]:
category_types = set(list(val_y))
category_types

{'1. threats, plans to harm and incitement',
 '2. derogation',
 '3. animosity',
 '4. prejudiced discussions',
 'none'}

In [None]:
category_dict = {
    '1. threats, plans to harm and incitement': 1,
    '2. derogation': 2,
    '3. animosity': 3,
    '4. prejudiced discussions': 4,
    'none': 0
}

In [None]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self,data,labels):

        self.labels = [category_dict[label] for label in labels]
        self.data = [tokenizer(x, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for x in data]
    
    def __len__(self):
        return len(self.labels)
    
    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.data[idx]

    def __getitem__(self, idx):

        batch_data = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_data, batch_y

In [None]:
train_dataset = Dataset(train_x,train_y)
val_dataset = Dataset(val_x,val_y)


In [None]:
num_epochs = 10
model = RobertaClassifier(num_classes=5)
lr = 1e-6
              
train(model, train_dataset, val_dataset, lr, num_epochs, model_name="5class_roberta")

Downloading:   0%|          | 0.00/501M [00:00<?, ?B/s]

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.bias', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 670/670 [08:03<00:00,  1.39it/s]
100%|██████████| 330/330 [01:24<00:00,  3.90it/s]


Epochs: 1 | Train Loss:  0.125                 | Train Accuracy:  0.732                 | Val Loss:  0.103                 | Val Accuracy:  0.754


100%|██████████| 670/670 [08:05<00:00,  1.38it/s]
100%|██████████| 330/330 [01:25<00:00,  3.87it/s]


Epochs: 2 | Train Loss:  0.095                 | Train Accuracy:  0.756                 | Val Loss:  0.085                 | Val Accuracy:  0.781


100%|██████████| 670/670 [08:05<00:00,  1.38it/s]
100%|██████████| 330/330 [01:24<00:00,  3.89it/s]


Epochs: 3 | Train Loss:  0.079                 | Train Accuracy:  0.791                 | Val Loss:  0.078                 | Val Accuracy:  0.792


100%|██████████| 670/670 [08:06<00:00,  1.38it/s]
100%|██████████| 330/330 [01:25<00:00,  3.86it/s]


Epochs: 4 | Train Loss:  0.069                 | Train Accuracy:  0.815                 | Val Loss:  0.078                 | Val Accuracy:  0.793


100%|██████████| 670/670 [08:04<00:00,  1.38it/s]
100%|██████████| 330/330 [01:25<00:00,  3.88it/s]


Epochs: 5 | Train Loss:  0.061                 | Train Accuracy:  0.836                 | Val Loss:  0.079                 | Val Accuracy:  0.785


100%|██████████| 670/670 [08:04<00:00,  1.38it/s]
100%|██████████| 330/330 [01:24<00:00,  3.91it/s]


Epochs: 6 | Train Loss:  0.054                 | Train Accuracy:  0.853                 | Val Loss:  0.079                 | Val Accuracy:  0.792


100%|██████████| 670/670 [08:04<00:00,  1.38it/s]
100%|██████████| 330/330 [01:24<00:00,  3.90it/s]


Epochs: 7 | Train Loss:  0.047                 | Train Accuracy:  0.871                 | Val Loss:  0.083                 | Val Accuracy:  0.786


100%|██████████| 670/670 [08:04<00:00,  1.38it/s]
100%|██████████| 330/330 [01:24<00:00,  3.89it/s]


Epochs: 8 | Train Loss:  0.042                 | Train Accuracy:  0.887                 | Val Loss:  0.084                 | Val Accuracy:  0.777


  1%|          | 8/670 [00:06<08:54,  1.24it/s]


KeyboardInterrupt: ignored

### 12-Class classification

In [None]:

from sklearn.model_selection import train_test_split
train_x,val_x, train_y, val_y = train_test_split(train_data["text"].to_numpy(), train_data["label_vector"].to_numpy(), test_size=0.33, random_state=0)

In [None]:
set(list(val_y))

{'1.1 threats of harm',
 '1.2 incitement and encouragement of harm',
 '2.1 descriptive attacks',
 '2.2 aggressive and emotive attacks',
 '2.3 dehumanising attacks & overt sexual objectification',
 '3.1 casual use of gendered slurs, profanities, and insults',
 '3.2 immutable gender differences and gender stereotypes',
 '3.3 backhanded gendered compliments',
 '3.4 condescending explanations or unwelcome advice',
 '4.1 supporting mistreatment of individual women',
 '4.2 supporting systemic discrimination against women as a group',
 'none'}

In [None]:
vector_dict = {
    '1.1 threats of harm': 1,
    '1.2 incitement and encouragement of harm': 2,
    '2.1 descriptive attacks': 3,
    '2.2 aggressive and emotive attacks': 4,
    '2.3 dehumanising attacks & overt sexual objectification': 5,
    '3.1 casual use of gendered slurs, profanities, and insults': 6,
    '3.2 immutable gender differences and gender stereotypes': 7,
    '3.3 backhanded gendered compliments': 8,
    '3.4 condescending explanations or unwelcome advice': 9,
    '4.1 supporting mistreatment of individual women': 10,
    '4.2 supporting systemic discrimination against women as a group': 11,
    'none': 0
}

In [None]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self,data,labels):

        self.labels = [vector_dict[label] for label in labels]
        self.data = [tokenizer(x, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for x in data]
    
    def __len__(self):
        return len(self.labels)
    
    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.data[idx]

    def __getitem__(self, idx):

        batch_data = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_data, batch_y

In [None]:
train_dataset = Dataset(train_x,train_y)
val_dataset = Dataset(val_x,val_y)


In [None]:
num_epochs = 10
model = BertClassifier(num_classes=12)
lr = 1e-6
              
train(model, train_dataset, val_dataset, lr, num_epochs, model_name="12class")

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 670/670 [08:08<00:00,  1.37it/s]
100%|██████████| 330/330 [01:27<00:00,  3.79it/s]


Epochs: 1 | Train Loss:  0.172                 | Train Accuracy:  0.695                 | Val Loss:  0.130                 | Val Accuracy:  0.754


100%|██████████| 670/670 [08:08<00:00,  1.37it/s]
100%|██████████| 330/330 [01:27<00:00,  3.79it/s]


Epochs: 2 | Train Loss:  0.125                 | Train Accuracy:  0.749                 | Val Loss:  0.116                 | Val Accuracy:  0.754


100%|██████████| 670/670 [08:07<00:00,  1.37it/s]
100%|██████████| 330/330 [01:26<00:00,  3.80it/s]


Epochs: 3 | Train Loss:  0.111                 | Train Accuracy:  0.757                 | Val Loss:  0.109                 | Val Accuracy:  0.773


100%|██████████| 670/670 [08:07<00:00,  1.37it/s]
100%|██████████| 330/330 [01:27<00:00,  3.79it/s]


Epochs: 4 | Train Loss:  0.100                 | Train Accuracy:  0.782                 | Val Loss:  0.108                 | Val Accuracy:  0.789


100%|██████████| 670/670 [08:07<00:00,  1.37it/s]
100%|██████████| 330/330 [01:27<00:00,  3.79it/s]


Epochs: 5 | Train Loss:  0.090                 | Train Accuracy:  0.804                 | Val Loss:  0.112                 | Val Accuracy:  0.749


100%|██████████| 670/670 [08:08<00:00,  1.37it/s]
100%|██████████| 330/330 [01:27<00:00,  3.78it/s]


Epochs: 6 | Train Loss:  0.081                 | Train Accuracy:  0.817                 | Val Loss:  0.105                 | Val Accuracy:  0.789


100%|██████████| 670/670 [08:07<00:00,  1.37it/s]
100%|██████████| 330/330 [01:27<00:00,  3.75it/s]


Epochs: 7 | Train Loss:  0.073                 | Train Accuracy:  0.837                 | Val Loss:  0.107                 | Val Accuracy:  0.774


100%|██████████| 670/670 [08:07<00:00,  1.37it/s]
100%|██████████| 330/330 [01:27<00:00,  3.75it/s]


Epochs: 8 | Train Loss:  0.067                 | Train Accuracy:  0.847                 | Val Loss:  0.110                 | Val Accuracy:  0.787


100%|██████████| 670/670 [08:08<00:00,  1.37it/s]
100%|██████████| 330/330 [01:27<00:00,  3.78it/s]


Epochs: 9 | Train Loss:  0.062                 | Train Accuracy:  0.853                 | Val Loss:  0.116                 | Val Accuracy:  0.750


100%|██████████| 670/670 [08:08<00:00,  1.37it/s]
100%|██████████| 330/330 [01:27<00:00,  3.79it/s]

Epochs: 10 | Train Loss:  0.058                 | Train Accuracy:  0.866                 | Val Loss:  0.113                 | Val Accuracy:  0.767





### Training 12-class classification from 5-class classification pre-training

In [None]:
vector_dict = {
    '1.1 threats of harm': 1,
    '1.2 incitement and encouragement of harm': 2,
    '2.1 descriptive attacks': 3,
    '2.2 aggressive and emotive attacks': 4,
    '2.3 dehumanising attacks & overt sexual objectification': 5,
    '3.1 casual use of gendered slurs, profanities, and insults': 6,
    '3.2 immutable gender differences and gender stereotypes': 7,
    '3.3 backhanded gendered compliments': 8,
    '3.4 condescending explanations or unwelcome advice': 9,
    '4.1 supporting mistreatment of individual women': 10,
    '4.2 supporting systemic discrimination against women as a group': 11,
    'none': 0
}

In [None]:
from sklearn.model_selection import train_test_split
train_x,val_x, train_y, val_y = train_test_split(train_data["text"].to_numpy(), train_data["label_vector"].to_numpy(), test_size=0.33, random_state=0)

In [None]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self,data,labels):

        self.labels = [vector_dict[label] for label in labels]
        self.data = [tokenizer(x, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for x in data]
    
    def __len__(self):
        return len(self.labels)
    
    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.data[idx]

    def __getitem__(self, idx):

        batch_data = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_data, batch_y

In [None]:
train_dataset = Dataset(train_x,train_y)
val_dataset = Dataset(val_x,val_y)

In [None]:
model_5class = BertClassifier(num_classes=5)
model_5class.load_state_dict(torch.load("/content/drive/MyDrive/NLP Project/models/5class_model_roberta.pth"))

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
model_12class = BertClassifier(num_classes=12)
model_12class.bert.load_state_dict(model_5class.bert.state_dict())

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
num_epochs = 10
lr = 1e-6
train(model_12class, train_dataset, val_dataset, lr, num_epochs, model_name="12class_finetuned")

100%|██████████| 670/670 [07:43<00:00,  1.45it/s]
100%|██████████| 330/330 [01:20<00:00,  4.12it/s]


Epochs: 1 | Train Loss:  0.128                 | Train Accuracy:  0.744                 | Val Loss:  0.114                 | Val Accuracy:  0.772


100%|██████████| 670/670 [07:41<00:00,  1.45it/s]
100%|██████████| 330/330 [01:20<00:00,  4.12it/s]


Epochs: 2 | Train Loss:  0.093                 | Train Accuracy:  0.798                 | Val Loss:  0.109                 | Val Accuracy:  0.775


100%|██████████| 670/670 [07:41<00:00,  1.45it/s]
100%|██████████| 330/330 [01:20<00:00,  4.12it/s]


Epochs: 3 | Train Loss:  0.082                 | Train Accuracy:  0.816                 | Val Loss:  0.108                 | Val Accuracy:  0.769


100%|██████████| 670/670 [07:41<00:00,  1.45it/s]
100%|██████████| 330/330 [01:20<00:00,  4.12it/s]


Epochs: 4 | Train Loss:  0.073                 | Train Accuracy:  0.834                 | Val Loss:  0.108                 | Val Accuracy:  0.777


100%|██████████| 670/670 [07:41<00:00,  1.45it/s]
100%|██████████| 330/330 [01:20<00:00,  4.12it/s]


Epochs: 5 | Train Loss:  0.066                 | Train Accuracy:  0.839                 | Val Loss:  0.110                 | Val Accuracy:  0.758


100%|██████████| 670/670 [07:41<00:00,  1.45it/s]
100%|██████████| 330/330 [01:20<00:00,  4.12it/s]


Epochs: 6 | Train Loss:  0.062                 | Train Accuracy:  0.848                 | Val Loss:  0.110                 | Val Accuracy:  0.789


100%|██████████| 670/670 [07:41<00:00,  1.45it/s]
100%|██████████| 330/330 [01:20<00:00,  4.12it/s]


Epochs: 7 | Train Loss:  0.058                 | Train Accuracy:  0.865                 | Val Loss:  0.110                 | Val Accuracy:  0.779


100%|██████████| 670/670 [07:41<00:00,  1.45it/s]
100%|██████████| 330/330 [01:20<00:00,  4.12it/s]


Epochs: 8 | Train Loss:  0.054                 | Train Accuracy:  0.875                 | Val Loss:  0.110                 | Val Accuracy:  0.776


 89%|████████▉ | 598/670 [06:52<00:49,  1.45it/s]

### Evaluation results

In [None]:
from sklearn.metrics import f1_score
def evaluate(model, test_dataset,num_classes=2):


    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=2)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = model.to(device)

    total_acc_test = 0
    total_f1_score = 0
    with torch.no_grad():

        for test_input, test_label in tqdm(test_dataloader):

              test_label = test_label.to(device)
              mask = test_input['attention_mask'].to(device)
              input_id = test_input['input_ids'].squeeze(1).to(device)

              output = model(input_id, mask)
              f1 = f1_score((output.argmax(dim=1)).cpu().numpy(),test_label.cpu().numpy(),average="macro")
              acc = (output.argmax(dim=1) == test_label).sum().item()
              total_acc_test += acc
              total_f1_score += f1
    
    print(f'Test Accuracy: {total_acc_test / len(test_dataset): .3f}')
    print(f'Test F1-score: {total_f1_score / len(test_dataloader): .3f}')
    


In [None]:
test_data = pd.read_csv('/content/Group_2/test.csv')
test_data

Unnamed: 0,text,label_sexist,label_category,label_vector
0,Trying to cut right now.,not sexist,none,none
1,I don't pay these assholes to use their server...,not sexist,none,none
2,Long term strategic thinking is a rare skill i...,sexist,4. prejudiced discussions,4.2 supporting systemic discrimination against...
3,tell that cunt to come to my house and take it...,sexist,"1. threats, plans to harm and incitement",1.1 threats of harm
4,Loretta Lynn fractures her hip A statement pos...,not sexist,none,none
...,...,...,...,...
1995,"she is a man, abort.",not sexist,none,none
1996,Your mom's worn down,not sexist,none,none
1997,So dumb!,not sexist,none,none
1998,Why is raping old White woman an exclusively B...,not sexist,none,none


In [None]:
text, label_sexist, label_category, label_vector = \
test_data["text"].to_numpy(), test_data["label_sexist"].to_numpy(), \
test_data["label_category"].to_numpy(), test_data["label_vector"].to_numpy()

In [None]:
sexist_dict = {
    "sexist": 1,
    "not sexist": 0
}
category_dict = {
    '1. threats, plans to harm and incitement': 1,
    '2. derogation': 2,
    '3. animosity': 3,
    '4. prejudiced discussions': 4,
    'none': 0
}
vector_dict = {
    '1.1 threats of harm': 1,
    '1.2 incitement and encouragement of harm': 2,
    '2.1 descriptive attacks': 3,
    '2.2 aggressive and emotive attacks': 4,
    '2.3 dehumanising attacks & overt sexual objectification': 5,
    '3.1 casual use of gendered slurs, profanities, and insults': 6,
    '3.2 immutable gender differences and gender stereotypes': 7,
    '3.3 backhanded gendered compliments': 8,
    '3.4 condescending explanations or unwelcome advice': 9,
    '4.1 supporting mistreatment of individual women': 10,
    '4.2 supporting systemic discrimination against women as a group': 11,
    'none': 0
}

In [None]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self,data,labels):

        self.labels = [sexist_dict[label] for label in labels]
        self.data = [tokenizer(x, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for x in data]
    
    def __len__(self):
        return len(self.labels)
    
    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.data[idx]

    def __getitem__(self, idx):

        batch_data = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_data, batch_y
        

### 2-Class testing

In [None]:
test_dataset = Dataset(text, label_sexist)

In [None]:
model = BertClassifier(num_classes=2)
model.load_state_dict(torch.load("/content/drive/MyDrive/NLP Project/models/2class_model_roberta.pth"))

Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
evaluate(model, test_dataset)

100%|██████████| 1000/1000 [01:12<00:00, 13.81it/s]

Test Accuracy:  0.843
Test F1-score:  0.798





In [None]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self,data,labels):

        self.labels = [category_dict[label] for label in labels]
        self.data = [tokenizer(x, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for x in data]
    
    def __len__(self):
        return len(self.labels)
    
    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.data[idx]

    def __getitem__(self, idx):

        batch_data = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_data, batch_y
        

### 5-Class Testing

In [None]:
test_dataset = Dataset(text, label_category)

In [None]:
model = BertClassifier(num_classes=5)
model.load_state_dict(torch.load("/content/drive/MyDrive/NLP Project/models/5class_model_roberta.pth"))

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
evaluate(model_5class_roberta, test_dataset)

100%|██████████| 1000/1000 [01:12<00:00, 13.82it/s]

Test Accuracy:  0.797
Test F1-score:  0.743





In [None]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self,data,labels):

        self.labels = [vector_dict[label] for label in labels]
        self.data = [tokenizer(x, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for x in data]
    
    def __len__(self):
        return len(self.labels)
    
    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.data[idx]

    def __getitem__(self, idx):

        batch_data = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_data, batch_y


### 12-Class classification

In [None]:
test_dataset = Dataset(text, label_vector)

In [None]:
model = BertClassifier(num_classes=12)
model.load_state_dict(torch.load("/content/drive/MyDrive/NLP Project/models/12class_model_roberta.pth"))

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.layer_norm.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
evaluate(model, test_dataset, num_classes=12)

100%|██████████| 1000/1000 [01:03<00:00, 15.69it/s]

Test Accuracy:  0.785
Test F1-score:  0.728





In [None]:
model = BertClassifier(num_classes=12)
model.load_state_dict(torch.load("/content/drive/MyDrive/NLP Project/models/12class_finetuned_model_roberta.pth"))

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.dense.weight', 'lm_head.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
evaluate(model, test_dataset, num_classes=12)

100%|██████████| 1000/1000 [01:04<00:00, 15.55it/s]

Test Accuracy:  0.776
Test F1-score:  0.718



