### Import Dataset

In [None]:
!unzip Project6_Group2.zip

Archive:  Project6_Group2.zip
  inflating: Group_2/test.csv        
  inflating: Group_2/train.csv       


In [None]:
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


### Install dependencies

In [None]:
!pip install -qqq transformers

[K     |████████████████████████████████| 5.5 MB 34.3 MB/s 
[K     |████████████████████████████████| 7.6 MB 66.9 MB/s 
[K     |████████████████████████████████| 163 kB 69.7 MB/s 
[?25h

In [None]:
import pandas as pd
import numpy as np
import torch
import torchvision

In [None]:
train_data = pd.read_csv('/content/Group_2/train.csv')
train_data

Unnamed: 0,text,label_sexist,label_category,label_vector
0,"Who does Jenner think she is, Hillary Clinton?...",not sexist,none,none
1,these women were pretty dumb to think a big ri...,not sexist,none,none
2,This. Also with the girls who have the potenti...,not sexist,none,none
3,We actually agree to a point. The problem is t...,not sexist,none,none
4,Plenty of women so it's still a minority.,not sexist,none,none
...,...,...,...,...
7995,There are direct racial parallels that I won't...,not sexist,none,none
7996,[USER] Isn't she the one that openly said her ...,not sexist,none,none
7997,"Unfortunately, we know some men are ready to s...",sexist,2. derogation,2.2 aggressive and emotive attacks
7998,"So, your mother and father are nazis? Do you l...",not sexist,none,none


In [None]:
from sklearn.model_selection import train_test_split
train_x,val_x, train_y, val_y = train_test_split(train_data["text"].to_numpy(), train_data["label_sexist"].to_numpy(), test_size=0.33, random_state=0)

In [None]:
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-cased')


Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self,data,labels):

        self.labels = [0 if label=="not sexist"  else 1 for label in labels]
        self.data = [tokenizer(x, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for x in data]
    
    def __len__(self):
        return len(self.labels)
    
    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.data[idx]

    def __getitem__(self, idx):

        batch_data = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_data, batch_y

In [None]:
train_dataset = Dataset(train_x,train_y)
val_dataset = Dataset(val_x,val_y)


## n-class BertClassifier Model Architecture

In [None]:
from torch import nn
from transformers import BertModel

class BertClassifier(nn.Module):

    def __init__(self, dropout=0.5, num_classes=2):

        super(BertClassifier, self).__init__()

        self.bert = BertModel.from_pretrained('bert-base-cased')
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, num_classes)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):

        _, pooled_output = self.bert(input_ids= input_id, attention_mask=mask,return_dict=False)
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        final_output = self.relu(linear_output)

        return final_output

## Training Implementation

In [None]:
from tqdm import tqdm

def train(model, train_dataset, val_dataset, learning_rate, epochs, model_name="2class"):

    best_eval_loss = 1e9


    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=8, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=8)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr= learning_rate)

    if use_cuda:

            model = model.cuda()
            criterion = criterion.cuda()

    for epoch_num in range(epochs):

            total_acc_train = 0
            total_loss_train = 0

            for train_input, train_label in tqdm(train_dataloader):

                train_label = train_label.to(device)
                mask = train_input['attention_mask'].to(device)
                input_id = train_input['input_ids'].squeeze(1).to(device)

                output = model(input_id, mask)
                
                batch_loss = criterion(output, train_label.long())
                total_loss_train += batch_loss.item()
                
                acc = (output.argmax(dim=1) == train_label).sum().item()
                total_acc_train += acc

                model.zero_grad()
                batch_loss.backward()
                optimizer.step()
            
            total_acc_val = 0
            total_loss_val = 0

            with torch.no_grad():

                for val_input, val_label in tqdm(val_dataloader):

                    val_label = val_label.to(device)
                    mask = val_input['attention_mask'].to(device)
                    input_id = val_input['input_ids'].squeeze(1).to(device)

                    output = model(input_id, mask)

                    batch_loss = criterion(output, val_label.long())
                    total_loss_val += batch_loss.item()
                    
                    acc = (output.argmax(dim=1) == val_label).sum().item()
                    total_acc_val += acc
            
            print(
                f'Epochs: {epoch_num + 1} | Train Loss: {total_loss_train / len(train_dataset): .3f} \
                | Train Accuracy: {total_acc_train / len(train_dataset): .3f} \
                | Val Loss: {total_loss_val / len(val_dataset): .3f} \
                | Val Accuracy: {total_acc_val / len(val_dataset): .3f}')
            
            if total_loss_val / len(val_dataset) < best_eval_loss:
                best_eval_loss = total_loss_val / len(val_dataset)
                torch.save(model.state_dict(), f"/content/drive/MyDrive/NLP Project/models/{model_name}_model.pth")
                  


### Training 2 Class BertClassifier Model

In [None]:
num_epochs = 10
model = BertClassifier(num_classes=2)
lr = 1e-6
              
train(model, train_dataset, val_dataset, lr, num_epochs)

Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 670/670 [07:46<00:00,  1.44it/s]
100%|██████████| 330/330 [01:28<00:00,  3.72it/s]


Epochs: 1 | Train Loss:  0.079                 | Train Accuracy:  0.682                 | Val Loss:  0.070                 | Val Accuracy:  0.752


100%|██████████| 670/670 [07:58<00:00,  1.40it/s]
100%|██████████| 330/330 [01:28<00:00,  3.73it/s]


Epochs: 2 | Train Loss:  0.068                 | Train Accuracy:  0.749                 | Val Loss:  0.064                 | Val Accuracy:  0.756


100%|██████████| 670/670 [07:57<00:00,  1.40it/s]
100%|██████████| 330/330 [01:28<00:00,  3.72it/s]


Epochs: 3 | Train Loss:  0.058                 | Train Accuracy:  0.754                 | Val Loss:  0.056                 | Val Accuracy:  0.761


100%|██████████| 670/670 [07:57<00:00,  1.40it/s]
100%|██████████| 330/330 [01:28<00:00,  3.74it/s]


Epochs: 4 | Train Loss:  0.051                 | Train Accuracy:  0.761                 | Val Loss:  0.054                 | Val Accuracy:  0.762


100%|██████████| 670/670 [07:57<00:00,  1.40it/s]
100%|██████████| 330/330 [01:28<00:00,  3.73it/s]


Epochs: 5 | Train Loss:  0.044                 | Train Accuracy:  0.771                 | Val Loss:  0.053                 | Val Accuracy:  0.768


100%|██████████| 670/670 [07:57<00:00,  1.40it/s]
100%|██████████| 330/330 [01:28<00:00,  3.73it/s]


Epochs: 6 | Train Loss:  0.037                 | Train Accuracy:  0.804                 | Val Loss:  0.057                 | Val Accuracy:  0.798


100%|██████████| 670/670 [07:57<00:00,  1.40it/s]
100%|██████████| 330/330 [01:28<00:00,  3.72it/s]


Epochs: 7 | Train Loss:  0.030                 | Train Accuracy:  0.879                 | Val Loss:  0.057                 | Val Accuracy:  0.831


100%|██████████| 670/670 [07:58<00:00,  1.40it/s]
100%|██████████| 330/330 [01:28<00:00,  3.73it/s]


Epochs: 8 | Train Loss:  0.021                 | Train Accuracy:  0.948                 | Val Loss:  0.058                 | Val Accuracy:  0.819


100%|██████████| 670/670 [07:57<00:00,  1.40it/s]
100%|██████████| 330/330 [01:28<00:00,  3.73it/s]


Epochs: 9 | Train Loss:  0.016                 | Train Accuracy:  0.969                 | Val Loss:  0.062                 | Val Accuracy:  0.812


100%|██████████| 670/670 [07:58<00:00,  1.40it/s]
100%|██████████| 330/330 [01:28<00:00,  3.73it/s]

Epochs: 10 | Train Loss:  0.012                 | Train Accuracy:  0.977                 | Val Loss:  0.066                 | Val Accuracy:  0.823





### Training 5 Class BertClassifier Model

In [None]:

from sklearn.model_selection import train_test_split
train_x,val_x, train_y, val_y = train_test_split(train_data["text"].to_numpy(), train_data["label_category"].to_numpy(), test_size=0.33, random_state=0)

In [None]:
category_types = set(list(val_y))
category_types

{'1. threats, plans to harm and incitement',
 '2. derogation',
 '3. animosity',
 '4. prejudiced discussions',
 'none'}

In [None]:
category_dict = {
    '1. threats, plans to harm and incitement': 1,
    '2. derogation': 2,
    '3. animosity': 3,
    '4. prejudiced discussions': 4,
    'none': 0
}

In [None]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self,data,labels):

        self.labels = [category_dict[label] for label in labels]
        self.data = [tokenizer(x, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for x in data]
    
    def __len__(self):
        return len(self.labels)
    
    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.data[idx]

    def __getitem__(self, idx):

        batch_data = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_data, batch_y

In [None]:
train_dataset = Dataset(train_x,train_y)
val_dataset = Dataset(val_x,val_y)


In [None]:
num_epochs = 10
model = BertClassifier(num_classes=5)
lr = 1e-6
              
train(model, train_dataset, val_dataset, lr, num_epochs, model_name="5class")

Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 670/670 [08:06<00:00,  1.38it/s]
100%|██████████| 330/330 [01:33<00:00,  3.54it/s]


Epochs: 1 | Train Loss:  0.131                 | Train Accuracy:  0.717                 | Val Loss:  0.112                 | Val Accuracy:  0.754


100%|██████████| 670/670 [08:06<00:00,  1.38it/s]
100%|██████████| 330/330 [01:32<00:00,  3.55it/s]


Epochs: 2 | Train Loss:  0.108                 | Train Accuracy:  0.748                 | Val Loss:  0.101                 | Val Accuracy:  0.754


100%|██████████| 670/670 [08:05<00:00,  1.38it/s]
100%|██████████| 330/330 [01:33<00:00,  3.55it/s]


Epochs: 3 | Train Loss:  0.096                 | Train Accuracy:  0.754                 | Val Loss:  0.090                 | Val Accuracy:  0.772


100%|██████████| 670/670 [08:05<00:00,  1.38it/s]
100%|██████████| 330/330 [01:33<00:00,  3.54it/s]


Epochs: 4 | Train Loss:  0.084                 | Train Accuracy:  0.781                 | Val Loss:  0.083                 | Val Accuracy:  0.786


100%|██████████| 670/670 [08:05<00:00,  1.38it/s]
100%|██████████| 330/330 [01:33<00:00,  3.54it/s]


Epochs: 5 | Train Loss:  0.077                 | Train Accuracy:  0.799                 | Val Loss:  0.084                 | Val Accuracy:  0.790


100%|██████████| 670/670 [08:06<00:00,  1.38it/s]
100%|██████████| 330/330 [01:32<00:00,  3.55it/s]


Epochs: 6 | Train Loss:  0.069                 | Train Accuracy:  0.821                 | Val Loss:  0.082                 | Val Accuracy:  0.794


100%|██████████| 670/670 [08:05<00:00,  1.38it/s]
100%|██████████| 330/330 [01:33<00:00,  3.55it/s]


Epochs: 7 | Train Loss:  0.062                 | Train Accuracy:  0.838                 | Val Loss:  0.082                 | Val Accuracy:  0.797


100%|██████████| 670/670 [08:05<00:00,  1.38it/s]
100%|██████████| 330/330 [01:33<00:00,  3.54it/s]


Epochs: 8 | Train Loss:  0.054                 | Train Accuracy:  0.861                 | Val Loss:  0.084                 | Val Accuracy:  0.783


100%|██████████| 670/670 [08:06<00:00,  1.38it/s]
100%|██████████| 330/330 [01:32<00:00,  3.55it/s]


Epochs: 9 | Train Loss:  0.046                 | Train Accuracy:  0.889                 | Val Loss:  0.085                 | Val Accuracy:  0.789


100%|██████████| 670/670 [08:06<00:00,  1.38it/s]
100%|██████████| 330/330 [01:33<00:00,  3.54it/s]

Epochs: 10 | Train Loss:  0.041                 | Train Accuracy:  0.909                 | Val Loss:  0.089                 | Val Accuracy:  0.773





## Training 12 Class BertClassifier Model

In [None]:

from sklearn.model_selection import train_test_split
train_x,val_x, train_y, val_y = train_test_split(train_data["text"].to_numpy(), train_data["label_vector"].to_numpy(), test_size=0.33, random_state=0)

In [None]:
set(list(val_y))

{'1.1 threats of harm',
 '1.2 incitement and encouragement of harm',
 '2.1 descriptive attacks',
 '2.2 aggressive and emotive attacks',
 '2.3 dehumanising attacks & overt sexual objectification',
 '3.1 casual use of gendered slurs, profanities, and insults',
 '3.2 immutable gender differences and gender stereotypes',
 '3.3 backhanded gendered compliments',
 '3.4 condescending explanations or unwelcome advice',
 '4.1 supporting mistreatment of individual women',
 '4.2 supporting systemic discrimination against women as a group',
 'none'}

In [None]:
vector_dict = {
    '1.1 threats of harm': 1,
    '1.2 incitement and encouragement of harm': 2,
    '2.1 descriptive attacks': 3,
    '2.2 aggressive and emotive attacks': 4,
    '2.3 dehumanising attacks & overt sexual objectification': 5,
    '3.1 casual use of gendered slurs, profanities, and insults': 6,
    '3.2 immutable gender differences and gender stereotypes': 7,
    '3.3 backhanded gendered compliments': 8,
    '3.4 condescending explanations or unwelcome advice': 9,
    '4.1 supporting mistreatment of individual women': 10,
    '4.2 supporting systemic discrimination against women as a group': 11,
    'none': 0
}

In [None]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self,data,labels):

        self.labels = [vector_dict[label] for label in labels]
        self.data = [tokenizer(x, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for x in data]
    
    def __len__(self):
        return len(self.labels)
    
    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.data[idx]

    def __getitem__(self, idx):

        batch_data = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_data, batch_y

In [None]:
train_dataset = Dataset(train_x,train_y)
val_dataset = Dataset(val_x,val_y)


In [None]:
num_epochs = 10
model = BertClassifier(num_classes=12)
lr = 1e-6
              
train(model, train_dataset, val_dataset, lr, num_epochs, model_name="12class")

Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 670/670 [07:42<00:00,  1.45it/s]
100%|██████████| 330/330 [01:27<00:00,  3.75it/s]


Epochs: 1 | Train Loss:  0.220                 | Train Accuracy:  0.489                 | Val Loss:  0.140                 | Val Accuracy:  0.754


100%|██████████| 670/670 [07:52<00:00,  1.42it/s]
100%|██████████| 330/330 [01:27<00:00,  3.77it/s]


Epochs: 2 | Train Loss:  0.136                 | Train Accuracy:  0.749                 | Val Loss:  0.133                 | Val Accuracy:  0.756


100%|██████████| 670/670 [07:52<00:00,  1.42it/s]
100%|██████████| 330/330 [01:27<00:00,  3.76it/s]


Epochs: 3 | Train Loss:  0.125                 | Train Accuracy:  0.757                 | Val Loss:  0.123                 | Val Accuracy:  0.757


100%|██████████| 670/670 [07:52<00:00,  1.42it/s]
100%|██████████| 330/330 [01:28<00:00,  3.75it/s]


Epochs: 4 | Train Loss:  0.116                 | Train Accuracy:  0.759                 | Val Loss:  0.120                 | Val Accuracy:  0.755


100%|██████████| 670/670 [07:52<00:00,  1.42it/s]
100%|██████████| 330/330 [01:27<00:00,  3.77it/s]


Epochs: 5 | Train Loss:  0.107                 | Train Accuracy:  0.767                 | Val Loss:  0.121                 | Val Accuracy:  0.752


100%|██████████| 670/670 [07:52<00:00,  1.42it/s]
100%|██████████| 330/330 [01:28<00:00,  3.74it/s]


Epochs: 6 | Train Loss:  0.098                 | Train Accuracy:  0.773                 | Val Loss:  0.122                 | Val Accuracy:  0.753


100%|██████████| 670/670 [07:52<00:00,  1.42it/s]
100%|██████████| 330/330 [01:28<00:00,  3.75it/s]


Epochs: 7 | Train Loss:  0.091                 | Train Accuracy:  0.787                 | Val Loss:  0.125                 | Val Accuracy:  0.752


100%|██████████| 670/670 [07:52<00:00,  1.42it/s]
100%|██████████| 330/330 [01:28<00:00,  3.75it/s]


Epochs: 8 | Train Loss:  0.084                 | Train Accuracy:  0.788                 | Val Loss:  0.128                 | Val Accuracy:  0.738


100%|██████████| 670/670 [07:52<00:00,  1.42it/s]
100%|██████████| 330/330 [01:28<00:00,  3.75it/s]


Epochs: 9 | Train Loss:  0.080                 | Train Accuracy:  0.793                 | Val Loss:  0.133                 | Val Accuracy:  0.721


100%|██████████| 670/670 [07:52<00:00,  1.42it/s]
100%|██████████| 330/330 [01:27<00:00,  3.76it/s]

Epochs: 10 | Train Loss:  0.078                 | Train Accuracy:  0.800                 | Val Loss:  0.129                 | Val Accuracy:  0.751





### Training 5 class model from pretraining on 2 class model

In [None]:
from sklearn.model_selection import train_test_split
train_x,val_x, train_y, val_y = train_test_split(train_data["text"].to_numpy(), train_data["label_category"].to_numpy(), test_size=0.33, random_state=0)

In [None]:
category_dict = {
    '1. threats, plans to harm and incitement': 1,
    '2. derogation': 2,
    '3. animosity': 3,
    '4. prejudiced discussions': 4,
    'none': 0
}

In [None]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self,data,labels):

        self.labels = [category_dict[label] for label in labels]
        self.data = [tokenizer(x, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for x in data]
    
    def __len__(self):
        return len(self.labels)
    
    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.data[idx]

    def __getitem__(self, idx):

        batch_data = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_data, batch_y

In [None]:
train_dataset = Dataset(train_x,train_y)
val_dataset = Dataset(val_x,val_y)

In [None]:
model_2class = BertClassifier(num_classes=2)
model_2class.load_state_dict(torch.load("/content/drive/MyDrive/NLP Project/models/2class_model.pth"))

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
model_5class = BertClassifier(num_classes=5)
model_5class.bert.load_state_dict(model.bert.state_dict())

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
num_epochs = 10
lr = 1e-6
train(model_5class, train_dataset, val_dataset, lr, num_epochs, model_name="5class_finetuned")

100%|██████████| 670/670 [08:07<00:00,  1.37it/s]
100%|██████████| 330/330 [01:33<00:00,  3.53it/s]


Epochs: 1 | Train Loss:  0.108                 | Train Accuracy:  0.724                 | Val Loss:  0.091                 | Val Accuracy:  0.773


100%|██████████| 670/670 [08:03<00:00,  1.38it/s]
100%|██████████| 330/330 [01:33<00:00,  3.53it/s]


Epochs: 2 | Train Loss:  0.074                 | Train Accuracy:  0.794                 | Val Loss:  0.090                 | Val Accuracy:  0.764


100%|██████████| 670/670 [08:04<00:00,  1.38it/s]
100%|██████████| 330/330 [01:33<00:00,  3.52it/s]


Epochs: 3 | Train Loss:  0.064                 | Train Accuracy:  0.816                 | Val Loss:  0.092                 | Val Accuracy:  0.770


100%|██████████| 670/670 [08:03<00:00,  1.38it/s]
100%|██████████| 330/330 [01:33<00:00,  3.53it/s]


Epochs: 4 | Train Loss:  0.058                 | Train Accuracy:  0.826                 | Val Loss:  0.094                 | Val Accuracy:  0.765


100%|██████████| 670/670 [08:03<00:00,  1.38it/s]
100%|██████████| 330/330 [01:33<00:00,  3.53it/s]


Epochs: 5 | Train Loss:  0.052                 | Train Accuracy:  0.842                 | Val Loss:  0.097                 | Val Accuracy:  0.768


100%|██████████| 670/670 [08:03<00:00,  1.38it/s]
100%|██████████| 330/330 [01:33<00:00,  3.52it/s]


Epochs: 6 | Train Loss:  0.048                 | Train Accuracy:  0.850                 | Val Loss:  0.100                 | Val Accuracy:  0.760


100%|██████████| 670/670 [08:03<00:00,  1.39it/s]
100%|██████████| 330/330 [01:33<00:00,  3.52it/s]


Epochs: 7 | Train Loss:  0.046                 | Train Accuracy:  0.854                 | Val Loss:  0.101                 | Val Accuracy:  0.757


100%|██████████| 670/670 [08:03<00:00,  1.39it/s]
100%|██████████| 330/330 [01:33<00:00,  3.54it/s]


Epochs: 8 | Train Loss:  0.044                 | Train Accuracy:  0.856                 | Val Loss:  0.104                 | Val Accuracy:  0.767


100%|██████████| 670/670 [08:03<00:00,  1.38it/s]
 85%|████████▍ | 280/330 [01:19<00:14,  3.53it/s]

In [None]:
vector_dict = {
    '1.1 threats of harm': 1,
    '1.2 incitement and encouragement of harm': 2,
    '2.1 descriptive attacks': 3,
    '2.2 aggressive and emotive attacks': 4,
    '2.3 dehumanising attacks & overt sexual objectification': 5,
    '3.1 casual use of gendered slurs, profanities, and insults': 6,
    '3.2 immutable gender differences and gender stereotypes': 7,
    '3.3 backhanded gendered compliments': 8,
    '3.4 condescending explanations or unwelcome advice': 9,
    '4.1 supporting mistreatment of individual women': 10,
    '4.2 supporting systemic discrimination against women as a group': 11,
    'none': 0
}

### Training 12 class model from pretraining on 5 class model

In [None]:
from sklearn.model_selection import train_test_split
train_x,val_x, train_y, val_y = train_test_split(train_data["text"].to_numpy(), train_data["label_vector"].to_numpy(), test_size=0.33, random_state=0)

In [None]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self,data,labels):

        self.labels = [vector_dict[label] for label in labels]
        self.data = [tokenizer(x, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for x in data]
    
    def __len__(self):
        return len(self.labels)
    
    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.data[idx]

    def __getitem__(self, idx):

        batch_data = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_data, batch_y

In [None]:
train_dataset = Dataset(train_x,train_y)
val_dataset = Dataset(val_x,val_y)

In [None]:
model_5class = BertClassifier(num_classes=5)
model_5class.load_state_dict(torch.load("/content/drive/MyDrive/NLP Project/models/5class_model.pth"))

Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
model_12class = BertClassifier(num_classes=12)
model_12class.bert.load_state_dict(model_5class.bert.state_dict())

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
num_epochs = 10
lr = 1e-6
train(model_12class, train_dataset, val_dataset, lr, num_epochs, model_name="12class_finetuned")

100%|██████████| 670/670 [08:14<00:00,  1.35it/s]
100%|██████████| 330/330 [01:36<00:00,  3.43it/s]


Epochs: 1 | Train Loss:  0.155                 | Train Accuracy:  0.712                 | Val Loss:  0.124                 | Val Accuracy:  0.758


100%|██████████| 670/670 [08:16<00:00,  1.35it/s]
100%|██████████| 330/330 [01:35<00:00,  3.44it/s]


Epochs: 2 | Train Loss:  0.108                 | Train Accuracy:  0.764                 | Val Loss:  0.117                 | Val Accuracy:  0.769


100%|██████████| 670/670 [08:16<00:00,  1.35it/s]
100%|██████████| 330/330 [01:36<00:00,  3.44it/s]


Epochs: 3 | Train Loss:  0.096                 | Train Accuracy:  0.784                 | Val Loss:  0.114                 | Val Accuracy:  0.783


100%|██████████| 670/670 [08:16<00:00,  1.35it/s]
100%|██████████| 330/330 [01:36<00:00,  3.43it/s]


Epochs: 4 | Train Loss:  0.085                 | Train Accuracy:  0.803                 | Val Loss:  0.111                 | Val Accuracy:  0.782


100%|██████████| 670/670 [08:16<00:00,  1.35it/s]
100%|██████████| 330/330 [01:35<00:00,  3.44it/s]


Epochs: 5 | Train Loss:  0.077                 | Train Accuracy:  0.819                 | Val Loss:  0.111                 | Val Accuracy:  0.790


100%|██████████| 670/670 [08:16<00:00,  1.35it/s]
100%|██████████| 330/330 [01:36<00:00,  3.44it/s]


Epochs: 6 | Train Loss:  0.071                 | Train Accuracy:  0.829                 | Val Loss:  0.115                 | Val Accuracy:  0.766


100%|██████████| 670/670 [08:16<00:00,  1.35it/s]
100%|██████████| 330/330 [01:36<00:00,  3.44it/s]


Epochs: 7 | Train Loss:  0.067                 | Train Accuracy:  0.839                 | Val Loss:  0.113                 | Val Accuracy:  0.772


100%|██████████| 670/670 [08:17<00:00,  1.35it/s]
 37%|███▋      | 121/330 [00:35<01:00,  3.45it/s]

### Evaluation Implementation

In [None]:
from sklearn.metrics import f1_score
def evaluate(model, test_dataset,num_classes=2):


    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=2)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model = model.to(device)

    total_acc_test = 0
    total_f1_score = 0
    with torch.no_grad():

        for test_input, test_label in tqdm(test_dataloader):

              test_label = test_label.to(device)
              mask = test_input['attention_mask'].to(device)
              input_id = test_input['input_ids'].squeeze(1).to(device)

              output = model(input_id, mask)
              f1 = f1_score((output.argmax(dim=1)).cpu().numpy(),test_label.cpu().numpy(),average="macro")
              acc = (output.argmax(dim=1) == test_label).sum().item()
              total_acc_test += acc
              total_f1_score += f1
    
    print(f'Test Accuracy: {total_acc_test / len(test_dataset): .3f}')
    print(f'Test F1-score: {total_f1_score / len(test_dataloader): .3f}')
    


### Evaluation on test data

In [None]:
test_data = pd.read_csv('/content/Group_2/test.csv')
test_data

Unnamed: 0,text,label_sexist,label_category,label_vector
0,Trying to cut right now.,not sexist,none,none
1,I don't pay these assholes to use their server...,not sexist,none,none
2,Long term strategic thinking is a rare skill i...,sexist,4. prejudiced discussions,4.2 supporting systemic discrimination against...
3,tell that cunt to come to my house and take it...,sexist,"1. threats, plans to harm and incitement",1.1 threats of harm
4,Loretta Lynn fractures her hip A statement pos...,not sexist,none,none
...,...,...,...,...
1995,"she is a man, abort.",not sexist,none,none
1996,Your mom's worn down,not sexist,none,none
1997,So dumb!,not sexist,none,none
1998,Why is raping old White woman an exclusively B...,not sexist,none,none


In [None]:
text, label_sexist, label_category, label_vector = \
test_data["text"].to_numpy(), test_data["label_sexist"].to_numpy(), \
test_data["label_category"].to_numpy(), test_data["label_vector"].to_numpy()

In [None]:
sexist_dict = {
    "sexist": 1,
    "not sexist": 0
}
category_dict = {
    '1. threats, plans to harm and incitement': 1,
    '2. derogation': 2,
    '3. animosity': 3,
    '4. prejudiced discussions': 4,
    'none': 0
}
vector_dict = {
    '1.1 threats of harm': 1,
    '1.2 incitement and encouragement of harm': 2,
    '2.1 descriptive attacks': 3,
    '2.2 aggressive and emotive attacks': 4,
    '2.3 dehumanising attacks & overt sexual objectification': 5,
    '3.1 casual use of gendered slurs, profanities, and insults': 6,
    '3.2 immutable gender differences and gender stereotypes': 7,
    '3.3 backhanded gendered compliments': 8,
    '3.4 condescending explanations or unwelcome advice': 9,
    '4.1 supporting mistreatment of individual women': 10,
    '4.2 supporting systemic discrimination against women as a group': 11,
    'none': 0
}

### 2 class classification results

In [None]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self,data,labels):

        self.labels = [sexist_dict[label] for label in labels]
        self.data = [tokenizer(x, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for x in data]
    
    def __len__(self):
        return len(self.labels)
    
    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.data[idx]

    def __getitem__(self, idx):

        batch_data = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_data, batch_y
        

In [None]:
test_dataset = Dataset(text, label_sexist)

In [None]:
model = BertClassifier(num_classes=2)
model.load_state_dict(torch.load("/content/drive/MyDrive/NLP Project/models/2class_model.pth"))

Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
evaluate(model, test_dataset, num_classes=2)

100%|██████████| 1000/1000 [01:20<00:00, 12.45it/s]

Test Accuracy:  0.774
Test F1-score:  0.714





### 5 class classification results

In [None]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self,data,labels):

        self.labels = [category_dict[label] for label in labels]
        self.data = [tokenizer(x, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for x in data]
    
    def __len__(self):
        return len(self.labels)
    
    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.data[idx]

    def __getitem__(self, idx):

        batch_data = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_data, batch_y
        

In [None]:
test_dataset = Dataset(text, label_category)

In [None]:
model = BertClassifier(num_classes=5)
model.load_state_dict(torch.load("/content/drive/MyDrive/NLP Project/models/5class_model.pth"))

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
evaluate(model, test_dataset, num_classes=5)

100%|██████████| 1000/1000 [01:17<00:00, 12.86it/s]

Test Accuracy:  0.794
Test F1-score:  0.740





### 12 class classification results

In [None]:
model = BertClassifier(num_classes=5)
model.load_state_dict(torch.load("/content/drive/MyDrive/NLP Project/models/5class_finetuned_model.pth"))

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
evaluate(model, test_dataset, num_classes=5)

100%|██████████| 1000/1000 [01:17<00:00, 12.88it/s]

Test Accuracy:  0.773
Test F1-score:  0.713





In [None]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self,data,labels):

        self.labels = [vector_dict[label] for label in labels]
        self.data = [tokenizer(x, 
                               padding='max_length', max_length = 512, truncation=True,
                                return_tensors="pt") for x in data]
    
    def __len__(self):
        return len(self.labels)
    
    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_texts(self, idx):
        return self.data[idx]

    def __getitem__(self, idx):

        batch_data = self.get_batch_texts(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_data, batch_y


In [None]:
test_dataset = Dataset(text, label_vector)

In [None]:
model = BertClassifier(num_classes=12)
model.load_state_dict(torch.load("/content/drive/MyDrive/NLP Project/models/12class_model.pth"))

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
evaluate(model, test_dataset, num_classes=12)

100%|██████████| 1000/1000 [01:10<00:00, 14.15it/s]

Test Accuracy:  0.758
Test F1-score:  0.696





In [None]:
model = BertClassifier(num_classes=12)
model.load_state_dict(torch.load("/content/drive/MyDrive/NLP Project/models/12class_finetuned_model.pth"))

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<All keys matched successfully>

In [None]:
evaluate(model, test_dataset, num_classes=12)

100%|██████████| 1000/1000 [01:17<00:00, 12.87it/s]

Test Accuracy:  0.784
Test F1-score:  0.726



