# Memotion BERT + VGG

In [1]:
# The OG
import os
import pandas as pd
import numpy as np
from typing import Dict, Optional, Tuple

# Torch
import torch
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler


# Transformers
from transformers import AutoModel, AutoTokenizer

#PIL
from PIL import Image,ImageFile

# sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Other
from tqdm import tqdm

from memotion_utility import load_data

In [2]:
ImageFile.LOAD_TRUNCATED_IMAGES = True
CSV_FILE = '/kaggle/input/memotion-dataset-7k/memotion_dataset_7k/labels.csv'
ROOT_DIR = '/kaggle/input/memotion-dataset-7k/memotion_dataset_7k/images'
BATCH_SIZE = 16
IMAGE_SIZE = (224, 224)
num_epochs = 20
IMAGE_MODEL_NAME = 'vgg19'
TEXT_MODEL_NAME = 'bert-base-uncased'
MAX_LEN = 128
LR = 1e-06
embedding_size = 768
downsample = True

## Dataset

In [3]:
# def preprocess(df):
#     df = df.drop('Unnamed: 0', axis=1)
#     df = df.sample(frac=1).reset_index(drop=True)
#     df['offensive'] = np.where(df['offensive'] == 'not_offensive', 'not_offensive', 'offensive')

#     df['offensive'] = df['offensive'].map({
#         'not_offensive': 0, 
#         'offensive': 1
#     })
#     return df

In [4]:
df = pd.read_csv(CSV_FILE)
df

Unnamed: 0.1,Unnamed: 0,image_name,text_ocr,text_corrected,humour,sarcasm,offensive,motivational,overall_sentiment
0,0,image_1.jpg,LOOK THERE MY FRIEND LIGHTYEAR NOW ALL SOHALIK...,LOOK THERE MY FRIEND LIGHTYEAR NOW ALL SOHALIK...,hilarious,general,not_offensive,not_motivational,very_positive
1,1,image_2.jpeg,The best of #10 YearChallenge! Completed in le...,The best of #10 YearChallenge! Completed in le...,not_funny,general,not_offensive,motivational,very_positive
2,2,image_3.JPG,Sam Thorne @Strippin ( Follow Follow Saw every...,Sam Thorne @Strippin ( Follow Follow Saw every...,very_funny,not_sarcastic,not_offensive,not_motivational,positive
3,3,image_4.png,10 Year Challenge - Sweet Dee Edition,10 Year Challenge - Sweet Dee Edition,very_funny,twisted_meaning,very_offensive,motivational,positive
4,4,image_5.png,10 YEAR CHALLENGE WITH NO FILTER 47 Hilarious ...,10 YEAR CHALLENGE WITH NO FILTER 47 Hilarious ...,hilarious,very_twisted,very_offensive,not_motivational,neutral
...,...,...,...,...,...,...,...,...,...
6987,6987,image_6988.jpg,Tuesday is Mardi Gras Wednesday is Valentine's...,Tuesday is Mardi Gras Wednesday is Valentine's...,very_funny,twisted_meaning,very_offensive,motivational,neutral
6988,6988,image_6989.jpg,MUST WATCH MOVIES OF 2017 ITI Chennai memes MA...,MUST WATCH MOVIES OF 2017 ITI Chennai memes MA...,funny,twisted_meaning,not_offensive,not_motivational,neutral
6989,6989,image_6990.png,LESS MORE TALKING PLANNING SODA JUNK FOOD COMP...,LESS MORE TALKING PLANNING SODA JUNK FOOD COMP...,funny,general,slight,not_motivational,positive
6990,6990,image_6991.jpg,When I VERY have time is a fantasy No one has ...,When I have time is a fantasy. no one has time...,not_funny,twisted_meaning,not_offensive,motivational,very_positive


In [5]:
# df = preprocess(df)
# df

In [6]:
df_train,df_val,df_test = load_data(CSV_FILE,downsample = downsample)

train : 
 label
1    1953
0    1953
Name: count, dtype: int64
val : 
 label
1    343
0    217
Name: count, dtype: int64
test : 
 label
1    856
0    543
Name: count, dtype: int64


In [7]:
class MemeDataset(Dataset):
    def __init__(self, dataframe, root_dir, transform=None):
        self.df =  dataframe
        self.root_dir = root_dir
        self.tokenizer = AutoTokenizer.from_pretrained(TEXT_MODEL_NAME)
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        
        # Image
        img_name = os.path.join(self.root_dir, self.df.iloc[idx]['image_name'])
        image = Image.open(img_name).convert("RGB")
        if self.transform:
            image = self.transform(image)
            
        # Text
        text = str(self.df.iloc[idx]['text']).lower()
        out = self.tokenizer(
            text=text, 
            max_length=MAX_LEN,
            padding='max_length',
            truncation=True,
            return_tensors="pt"
        )
        # Label
        label = self.df.iloc[idx]['label']
        return  {
            'image': image, 
            'input_ids': out['input_ids'].squeeze(),
            'attention_mask': out['attention_mask'].squeeze(),
            'label': torch.tensor(label, dtype=torch.long)
        }

In [8]:
def load_dataloader(df,transform):
    df_train,df_val,df_test = load_data(CSV_FILE,downsample = downsample)
    
    train_dataset = MemeDataset(df_train,ROOT_DIR,transform = transform)
    val_dataset = MemeDataset(df_val,ROOT_DIR,transform = transform)
    test_dataset = MemeDataset(df_test,ROOT_DIR,transform = transform)
    
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
    
    return train_loader,val_loader,test_loader

In [9]:
transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

train_loader,val_loader,test_loader = load_dataloader(CSV_FILE,transform)

train : 
 label
1    1953
0    1953
Name: count, dtype: int64
val : 
 label
1    343
0    217
Name: count, dtype: int64
test : 
 label
1    856
0    543
Name: count, dtype: int64


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

## Model

In [10]:
class ImageEncoder(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.encoder = models.vgg19(pretrained=True)
        self.encoder.classifier = nn.Sequential(*list(self.encoder.classifier.children())[:-1]) # remove last layer
        #for param in self.encoder.parameters():
            #param.requires_grad = False
         
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.encoder(x)
        return x

In [11]:
class TextEncoder(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.encoder = AutoModel.from_pretrained(TEXT_MODEL_NAME)  

    def forward(
        self, input_ids: torch.Tensor, attention_mask: torch.Tensor
    ) -> torch.Tensor:
        x = self.encoder.forward(input_ids=input_ids, attention_mask=attention_mask)
        return x["pooler_output"]

In [12]:
class MemotionModel(nn.Module):
    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.image_encoder = ImageEncoder()
        self.text_encoder = TextEncoder()
        
        self.fusion_embedding_size =  768 + 4096
        
        self.fc = nn.Linear(self.fusion_embedding_size, 2)

        #self.fc1 = nn.Linear(768, 512)
        #self.fc2 = nn.Linear(512, 128)
        #self.fc3 = nn.Linear(128, 2)
        #self.dropout = nn.Dropout(p=0.2)

    def forward(
        self, image: torch.Tensor, input_ids: torch.Tensor, attention_mask: torch.Tensor, label: Optional[torch.Tensor] = None,
    ) -> torch.Tensor:
        img_out = self.image_encoder.forward(image)
        txt_out = self.text_encoder.forward(input_ids=input_ids, attention_mask=attention_mask)
        #print(img_out.shape,txt_out.shape)
        fused_features = torch.cat((txt_out, img_out),dim=1)  # Concatenation
        predictions = self.fc(fused_features)
        return predictions


## Training

In [13]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = MemotionModel().to(device)
criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(
            model.parameters(),
            lr=LR
        )

# scheduler = lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=int(30000/32*10)+50, eta_min=1e-6)
scheduler = None

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.cache/torch/hub/checkpoints/vgg19-dcbb9e9d.pth
100%|██████████| 548M/548M [00:03<00:00, 169MB/s]


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [14]:
best_f1 = 0.0
best_model_state = None

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"(train) ")
    all_labels = []
    all_predictions = []
    for step, batch in pbar:
        batch = {k: v.to(device) for k, v in batch.items()}
        labels = batch["label"]
        yHat = model(**batch)
        out = torch.argmax(yHat, axis=1)
        all_labels.extend(labels.cpu().numpy())
        all_predictions.extend(out.cpu().numpy())
        optimizer.zero_grad()
        loss = criterion(yHat, labels)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()
        
        if scheduler is not None:
            scheduler.step()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")
    
    accuracy = accuracy_score(all_labels, all_predictions)
    precision = precision_score(all_labels, all_predictions, average='macro')
    recall = recall_score(all_labels, all_predictions, average='macro')
    f1 = f1_score(all_labels, all_predictions, average='macro')
    conf_matrix = confusion_matrix(all_labels, all_predictions)
    class_report = classification_report(all_labels, all_predictions, target_names=['Not Offensive', 'Offensive'])

    # Print metrics
#     print(f"Accuracy: {accuracy * 100:.2f}%")
#     print(f"Precision: {precision:.2f}")
#     print(f"Recall: {recall:.2f}")
#     print(f"F1 Score: {f1:.2f}")
#     print("Confusion Matrix:")
#     print(conf_matrix)
#     print("Classification Report:")
#     print(class_report)

    # Evaluation
    model.eval()
    all_labels = []
    all_predictions = []
    with torch.no_grad():
        pbar = tqdm(enumerate(val_loader), total=len(val_loader), desc=f"(valid) ")
        for step, batch in pbar:
            batch = {k: v.to(device) for k, v in batch.items()}
            labels = batch["label"]
            yHat = model(**batch)
            out = torch.argmax(yHat, axis=1)
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(out.cpu().numpy())

    # Compute metrics
    accuracy = accuracy_score(all_labels, all_predictions)
    precision = precision_score(all_labels, all_predictions, average='macro')
    recall = recall_score(all_labels, all_predictions, average='macro')
    f1 = f1_score(all_labels, all_predictions, average='macro')
    conf_matrix = confusion_matrix(all_labels, all_predictions)
    class_report = classification_report(all_labels, all_predictions, target_names=['Not Offensive', 'Offensive'])

    # Print metrics
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1 Score: {f1:.2f}")
    print("Confusion Matrix:")
    print(conf_matrix)
    print("Classification Report:")
    print(class_report)
    
    if f1 > best_f1:
        best_f1 = f1
        best_model_state = model.state_dict()

(train) : 100%|██████████| 245/245 [01:31<00:00,  2.68it/s]


Epoch [1/20], Loss: 0.7262


(valid) : 100%|██████████| 35/35 [00:12<00:00,  2.86it/s]


Accuracy: 47.14%
Precision: 0.49
Recall: 0.49
F1 Score: 0.47
Confusion Matrix:
[[128  89]
 [207 136]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.38      0.59      0.46       217
    Offensive       0.60      0.40      0.48       343

     accuracy                           0.47       560
    macro avg       0.49      0.49      0.47       560
 weighted avg       0.52      0.47      0.47       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.87it/s]


Epoch [2/20], Loss: 0.7059


(valid) : 100%|██████████| 35/35 [00:08<00:00,  4.02it/s]


Accuracy: 44.46%
Precision: 0.48
Recall: 0.48
F1 Score: 0.44
Confusion Matrix:
[[134  83]
 [228 115]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.37      0.62      0.46       217
    Offensive       0.58      0.34      0.43       343

     accuracy                           0.44       560
    macro avg       0.48      0.48      0.44       560
 weighted avg       0.50      0.44      0.44       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.88it/s]


Epoch [3/20], Loss: 0.6937


(valid) : 100%|██████████| 35/35 [00:08<00:00,  4.02it/s]


Accuracy: 48.93%
Precision: 0.49
Recall: 0.49
F1 Score: 0.48
Confusion Matrix:
[[110 107]
 [179 164]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.38      0.51      0.43       217
    Offensive       0.61      0.48      0.53       343

     accuracy                           0.49       560
    macro avg       0.49      0.49      0.48       560
 weighted avg       0.52      0.49      0.50       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.87it/s]


Epoch [4/20], Loss: 0.6868


(valid) : 100%|██████████| 35/35 [00:08<00:00,  3.97it/s]


Accuracy: 50.71%
Precision: 0.50
Recall: 0.50
F1 Score: 0.50
Confusion Matrix:
[[104 113]
 [163 180]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.39      0.48      0.43       217
    Offensive       0.61      0.52      0.57       343

     accuracy                           0.51       560
    macro avg       0.50      0.50      0.50       560
 weighted avg       0.53      0.51      0.51       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.88it/s]


Epoch [5/20], Loss: 0.6786


(valid) : 100%|██████████| 35/35 [00:08<00:00,  4.06it/s]


Accuracy: 50.36%
Precision: 0.50
Recall: 0.50
F1 Score: 0.50
Confusion Matrix:
[[106 111]
 [167 176]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.39      0.49      0.43       217
    Offensive       0.61      0.51      0.56       343

     accuracy                           0.50       560
    macro avg       0.50      0.50      0.50       560
 weighted avg       0.53      0.50      0.51       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.88it/s]


Epoch [6/20], Loss: 0.6665


(valid) : 100%|██████████| 35/35 [00:08<00:00,  3.93it/s]


Accuracy: 47.50%
Precision: 0.50
Recall: 0.50
F1 Score: 0.47
Confusion Matrix:
[[131  86]
 [208 135]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.39      0.60      0.47       217
    Offensive       0.61      0.39      0.48       343

     accuracy                           0.48       560
    macro avg       0.50      0.50      0.47       560
 weighted avg       0.52      0.47      0.48       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.88it/s]


Epoch [7/20], Loss: 0.6589


(valid) : 100%|██████████| 35/35 [00:08<00:00,  4.03it/s]


Accuracy: 49.29%
Precision: 0.50
Recall: 0.50
F1 Score: 0.49
Confusion Matrix:
[[113 104]
 [180 163]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.39      0.52      0.44       217
    Offensive       0.61      0.48      0.53       343

     accuracy                           0.49       560
    macro avg       0.50      0.50      0.49       560
 weighted avg       0.52      0.49      0.50       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.88it/s]


Epoch [8/20], Loss: 0.6499


(valid) : 100%|██████████| 35/35 [00:08<00:00,  4.02it/s]


Accuracy: 48.04%
Precision: 0.50
Recall: 0.50
F1 Score: 0.48
Confusion Matrix:
[[133  84]
 [207 136]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.39      0.61      0.48       217
    Offensive       0.62      0.40      0.48       343

     accuracy                           0.48       560
    macro avg       0.50      0.50      0.48       560
 weighted avg       0.53      0.48      0.48       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.88it/s]


Epoch [9/20], Loss: 0.6336


(valid) : 100%|██████████| 35/35 [00:08<00:00,  4.00it/s]


Accuracy: 51.61%
Precision: 0.53
Recall: 0.53
F1 Score: 0.51
Confusion Matrix:
[[127  90]
 [181 162]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.41      0.59      0.48       217
    Offensive       0.64      0.47      0.54       343

     accuracy                           0.52       560
    macro avg       0.53      0.53      0.51       560
 weighted avg       0.55      0.52      0.52       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.88it/s]


Epoch [10/20], Loss: 0.6178


(valid) : 100%|██████████| 35/35 [00:08<00:00,  4.01it/s]


Accuracy: 52.32%
Precision: 0.52
Recall: 0.52
F1 Score: 0.51
Confusion Matrix:
[[107 110]
 [157 186]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.41      0.49      0.44       217
    Offensive       0.63      0.54      0.58       343

     accuracy                           0.52       560
    macro avg       0.52      0.52      0.51       560
 weighted avg       0.54      0.52      0.53       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.88it/s]


Epoch [11/20], Loss: 0.6045


(valid) : 100%|██████████| 35/35 [00:08<00:00,  3.99it/s]


Accuracy: 52.68%
Precision: 0.51
Recall: 0.51
F1 Score: 0.51
Confusion Matrix:
[[ 96 121]
 [144 199]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.40      0.44      0.42       217
    Offensive       0.62      0.58      0.60       343

     accuracy                           0.53       560
    macro avg       0.51      0.51      0.51       560
 weighted avg       0.54      0.53      0.53       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.88it/s]


Epoch [12/20], Loss: 0.5849


(valid) : 100%|██████████| 35/35 [00:08<00:00,  4.04it/s]


Accuracy: 50.89%
Precision: 0.51
Recall: 0.51
F1 Score: 0.50
Confusion Matrix:
[[114 103]
 [172 171]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.40      0.53      0.45       217
    Offensive       0.62      0.50      0.55       343

     accuracy                           0.51       560
    macro avg       0.51      0.51      0.50       560
 weighted avg       0.54      0.51      0.52       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.88it/s]


Epoch [13/20], Loss: 0.5625


(valid) : 100%|██████████| 35/35 [00:08<00:00,  3.99it/s]


Accuracy: 51.61%
Precision: 0.51
Recall: 0.51
F1 Score: 0.50
Confusion Matrix:
[[102 115]
 [156 187]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.40      0.47      0.43       217
    Offensive       0.62      0.55      0.58       343

     accuracy                           0.52       560
    macro avg       0.51      0.51      0.50       560
 weighted avg       0.53      0.52      0.52       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.88it/s]


Epoch [14/20], Loss: 0.5404


(valid) : 100%|██████████| 35/35 [00:08<00:00,  4.07it/s]


Accuracy: 52.14%
Precision: 0.50
Recall: 0.50
F1 Score: 0.50
Confusion Matrix:
[[ 83 134]
 [134 209]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.38      0.38      0.38       217
    Offensive       0.61      0.61      0.61       343

     accuracy                           0.52       560
    macro avg       0.50      0.50      0.50       560
 weighted avg       0.52      0.52      0.52       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.87it/s]


Epoch [15/20], Loss: 0.5112


(valid) : 100%|██████████| 35/35 [00:08<00:00,  4.04it/s]


Accuracy: 50.71%
Precision: 0.50
Recall: 0.50
F1 Score: 0.50
Confusion Matrix:
[[104 113]
 [163 180]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.39      0.48      0.43       217
    Offensive       0.61      0.52      0.57       343

     accuracy                           0.51       560
    macro avg       0.50      0.50      0.50       560
 weighted avg       0.53      0.51      0.51       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.88it/s]


Epoch [16/20], Loss: 0.4833


(valid) : 100%|██████████| 35/35 [00:08<00:00,  4.03it/s]


Accuracy: 49.11%
Precision: 0.49
Recall: 0.49
F1 Score: 0.49
Confusion Matrix:
[[110 107]
 [178 165]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.38      0.51      0.44       217
    Offensive       0.61      0.48      0.54       343

     accuracy                           0.49       560
    macro avg       0.49      0.49      0.49       560
 weighted avg       0.52      0.49      0.50       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.88it/s]


Epoch [17/20], Loss: 0.4606


(valid) : 100%|██████████| 35/35 [00:08<00:00,  4.06it/s]


Accuracy: 48.75%
Precision: 0.49
Recall: 0.49
F1 Score: 0.48
Confusion Matrix:
[[103 114]
 [173 170]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.37      0.47      0.42       217
    Offensive       0.60      0.50      0.54       343

     accuracy                           0.49       560
    macro avg       0.49      0.49      0.48       560
 weighted avg       0.51      0.49      0.49       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.87it/s]


Epoch [18/20], Loss: 0.4346


(valid) : 100%|██████████| 35/35 [00:08<00:00,  4.01it/s]


Accuracy: 50.89%
Precision: 0.50
Recall: 0.50
F1 Score: 0.50
Confusion Matrix:
[[101 116]
 [159 184]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.39      0.47      0.42       217
    Offensive       0.61      0.54      0.57       343

     accuracy                           0.51       560
    macro avg       0.50      0.50      0.50       560
 weighted avg       0.53      0.51      0.51       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.88it/s]


Epoch [19/20], Loss: 0.4054


(valid) : 100%|██████████| 35/35 [00:08<00:00,  4.03it/s]


Accuracy: 48.93%
Precision: 0.49
Recall: 0.48
F1 Score: 0.48
Confusion Matrix:
[[101 116]
 [170 173]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.37      0.47      0.41       217
    Offensive       0.60      0.50      0.55       343

     accuracy                           0.49       560
    macro avg       0.49      0.48      0.48       560
 weighted avg       0.51      0.49      0.50       560



(train) : 100%|██████████| 245/245 [01:25<00:00,  2.87it/s]


Epoch [20/20], Loss: 0.3738


(valid) : 100%|██████████| 35/35 [00:08<00:00,  4.03it/s]

Accuracy: 49.29%
Precision: 0.50
Recall: 0.50
F1 Score: 0.49
Confusion Matrix:
[[110 107]
 [177 166]]
Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.38      0.51      0.44       217
    Offensive       0.61      0.48      0.54       343

     accuracy                           0.49       560
    macro avg       0.50      0.50      0.49       560
 weighted avg       0.52      0.49      0.50       560






In [15]:
# Load the best model state at the end
if best_model_state is not None:
    model.load_state_dict(best_model_state)
    print("Best model loaded with F1 score:", best_f1)
else:
    print("No best model found.")

# Test the best model on the test data
model.eval()
all_labels = []
all_predictions = []
with torch.no_grad():
    pbar = tqdm(enumerate(test_loader), total=len(test_loader), desc=f"(test) ")
    for step, batch in pbar:
        batch = {k: v.to(device) for k, v in batch.items()}
        labels = batch["label"]
        yHat = model(**batch)
        out = torch.argmax(yHat, axis=1)
        all_labels.extend(labels.cpu().numpy())
        all_predictions.extend(out.cpu().numpy())

# Compute test metrics
accuracy = accuracy_score(all_labels, all_predictions)
precision = precision_score(all_labels, all_predictions, average='macro')
recall = recall_score(all_labels, all_predictions, average='macro')
f1 = f1_score(all_labels, all_predictions, average='macro')
conf_matrix = confusion_matrix(all_labels, all_predictions)
class_report = classification_report(all_labels, all_predictions, target_names=['Not Offensive', 'Offensive'])

# Print test metrics
print(f"Test Accuracy: {accuracy * 100:.4f}%")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1 Score: {f1:.4f}")
print("Test Confusion Matrix:")
print(conf_matrix)
print("Test Classification Report:")
print(class_report)

Best model loaded with F1 score: 0.5141736694677871


(test) : 100%|██████████| 88/88 [00:30<00:00,  2.89it/s]

Test Accuracy: 53.2523%
Test Precision: 0.5233
Test Recall: 0.5244
Test F1 Score: 0.5212
Test Confusion Matrix:
[[265 278]
 [376 480]]
Test Classification Report:
               precision    recall  f1-score   support

Not Offensive       0.41      0.49      0.45       543
    Offensive       0.63      0.56      0.59       856

     accuracy                           0.53      1399
    macro avg       0.52      0.52      0.52      1399
 weighted avg       0.55      0.53      0.54      1399






In [16]:
# for epoch in range(num_epochs):
#     model.train()
#     running_loss = 0.0
#     pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc=f"(train) ")
#     all_labels = []
#     all_predictions = []
#     for step,batch in pbar:
#         batch = {k: v.to(device) for k, v in batch.items()}
#         labels = batch["label"]
#         yHat = model.forward(**batch)
#         out = torch.argmax(yHat, axis=1)
#         all_labels.extend(labels.cpu().numpy())
#         all_predictions.extend(out.cpu().numpy())
#         optimizer.zero_grad()
#         loss = criterion(yHat, labels)
#         running_loss += loss
#         loss.backward()
#         optimizer.step()
        
#         if scheduler is not None:
#             scheduler.step()
        

#     print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")
#     #print(all_labels,all_predictions)
#     accuracy = accuracy_score(all_labels, all_predictions)
#     precision = precision_score(all_labels, all_predictions, average='macro')
#     recall = recall_score(all_labels, all_predictions, average='macro')
#     f1 = f1_score(all_labels, all_predictions, average='macro')
#     conf_matrix = confusion_matrix(all_labels, all_predictions)
#     class_report = classification_report(all_labels, all_predictions, target_names=['Not Offensive', 'Offensive'])
#     # Print metrics
#     print(f"Accuracy: {accuracy * 100:.2f}%")
#     print(f"Precision: {precision:.2f}")
#     print(f"Recall: {recall:.2f}")
#     print(f"F1 Score: {f1:.2f}")
#     print("Confusion Matrix:")
#     print(conf_matrix)
#     print("Classification Report:")
#     print(class_report)
    
#     # Evaluation
    
#     model.eval()
#     all_labels = []
#     all_predictions = []
#     with torch.no_grad():
#         pbar = tqdm(enumerate(val_loader), total=len(val_loader), desc=f"(valid) ")
#         for step, batch in pbar:
#             batch = {k: v.to(device) for k, v in batch.items()}
#             labels = batch["label"]
#             yHat = model.forward(**batch)
#             out = torch.argmax(yHat, axis=1)
#             all_labels.extend(labels.cpu().numpy())
#             all_predictions.extend(out.cpu().numpy())
            

#     # Compute metrics
#     # print(all_labels,all_predictions)
#     accuracy = accuracy_score(all_labels, all_predictions)
#     precision = precision_score(all_labels, all_predictions, average='macro')
#     recall = recall_score(all_labels, all_predictions, average='macro')
#     f1 = f1_score(all_labels, all_predictions, average='macro')
#     conf_matrix = confusion_matrix(all_labels, all_predictions)
#     class_report = classification_report(all_labels, all_predictions, target_names=['Not Offensive', 'Offensive'])

#     # Print metrics
#     print(f"Accuracy: {accuracy * 100:.2f}%")
#     print(f"Precision: {precision:.2f}")
#     print(f"Recall: {recall:.2f}")
#     print(f"F1 Score: {f1:.2f}")
#     print("Confusion Matrix:")
#     print(conf_matrix)
#     print("Classification Report:")
#     print(class_report)

In [17]:
# model.eval()
# all_labels = []
# all_predictions = []
# with torch.no_grad():
#     pbar = tqdm(enumerate(test_loader), total=len(test_loader), desc=f"(Test) ")
#     for step, batch in pbar:
#         batch = {k: v.to(device) for k, v in batch.items()}
#         labels = batch["label"]
#         yHat = model.forward(**batch)
#         out = torch.argmax(yHat, axis=1)
#         all_labels.extend(labels.cpu().numpy())
#         all_predictions.extend(out.cpu().numpy())
            
# # Compute metrics
# # print(all_labels,all_predictions)
# accuracy = accuracy_score(all_labels, all_predictions)
# precision = precision_score(all_labels, all_predictions, average='macro')
# recall = recall_score(all_labels, all_predictions, average='macro')
# f1 = f1_score(all_labels, all_predictions, average='macro')
# conf_matrix = confusion_matrix(all_labels, all_predictions)
# class_report = classification_report(all_labels, all_predictions, target_names=['Not Offensive', 'Offensive'])

# # Print metrics
# print(f"Accuracy: {accuracy * 100:.2f}%")
# print(f"Precision: {precision:.2f}")
# print(f"Recall: {recall:.2f}")
# print(f"F1 Score: {f1:.2f}")
# print("Confusion Matrix:")
# print(conf_matrix)
# print("Classification Report:")
# print(class_report)