In [1]:
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split, Subset
from transformers import DebertaTokenizer, DebertaModel, ViTModel
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from sklearn.model_selection import KFold
from imblearn.over_sampling import RandomOverSampler
import numpy as np
from PIL import Image

# Load the dataset
df = pd.read_excel('multi-sent.xlsx')

# Filter out the 'neutral' class
df = df[df['Label_Sentiment'].isin(['positive', 'negative'])]

# Map labels to 'troll' and 'not troll'
label_mapping = {'positive': 0, 'negative': 1}

# Separate features and labels for oversampling
X = df[['image_name', 'Captions']]  
y = df['Label_Sentiment'].map(label_mapping) 

#  Random Oversampling
ros = RandomOverSampler(sampling_strategy='auto', random_state=42)
X_resampled, y_resampled = ros.fit_resample(X, y)

#  a new resampled dataframe
df_resampled = pd.DataFrame(X_resampled, columns=['image_name', 'Captions'])
df_resampled['Label_Sentiment'] = y_resampled.map({0: 'positive', 1: 'negative'})  # Map back to labels

# Dataset class
class MemeDataset(Dataset):
    def __init__(self, dataframe, img_dir, transform=None):
        self.dataframe = dataframe
        self.img_dir = img_dir
        self.transform = transform
        self.tokenizer = DebertaTokenizer.from_pretrained('microsoft/deberta-base')
        self.labels = label_mapping

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.img_dir, self.dataframe.iloc[idx, 0])
        
        try:
            image = Image.open(img_name).convert("RGB")
        except FileNotFoundError:
            return None
        
        if self.transform:
            image = self.transform(image)
        
        text = self.dataframe.iloc[idx, 1]
        tokens = self.tokenizer(text, padding='max_length', truncation=True, max_length=128, return_tensors="pt")
        label = self.labels[self.dataframe.iloc[idx, 2]]
        
        sample = {'image': image, 'input_ids': tokens['input_ids'].squeeze(), 'attention_mask': tokens['attention_mask'].squeeze(), 'label': label}
        return sample

# Transformations for the image
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load the resampled dataset
dataset = MemeDataset(dataframe=df_resampled, img_dir='Memes/', transform=transform)

# Custom collate function to filter out None samples
def collate_fn(batch):
    batch = [sample for sample in batch if sample is not None]
    if len(batch) == 0:
        return None
    return torch.utils.data.dataloader.default_collate(batch)

# Define the Multimodal Model with Dropout
class AdvancedMultimodalModel(nn.Module):
    def __init__(self):
        super(AdvancedMultimodalModel, self).__init__()
        self.vision_model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
        self.text_model = DebertaModel.from_pretrained('microsoft/deberta-base')
        self.dropout = nn.Dropout(p=0.3)  
        self.classifier = nn.Linear(768 + 768, 2)

    def forward(self, input_ids, attention_mask, pixel_values):
        vision_outputs = self.vision_model(pixel_values=pixel_values).last_hidden_state[:, 0, :]
        text_outputs = self.text_model(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state[:, 0, :]
        combined = torch.cat((vision_outputs, text_outputs), dim=1)
        combined = self.dropout(combined)  
        logits = self.classifier(combined)
        return logits

model = AdvancedMultimodalModel()

# Training
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Early stopping parameters
patience = 2
best_loss = float('inf')
early_stop_counter = 0

# K-Fold Cross-Validation
kf = KFold(n_splits=5, shuffle=True, random_state=42)
fold_results = []

for fold, (train_idx, test_idx) in enumerate(kf.split(dataset)):
    print(f'Fold {fold + 1}')
    
    train_subsampler = Subset(dataset, train_idx)
    test_subsampler = Subset(dataset, test_idx)
    
    train_dataloader = DataLoader(train_subsampler, batch_size=16, shuffle=True, collate_fn=collate_fn)
    test_dataloader = DataLoader(test_subsampler, batch_size=16, shuffle=False, collate_fn=collate_fn)
    
    # Training loop with early stopping
    model.train()
    for epoch in range(10):  
        epoch_loss = 0.0
        for batch in train_dataloader:
            if batch is None:
                continue
            optimizer.zero_grad()
            outputs = model(input_ids=batch['input_ids'], attention_mask=batch['attention_mask'], pixel_values=batch['image'])
            loss = criterion(outputs, batch['label'])
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        
        # Validation step for early stopping
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch in test_dataloader:
                if batch is None:
                    continue
                outputs = model(input_ids=batch['input_ids'], attention_mask=batch['attention_mask'], pixel_values=batch['image'])
                loss = criterion(outputs, batch['label'])
                val_loss += loss.item()
        
        print(f'Epoch {epoch + 1} - Train Loss: {epoch_loss:.4f}, Val Loss: {val_loss:.4f}')
        
        # Check early stopping condition
        if val_loss < best_loss:
            best_loss = val_loss
            early_stop_counter = 0  
        else:
            early_stop_counter += 1
            if early_stop_counter >= patience:
                print(f'Early stopping triggered at epoch {epoch + 1}')
                break  
    
    # Evaluation
    model.eval()
    all_labels = []
    all_preds = []
    with torch.no_grad():
        for batch in test_dataloader:
            if batch is None:
                continue
            outputs = model(input_ids=batch['input_ids'], attention_mask=batch['attention_mask'], pixel_values=batch['image'])
            _, preds = torch.max(outputs, 1)
            all_labels.extend(batch['label'].numpy())
            all_preds.extend(preds.numpy())

    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    precision = precision_score(all_labels, all_preds)
    recall = recall_score(all_labels, all_preds)
    conf_matrix = confusion_matrix(all_labels, all_preds)

    fold_results.append({
        'fold': fold + 1,
        'accuracy': accuracy,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'confusion_matrix': conf_matrix
    })
    print(f'Fold {fold + 1} - Accuracy: {accuracy}, F1: {f1}, Precision: {precision}, Recall: {recall}')
    print(f'Confusion Matrix:\n{conf_matrix}')

# Average results across folds
avg_accuracy = np.mean([result['accuracy'] for result in fold_results])
avg_f1 = np.mean([result['f1'] for result in fold_results])
avg_precision = np.mean([result['precision'] for result in fold_results])
avg_recall = np.mean([result['recall'] for result in fold_results])

print(f'Average Accuracy: {avg_accuracy}')
print(f'Average F1 Score: {avg_f1}')
print(f'Average Precision: {avg_precision}')
print(f'Average Recall: {avg_recall}')


  from .autonotebook import tqdm as notebook_tqdm


Fold 1
Epoch 1 - Train Loss: 152.8498, Val Loss: 37.9904
Epoch 2 - Train Loss: 120.5480, Val Loss: 34.8782
Epoch 3 - Train Loss: 88.4480, Val Loss: 31.0782
Epoch 4 - Train Loss: 55.6514, Val Loss: 30.8138
Epoch 5 - Train Loss: 37.9270, Val Loss: 29.2028
Epoch 6 - Train Loss: 29.3872, Val Loss: 29.8254
Epoch 7 - Train Loss: 26.1829, Val Loss: 35.1507
Early stopping triggered at epoch 7
Fold 1 - Accuracy: 0.8533455545371219, F1: 0.8493408662900188, Precision: 0.877431906614786, Recall: 0.822992700729927
Confusion Matrix:
[[480  63]
 [ 97 451]]
Fold 2
Epoch 1 - Train Loss: 41.8601, Val Loss: 4.2531
Epoch 2 - Train Loss: 18.1358, Val Loss: 4.7919
Epoch 3 - Train Loss: 14.5378, Val Loss: 5.0240
Early stopping triggered at epoch 3
Fold 2 - Accuracy: 0.9743589743589743, F1: 0.9745454545454545, Precision: 0.9537366548042705, Recall: 0.9962825278810409
Confusion Matrix:
[[528  26]
 [  2 536]]
Fold 3
Epoch 1 - Train Loss: 15.8920, Val Loss: 1.8175
Epoch 2 - Train Loss: 17.4546, Val Loss: 5.4674


In [12]:
!pip install openpyxl


Defaulting to user installation because normal site-packages is not writeable
Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.9/250.9 KB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m[31m2.8 MB/s[0m eta [36m0:00:01[0m
[?25hCollecting et-xmlfile
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.5
