In [1]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset
import os
from PIL import Image
import pandas as pd

from custom_dataset import MemeDataset
from vilbert_adapt import MemeClassifier

  from .autonotebook import tqdm as notebook_tqdm


# step 1: Preprocessing and data loading

In [2]:
# Image preprocessing: resize to a fixed 224x224, convert to a tensor, then
# normalize each channel with the ImageNet mean/std.
data_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalizing using ImageNet statistics
])

# Dataset locations, all resolved relative to dataset_path ('' = current directory)
dataset_path = ''
image_path = os.path.join(dataset_path, 'dataset/img_resized')
text_path = os.path.join(dataset_path, 'dataset/img_txt')  # not used in this cell
GT_path = os.path.join(dataset_path, 'dataset/MMHS150K_GT.csv')
dataset = MemeDataset(GT_path, image_path, transform=data_transforms)

# Split dataset into training (80%), validation and test sets.
# The remainder after the training share is halved; the test set takes whatever
# is left so the three sizes always sum to len(dataset).
train_size = int(0.8 * len(dataset))
val_size = (len(dataset) - train_size) // 2
test_size = len(dataset) - train_size - val_size

# Seed the split so the same samples land in train/val/test on every run.
# Without a fixed generator the membership changes on each kernel restart,
# which makes results non-reproducible and lets test samples drift into training.
SPLIT_SEED = 42
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=split_generator,
)

# Create data loaders (only the training loader reshuffles each epoch)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)



Visual inspection of step 1: display a few sample images with their labels.

In [3]:
# import matplotlib.pyplot as plt

# # Function to display images from the dataset
# def show_samples(dataset, num_samples=5):
#     plt.figure(figsize=(12, 8))
#     for i in range(num_samples):
#         image, _, _, label = dataset[i]
#         image = image.permute(1, 2, 0)  # Reorder dimensions for visualization (C, H, W) to (H, W, C)
#         image = image.numpy()  # Convert torch tensor to numpy array
#         label = "Hateful" if label == 1 else "Not Hateful"
#         plt.subplot(1, num_samples, i+1)
#         plt.imshow(image)
#         plt.title(label)
#         plt.axis('off')
#     plt.show()

# # Show samples from training dataset
# show_samples(train_dataset, num_samples=5)

# step 2: Model building

In [4]:
# from torchsummary import summary

# # Instantiate the model
# model = VilBertForHatefulContentDetection(num_classes=2)

# # Move the model to the appropriate device
# # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")
# model.to(device)

# # Generate dummy input tensors
# dummy_images = torch.randn(1, 3, 224, 224).to(device)  # Assuming batch size of 1 and image size of 224x224
# dummy_input_ids = torch.randint(0, 1000, (1, 128)).to(device)  # Assuming maximum sequence length of 128 and vocabulary size of 1000
# dummy_attention_mask = torch.ones_like(dummy_input_ids).to(device)  # Assuming all tokens are attended to

# # Pass dummy inputs through the model
# output = model(dummy_images, dummy_input_ids, dummy_attention_mask)

# # Print output shape
# print(output.shape)

# print(model)


# step 3: Model training

In [5]:
# from transformers import AdamW, get_linear_schedule_with_warmup

# def train_model(model, data_loader, optimizer, device, scheduler, num_epochs=4):
#     model = model.train()
    
#     for epoch in range(num_epochs):
#         for batch in data_loader:
#             batch = {k: v.to(device) for k, v in batch.items()}
#             outputs = model(input_ids=batch['input_ids'], attention_mask=batch['attention_mask'])
#             loss = nn.CrossEntropyLoss()(outputs, batch['labels'])

#             loss.backward()
#             nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
#             optimizer.step()
#             scheduler.step()
#             optimizer.zero_grad()
#         print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}')

In [6]:
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model = MemeClassifier().to(device)
# for name, param in model.named_parameters():
#     if param.requires_grad:
#         print(name, param.data.shape)

In [10]:
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm

def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over `loader` and return `(mean_loss, accuracy)`.

    When `optimizer` is given the model is put in training mode and updated
    after every batch; otherwise the model is put in eval mode and gradients
    are disabled. Each batch must be a dict of tensors containing the keys
    'input_ids', 'attention_mask' and 'labels'.
    """
    training = optimizer is not None
    model.train(training)

    running_loss = 0.0
    correct = 0
    total = 0
    with torch.set_grad_enabled(training):
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            if training:
                optimizer.zero_grad()
            outputs = model(batch['input_ids'], batch['attention_mask'])
            loss = criterion(outputs, batch['labels'])
            if training:
                loss.backward()
                optimizer.step()

            batch_size = batch['labels'].size(0)
            predicted = outputs.argmax(dim=1)
            total += batch_size
            correct += (predicted == batch['labels']).sum().item()
            # criterion returns the batch mean, so re-weight by batch size
            # before normalising by the dataset size below.
            running_loss += loss.item() * batch_size

    return running_loss / len(loader.dataset), correct / total


def train_model(model, train_loader, val_loader, num_epochs=5, learning_rate=0.001):
    """Train `model` with Adam + cross-entropy and print per-epoch metrics.

    Args:
        model: module called as `model(input_ids, attention_mask)` whose output
            is scored against the labels with `nn.CrossEntropyLoss`.
        train_loader: iterable of dict batches used for the optimisation pass.
        val_loader: iterable of dict batches used for the validation pass.
        num_epochs: number of train + validation passes to run.
        learning_rate: learning rate handed to `optim.Adam`.

    Both loaders must expose `.dataset` (its length normalises the loss) and
    yield dicts with 'input_ids', 'attention_mask' and 'labels' tensors.
    Returns None; the metrics are printed once per epoch.
    """
    torch.cuda.empty_cache()
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device = torch.device("cpu")  # training is pinned to the CPU
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        # Training phase
        train_loss, train_accuracy = _run_epoch(model, train_loader, criterion, device, optimizer)

        # Validation phase: must iterate val_loader. The earlier version looped
        # over train_loader here, so the "Val" numbers were training metrics
        # divided by the validation set size.
        val_loss, val_accuracy = _run_epoch(model, val_loader, criterion, device)

        print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

# Build a fresh classifier and fit it on the train/validation loaders
model = MemeClassifier()
train_model(
    model,
    train_loader,
    val_loader,
    num_epochs=5,
    learning_rate=1e-4,
)




# step 4: Model evaluation

In [None]:
import torch
import torch.nn as nn

def evaluate_model(model, dataloader):
    """Evaluate `model` on `dataloader` and print mean loss and accuracy.

    Args:
        model: module whose output is scored with `nn.CrossEntropyLoss`.
        dataloader: iterable of batches that also exposes `.dataset`; the
            dataset length normalises the accumulated loss.

    Two batch layouts are accepted:
      * dict batches with 'input_ids', 'attention_mask' and 'labels' (the
        layout `train_model` consumes) -> `model(input_ids, attention_mask)`
      * 4-item batches `(images, input_ids, attention_mask, labels)`
        -> `model(images, input_ids, attention_mask)`

    NOTE(review): which layout MemeDataset actually yields is not visible
    here; confirm and drop the unused branch.

    Returns None; the metrics are printed.
    """
    # Run on whatever device the model already lives on. The previous version
    # referenced a `device` name that was never defined (NameError).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    model.eval()

    criterion = nn.CrossEntropyLoss()

    total_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for batch in tqdm(dataloader, desc='Evaluation'):
            if isinstance(batch, dict):
                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['labels'].to(device)
                outputs = model(input_ids, attention_mask)
            else:
                images, input_ids, attention_mask, labels = (item.to(device) for item in batch)
                outputs = model(images, input_ids, attention_mask)

            loss = criterion(outputs, labels)
            batch_size = labels.size(0)
            # criterion returns the batch mean; re-weight by batch size so the
            # final division by the dataset size gives a per-sample average.
            total_loss += loss.item() * batch_size
            predicted = outputs.argmax(dim=1)
            total += batch_size
            correct += (predicted == labels).sum().item()

    average_loss = total_loss / len(dataloader.dataset)
    accuracy = correct / total

    print(f'Evaluation Loss: {average_loss:.4f}, Accuracy: {accuracy:.4f}')

# Report loss/accuracy of the trained model on the validation split.
# NOTE(review): test_loader is created during data loading but never used;
# confirm whether the final evaluation should run on it instead.
evaluate_model(model=model, dataloader=val_loader)


In [9]:
# Ask PyTorch's CUDA caching allocator to release memory it is holding but not using.
torch.cuda.empty_cache()