In [1]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import Dataset
import os
from PIL import Image
import pandas as pd
from torch.utils.data.sampler import WeightedRandomSampler
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
from sklearn.metrics import f1_score, accuracy_score 

from preprocessing import create_csv_labels
from custom_dataset import CustomDataset
from vilbert_adapt import CustomBert
from utils import *

In [2]:
# Set to True when running on Google Colab: the path-setup cell then mounts
# Google Drive and uses the Drive project folder as the dataset root.
on_colab = False
# Set to True to regenerate the cleaned label CSV via create_csv_labels
# before the dataset is built.
create_csv = False

# step 1: Preprocessing and data loading

In [3]:
# Pick the project root: the Drive project folder when on Colab, otherwise an
# empty prefix (paths are then relative to the current working directory).
if not on_colab:
    dataset_path = ''
else:
    from google.colab import drive
    drive.mount('/content/drive')
    dataset_path = 'drive/MyDrive/DL_project'

# Dataset locations, all resolved against the project root.
image_path, img_text_path, json_path, GT_path = (
    os.path.join(dataset_path, relative_path)
    for relative_path in (
        'dataset/img_resized',
        'dataset/img_txt',
        'dataset/MMHS150K_GT.json',
        'dataset/MMHS150K_Custom.csv',
    )
)

In [4]:
# Create the cleaned label CSV (only when create_csv is enabled).
if create_csv:
    # Write to the same file name the default GT_path points at
    # ("MMHS150K_Custom.csv"). The previous name "MMHS1150K_Custom.csv" had a
    # stray "1", so a later run with create_csv = False could not find the
    # file that this cell had generated.
    filename = os.path.join(dataset_path, "dataset/MMHS150K_Custom.csv")
    create_csv_labels(json_path, filename, img_text_path)
    GT_path = filename

In [5]:
# Image preprocessing pipeline: resize to 224x224, convert to a tensor, then
# normalize each channel with the ImageNet mean / standard deviation.
data_transforms = transforms.Compose(
    transforms=[
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Build the dataset from the label CSV, the image folder and the image-text folder.
dataset = CustomDataset(GT_path, image_path, img_text_path, transform=data_transforms)




In [6]:
# Visual inspection: display the item the dataset returns for index 3.
dataset[3]

now


(tensor([  101,  1026, 23325, 15900,  1028, 24761,  6508,  6904, 13871,  4140,
          1026, 24471,  2140,  1028,   102,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,  

In [7]:
# Define hyperparameters -------------------------------------------------------
import numpy as np
batch_size = 5

# ------------------------------------------------------------------------------

# Split the dataset into training, test, and validation subsets (80% / 10% / 10%).
# NOTE(review): the split is unseeded, so it differs on every run; pass
# `generator=torch.Generator().manual_seed(...)` if a reproducible split is wanted.
dataset_size = len(dataset)
print(dataset_size)
train_set, test_set, val_set = torch.utils.data.dataset.random_split(dataset, [0.8, 0.1, 0.1])

print(train_set)
print(test_set)
print(val_set)

# Read every training label exactly once (item 5 of a sample is its label).
# Index through `train_set`, not `dataset`: the sampler below draws positions
# inside `train_set`, which is a shuffled subset, so `dataset[idx]` would be a
# different sample than `train_set[idx]`.
# A label equal to 1 counts as "hate"; anything else counts as "not hate".
train_labels = [int(train_set[idx][5] == 1) for idx in range(len(train_set))]
if not train_labels:
    raise ValueError("The training split is empty; cannot build a balanced sampler.")

hate_indices = [idx for idx, label in enumerate(train_labels) if label == 1]
not_hate_indices = [idx for idx, label in enumerate(train_labels) if label == 0]

num_not_hate = len(not_hate_indices)
num_hate = len(hate_indices)
total_samples = num_not_hate + num_hate

# Per-class sampling weight = inverse of the class count, so both classes carry
# the same total weight and are drawn equally often. (Frequency-proportional
# weights would oversample the majority class instead.) A class with no
# samples gets weight 0.0; it is never looked up below.
class_weights = [
    1.0 / num_not_hate if num_not_hate else 0.0,  # index 0: not hate
    1.0 / num_hate if num_hate else 0.0,          # index 1: hate
]

# One weight per training sample, aligned with positions in `train_set`.
weights = [class_weights[label] for label in train_labels]
sampler = WeightedRandomSampler(weights, len(weights))

# Create data loader for balanced training set
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, sampler=sampler)

# Create data loaders for validation and test sets
validation_loader = torch.utils.data.DataLoader(val_set, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size)


6
<torch.utils.data.dataset.Subset object at 0x00000285C7B86110>
<torch.utils.data.dataset.Subset object at 0x00000285C7D5FAD0>
<torch.utils.data.dataset.Subset object at 0x00000285C7D5C890>
now
now
now




now
now
now
now
now
now
now
here


# step 2: Model building

In [8]:
# Instantiate the model with CustomBert's default constructor arguments and
# print its module structure for inspection.
model = CustomBert()
print(model) 

CustomBert(
  (text_bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_a

In [9]:

def train_model(model, train_loader, val_loader, metrics, num_epochs=1, learning_rate=0.001):
    """Train ``model`` with binary cross-entropy and Adam, reporting per-epoch loss and metrics.

    Args:
        model: module called as ``model(text, mask, images, image_mask, visual_token)``.
            Its output is passed to ``nn.BCELoss`` and thresholded at 0.5, so it is
            presumably a probability per sample (confirm against the model).
        train_loader: iterable of 6-item batches of tensors, unpacked as
            ``(text, mask, images, image_mask, visual_token, labels)``.
        val_loader: currently unused; kept so existing callers keep working
            (no validation phase is implemented yet).
        metrics: dict mapping a metric name to a callable ``f(predicted, labels)``
            that returns a number; the per-batch values are averaged over the epoch.
        num_epochs: number of passes over ``train_loader``.
        learning_rate: learning rate given to Adam.

    Returns:
        None. Progress is reported through ``print``, ``clear_output`` and
        ``plot_training``.
    """
    torch.cuda.empty_cache()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    metrics_names = list(metrics.keys())
    train_loss_log = []
    train_metrics_log = [[] for _ in range(len(metrics))]

    criterion = nn.BCELoss()  # Binary cross-entropy loss
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        # Start every epoch from fresh accumulators. Creating the metric sums
        # once before the loop made each epoch's value include the previous
        # epoch's average.
        epoch_metrics = dict(zip(metrics.keys(), torch.zeros(len(metrics))))
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0

        # Training phase
        model.train()
        for batch in train_loader:
            batch = [b.to(device) for b in batch]
            text, mask, images, image_mask, visual_token, labels = batch
            optimizer.zero_grad()
            # NOTE(review): the recorded run of this notebook failed inside an
            # embedding lookup because it received a FloatTensor; presumably one
            # of these inputs must be an integer tensor -- confirm against
            # CustomDataset / CustomBert.
            outputs = model(text, mask, images, image_mask, visual_token)
            loss = criterion(outputs, labels.float())
            loss.backward()
            optimizer.step()

            predicted = (outputs.detach() > 0.5)
            batch_len = labels.size(0)
            total += batch_len
            correct += (predicted == labels).sum().item()
            # Weight the batch loss by batch size so a short last batch is
            # accounted for correctly in the epoch average.
            running_loss += loss.item() * batch_len
            num_batches += 1

            # compute metrics; no gradients should be propagated at this step
            with torch.no_grad():
                for k in epoch_metrics.keys():
                    epoch_metrics[k] += metrics[k](predicted, labels)

        # Average the per-batch metric values (left at zero for an empty loader).
        if num_batches:
            for k in epoch_metrics.keys():
                epoch_metrics[k] /= num_batches

        # Normalize by the number of samples actually seen this epoch; an empty
        # loader yields NaN instead of a ZeroDivisionError.
        train_loss = running_loss / total if total else float("nan")
        train_accuracy = correct / total if total else float("nan")

        clear_output()  # clean the prints from previous epochs
        print('train Loss: {:.4f}, '.format(train_loss),
              ', '.join(['{}: {:.4f}'.format(k, epoch_metrics[k]) for k in epoch_metrics.keys()]))

        train_loss_log.append(train_loss)
        train_metrics_log = update_metrics_log(metrics_names, train_metrics_log, epoch_metrics)

        plot_training(train_loss_log, metrics_names, train_metrics_log)

        # TODO: validation phase is not implemented yet; val_loader is unused.

        print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}')

def acc(preds, target):
    """Return sklearn's ``accuracy_score`` of ``preds`` measured against ``target``.

    Both tensors are detached and moved to the CPU before being handed to
    ``accuracy_score`` (ground truth first, predictions second).
    """
    y_true = target.detach().cpu()
    y_pred = preds.detach().cpu()
    return accuracy_score(y_true, y_pred)

# Metrics tracked during training, keyed by the name shown in logs and plots.
metrics = dict(ACC=acc)
# Example usage
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=validation_loader,
    metrics=metrics,
    num_epochs=5,
    learning_rate=0.0001,
)


now
now
now
now
now


RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.FloatTensor instead (while checking arguments for embedding)

In [None]:
# def train_model(model, train_loader, val_loader, num_epochs=1, learning_rate=0.001):
#     torch.cuda.empty_cache()
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     # device = torch.device("cpu")
#     model.to(device)

#     criterion = nn.BCELoss() # Binary cross-entropy loss
#     optimizer = optim.Adam(model.parameters(), lr=learning_rate)

#     for epoch in range(num_epochs):
#         # Training phase
#         model.train()
#         running_loss = 0.0
#         correct = 0
#         total = 0
#         for i,batch in enumerate(train_loader):
#             # print(f"Batch {i}")
#             batch = [b.to(device) for b in batch]
#             text, mask, labels = batch
#             optimizer.zero_grad()
#             outputs = model(text, mask)
#             loss = criterion(outputs, labels.float())
#             loss.backward()
#             optimizer.step()
#             predicted = (outputs.detach() > 0.5)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()
#             running_loss += loss.item() * labels.size(0)
#         train_loss = running_loss / len(train_loader.dataset)
#         train_accuracy = correct / total

#         # # Validation phase
#         # model.eval()
#         # val_loss = 0.0
#         # correct = 0
#         # total = 0
#         # with torch.no_grad():
#         #     for batch in train_loader:
#         #         batch = [b.to(device) for b in batch]
#         #         text, mask, labels = batch
#         #         outputs = model(text, mask)
#         #         loss = criterion(outputs, labels.float())
#         #         val_loss += loss.item() * text.size(0)
#         #         _, predicted = torch.max(outputs.data, 1)
#         #         total += labels.size(0)
#         #         correct += (predicted == labels.sum().item())
#         # val_loss = val_loss / len(val_loader.dataset)
#         # val_accuracy = correct / total

#         print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}')
#         # print(f'Epoch {epoch + 1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}, Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

# # Example usage
# train_model(model, train_loader, validation_loader, num_epochs=5, learning_rate=0.0001)


KeyboardInterrupt: 

# step 4: Model evaluation

In [None]:
# import torch
# import torch.nn as nn

# def evaluate_model(model, dataloader):
#     # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#     model.eval()

#     criterion = nn.CrossEntropyLoss()

#     total_loss = 0.0
#     correct = 0
#     total = 0

#     with torch.no_grad():
#         for images, input_ids, attention_mask, labels in tqdm(dataloader, desc='Evaluation'):
#             images, input_ids, attention_mask, labels = images.to(device), input_ids.to(device), attention_mask.to(device), labels.to(device)
#             outputs = model(images, input_ids, attention_mask)
#             loss = criterion(outputs, labels)
#             total_loss += loss.item() * images.size(0)
#             _, predicted = torch.max(outputs.data, 1)
#             total += labels.size(0)
#             correct += (predicted == labels).sum().item()

#     average_loss = total_loss / len(dataloader.dataset)
#     accuracy = correct / total

#     print(f'Evaluation Loss: {average_loss:.4f}, Accuracy: {accuracy:.4f}')

# # Example usage
# evaluate_model(model, val_loader)


In [None]:
# Release the unused GPU memory that PyTorch's caching allocator is still
# holding once the work above is finished.
torch.cuda.empty_cache()