In [1]:
import os
# GPU index as a string; the device-selection cell below concatenates it into "cuda:<gpu_number>".
gpu_number = "1" # Choose either 0 or 1
# NOTE(review): `CUDA_ENVIRONMENT_DEVICES` is not a variable the CUDA runtime reads (the one
# that restricts visible GPUs is `CUDA_VISIBLE_DEVICES`), so this assignment likely has no
# effect on GPU visibility. Do not simply rename it: with CUDA_VISIBLE_DEVICES="1" the only
# visible GPU is re-indexed as cuda:0, and the "cuda:" + gpu_number lookup in the
# device-selection cell would then point at a device that is no longer visible.
os.environ['CUDA_ENVIRONMENT_DEVICES'] = gpu_number

In [2]:
import os
import sys
import numpy as np
from numpy import asarray,zeros
import pandas as pd 
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler, Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm
import matplotlib.pyplot as plt
import copy
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import classification_report
import transformers
from transformers import BertTokenizer
from transformers import AdamW, get_linear_schedule_with_warmup
from transformers import AutoTokenizer, AutoModel, AutoConfig, AdamW, get_linear_schedule_with_warmup
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import timm
from losses import SupConLoss
from torchlars import LARS

In [3]:
# Start from the CPU and upgrade to the GPU selected by `gpu_number` when CUDA is usable.
device = torch.device("cpu")
if torch.cuda.is_available():
    device_name = "cuda:" + gpu_number
    device = torch.device(device_name)
# device = torch.device("cpu") # Force CPU
print("Using device", device)

Using device cuda:1


In [4]:
# # NOTE: Resnet+BERT Multimodal data
# source_multimodal_arr = np.load("../data/source_multimodal_out.npy")
# target_multimodal_arr = np.load("../data/target_multimodal_out.npy")
# print("Source shape", source_multimodal_arr.shape)
# print("Target shape", target_multimodal_arr.shape)

In [5]:
# NOTE: Visual BERT Multimodal data
# Load the pre-computed source and target feature matrices from disk.
# The reshape cell further down reuses the source array's shape for the target array,
# so both files are expected to hold arrays of the same shape (the prints let you verify).
source_multimodal_arr = np.load("../data/vbert_old/source_mm_vbert.npy")
target_multimodal_arr = np.load("../data/vbert_old/target_mm_vbert.npy")
print("Source shape", source_multimodal_arr.shape)
print("Target shape", target_multimodal_arr.shape)

Source shape (11766, 768)
Target shape (11766, 768)


In [6]:
# # NOTE:New features (not good)
# diff_arr = source_multimodal_arr - target_multimodal_arr
# mul_diff_arr = source_multimodal_arr * diff_arr
# print("Source shape", diff_arr.shape)
# print("Target shape", mul_diff_arr.shape)

In [7]:
# # Considering only the text parts - #TODO: Comment if not necessary
# source_multimodal_arr = source_multimodal_arr[:,768:] # Only BERT
# target_multimodal_arr = target_multimodal_arr[:,768:] # Only BERT
# print("Source shape", source_multimodal_arr.shape)
# print("Target shape", target_multimodal_arr.shape)

In [8]:
# Single tensor -> (num_samples, num_channels, features(Normalize))
# Each sample gets a channel axis of length 2 holding its target and source feature vectors.
# The pairing is row-by-row, so both arrays must describe the same samples in the same layout.
assert source_multimodal_arr.shape == target_multimodal_arr.shape, \
    "source and target feature arrays must have identical shapes"
# shape[-1] (rather than shape[1]) keeps this cell idempotent: on a second run the arrays
# are already (num_samples, 1, features), and the reshape below becomes a no-op instead of failing.
newshape = source_multimodal_arr.shape[0], 1, source_multimodal_arr.shape[-1]
# The shape is passed positionally because the `newshape=` keyword of np.reshape is
# deprecated in NumPy >= 2.1; the positional form works on every NumPy version.
source_multimodal_arr = np.reshape(source_multimodal_arr, newshape)
target_multimodal_arr = np.reshape(target_multimodal_arr, newshape)
# Channel 0 is the target and channel 1 is the source, i.e. the order along axis 1 is (t, s).
combine_multimodal_array = np.concatenate((target_multimodal_arr, source_multimodal_arr), axis=1)
print('Combine multimodal shape', combine_multimodal_array.shape)

Combine multimodal shape (11766, 2, 768)


In [9]:
# #NOTE: Use New Features (Fails)
# # Single tensor -> (num_samples, num_channels, features(Normalize))
# newshape = diff_arr.shape[0], 1, diff_arr.shape[1]
# diff_arr = np.reshape(diff_arr, newshape=newshape)
# mul_diff_arr = np.reshape(mul_diff_arr, newshape=newshape)
# combine_multimodal_array = np.concatenate((diff_arr, mul_diff_arr), axis=1) # For (s-t, s*(s-t))
# print('Combine multimodal shape', combine_multimodal_array.shape)

In [10]:
# #NOTE: Use New Features (Fails)
# # Single tensor -> (num_samples, num_channels, features(Normalize))
# newshape = diff_arr.shape[0], 1, diff_arr.shape[1]
# diff_arr = np.reshape(diff_arr, newshape=newshape)
# source_multimodal_arr = np.reshape(source_multimodal_arr, newshape=newshape)
# combine_multimodal_array = np.concatenate((source_multimodal_arr, diff_arr), axis=1) # Form - (s, s-t)
# print('Combine multimodal shape', combine_multimodal_array.shape)

In [11]:
# Copy the stacked NumPy features into a torch tensor created directly on `device`
# (no dtype is given, so it is inferred from the array).
combine_multimodal_tensor = torch.tensor(combine_multimodal_array, device=device)

In [12]:
# Sanity check: the tensor should keep the (num_samples, 2, features) layout of the stacked array.
print(combine_multimodal_tensor.shape)

torch.Size([11766, 2, 768])


In [13]:
# Load the class labels and place them on `device` as int64 (`torch.long`).
# squeeze(1) drops a size-1 second axis, leaving one label per sample —
# presumably labels.npy is stored as (num_samples, 1); TODO confirm.
labels_data = np.load("../data/labels.npy")
labels_tensor = torch.tensor(labels_data, dtype=torch.long, device=device).squeeze(1)
print('Labels tensor shape', labels_tensor.shape)

Labels tensor shape torch.Size([11766])


In [14]:
# Show the distinct label values present, as a check on the class encoding.
labels_tensor.unique() # 0-Fake, 1-Real

tensor([0, 1], device='cuda:1')

In [15]:
# PyTorch DataLoader construction
def get_data_loader(batch_size, multimodal_input, labels):
    """Wrap features and labels in a randomly-sampled, batched DataLoader.

    Args:
        batch_size: Number of samples per batch.
        multimodal_input: Feature tensor; its first dimension indexes samples.
        labels: Label tensor with the same first dimension as `multimodal_input`.

    Returns:
        Tuple ``(dataset, sampler, dataloader)``: the TensorDataset pairing the two
        tensors, the RandomSampler drawing from it, and the DataLoader built on both.
    """
    dataset = TensorDataset(multimodal_input, labels)
    shuffler = RandomSampler(dataset)
    loader = DataLoader(dataset, batch_size=batch_size, sampler=shuffler)
    return dataset, shuffler, loader

In [16]:
# Build the training loader over the full stacked feature tensor and its labels.
# NOTE(review): every sample goes into this loader — no validation/test split is made in this cell.
batch_size = 512
train_data, train_sampler, train_dataloader = get_data_loader(batch_size, combine_multimodal_tensor, labels_tensor)

In [17]:
# For Visual BERT only (Expt)
class ContrastiveModelVisualBERT(nn.Module):
    """Single-layer projection head that emits unit-length contrastive embeddings.

    Args:
        initial_dim: Size of the last dimension of the input features.
    """

    def __init__(self, initial_dim):
        super().__init__()
        # Only one linear projection is used in this experimental variant
        # (no second projection, no dropout).
        self.project_1 = nn.Linear(in_features=initial_dim, out_features=512, bias=True)

    def forward(self, multimodal_input):
        """Project the input to 512 features, then L2-normalize along dimension 2."""
        projected = self.project_1(multimodal_input)
        return F.normalize(projected, dim=2)

In [18]:
# Model imported from the previous network
class ContrastiveModel(nn.Module):
    """Two-layer linear projection head that emits unit-length contrastive embeddings.

    Args:
        initial_dim: Size of the last dimension of the input features.
    """

    def __init__(self, initial_dim):
        super().__init__()
        self.project_1 = nn.Linear(in_features=initial_dim, out_features=512, bias=True)
        self.project_2 = nn.Linear(in_features=512, out_features=128, bias=True)
        # Registered but never applied in forward(); kept so the attribute stays available.
        self.dropout = nn.Dropout(0.2)

    def forward(self, multimodal_input):
        """Apply both projections back to back, then L2-normalize along dimension 2.

        There is no activation between the two linear layers, so together they
        form a single affine map from `initial_dim` to 128 features.
        """
        hidden = self.project_1(multimodal_input)
        embedding = self.project_2(hidden)
        return F.normalize(embedding, dim=2)

In [19]:
# # Not using
# class ContrastiveModelNew(nn.Module):
#     def __init__(self, initial_dim):
#         super(ContrastiveModel, self).__init__()
#         self.project_1 = nn.Linear(initial_dim, initial_dim//2)
#         self.project_2 = nn.Linear(initial_dim//2, initial_dim//4)
#         self.project_3 = nn.Linear(initial_dim//4, 128)
#         self.dropout = nn.Dropout(0.2)
#     def forward(self, multimodal_input):
#         project_down_1 = self.project_1(multimodal_input)
#         project_down_2 = self.project_2(project_down_1)
#         contrast_space = self.project_3(project_down_2)
#         normalize_contrast = F.normalize(contrast_space, dim=2)
#         return normalize_contrast

In [20]:
# Experiment with the loss
# Create the supervised-contrastive criterion and the projection head trained against it.
# `temp` is the value passed as SupConLoss's `temperature` argument.
temp = 0.1
criterion = SupConLoss(temperature=temp, device = device)
# Input width of the projection head = size of the last axis of the stacked feature tensor.
initial_dim = combine_multimodal_tensor.shape[2]
contrast_model = ContrastiveModel(initial_dim).to(device) # Generic model
# contrast_model = ContrastiveModelVisualBERT(initial_dim).to(device) # For visual bert only
# contrast_normalized_out = contrast_model(combine_multimodal_tensor[0:512,:,:])
# loss = criterion(features = contrast_normalized_out, labels = labels_tensor[0:512])
# print(loss)

In [21]:
# Optimizer and scheduler
def get_optimizer_scheduler(name, model, train_dataloader_len, epochs, lr_set):
    """Build the optimizer selected by `name` plus a linear warmup/decay LR schedule.

    Args:
        name: "Adam" (AdamW) or "LARS-SGD" (momentum SGD wrapped in LARS).
        model: Module whose parameters are optimized.
        train_dataloader_len: Number of batches per epoch.
        epochs: Number of training epochs.
        lr_set: Base learning rate handed to the optimizer.

    Returns:
        Tuple ``(optimizer, scheduler)``.

    Raises:
        ValueError: If `name` is not one of the supported optimizer names.
    """
    supported_names = ("Adam", "LARS-SGD")
    # Fail early and clearly: previously an unknown name left `optimizer` unbound and
    # surfaced later as an unrelated error when the scheduler was created.
    if name not in supported_names:
        raise ValueError(
            "Unsupported optimizer name {!r}; expected one of {}".format(name, supported_names)
        )

    if name == "Adam":
        optimizer = AdamW(
            model.parameters(),
            lr=lr_set,  # args.learning_rate - default is 5e-5, our notebook had 2e-5
            eps=1e-8,   # args.adam_epsilon  - default is 1e-8.
        )
    else:  # "LARS-SGD"
        base_optimizer = optim.SGD(model.parameters(), lr=lr_set, momentum=0.9)
        optimizer = LARS(optimizer=base_optimizer, eps=1e-8, trust_coef=0.001)

    # The schedule length assumes one scheduler step per batch.
    # NOTE(review): train_model steps the scheduler only once every `iters_to_accumulate`
    # batches, so with accumulation > 1 fewer than `total_steps` scheduler steps are taken.
    total_steps = train_dataloader_len * epochs

    # Linear warmup over the first third of the steps, then linear decay.
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=total_steps // 3,
        num_training_steps=total_steps,
    )
    return optimizer, scheduler

In [22]:
# Getting the optimizer and scheduler
# Training hyper-parameters, all consumed by get_optimizer_scheduler / train_model.
epochs = 1000
# lr = 3e-5 # Less LR
# Base learning rate passed to the optimizer selected by `name` below.
lr = 5
# train_model applies one optimizer step every `iters_to_accumulate` batches.
iters_to_accumulate = 2
# name = "Adam"
name = "LARS-SGD"
optimizer, scheduler = get_optimizer_scheduler(name, contrast_model, len(train_dataloader), epochs, lr)

In [23]:
################ Evaluating Loss ######################
#######################################################
def evaluate_loss(net, device, criterion, dataloader):
    """Return the average per-batch loss of `net` over `dataloader`.

    The network is switched to eval mode and gradients are disabled for the pass.

    Args:
        net: Model to evaluate.
        device: Device each batch is moved to before the forward pass.
        criterion: Callable ``criterion(outputs, labels)`` whose result supports ``.item()``.
        dataloader: Iterable yielding ``(multimodal_inputs, labels)`` batches.

    Returns:
        Sum of the per-batch loss values divided by the number of batches.
    """
    net.eval()
    total_loss = 0
    num_batches = 0
    with torch.no_grad():
        for batch_inputs, batch_labels in tqdm(dataloader):
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            outputs = net(batch_inputs)
            total_loss += criterion(outputs, batch_labels).item()
            num_batches += 1
    return total_loss / num_batches

In [24]:
# Training loop
def train_model(net, criterion, opti, lr, lr_scheduler, train_loader, epochs, iters_to_accumulate):
    """Train `net` with gradient accumulation, log the running loss, and save the weights.

    Batches are moved to the notebook-level global `device` (it is not a parameter).
    The training loss is written to 'contrastive_results.txt', and the final
    state_dict is saved under 'saved_models/'.

    Args:
        net: Model to train.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss tensor.
        opti: Optimizer updating the parameters of `net`.
        lr: Learning rate; only used to build the checkpoint file name.
        lr_scheduler: Scheduler stepped once after every optimizer step.
        train_loader: Sized iterable yielding ``(multimodal_inputs, labels)`` batches.
        epochs: Number of passes over `train_loader`.
        iters_to_accumulate: Number of batches accumulated before each optimizer step.

    Returns:
        The trained `net` (the same object that was passed in).
    """
    nb_iterations = len(train_loader)
    # Print the training loss about 5 times per epoch. Clamp to 1 so loaders with fewer
    # than 5 batches do not make the modulo below raise ZeroDivisionError.
    print_every = max(1, nb_iterations // 5)
    # Pre-bind so the cleanup at the end also works when no batch was processed.
    loss = None

    # Context manager: the log file is flushed and closed even if training raises.
    with open('contrastive_results.txt', 'w') as print_file:
        # Iterating over all epochs
        for ep in range(epochs):
            net.train()
            running_loss = 0.0
            for it, (multimodal_inputs, labels) in enumerate(tqdm(train_loader)):

                # Moving the batch to the global `device`
                multimodal_inputs, labels = multimodal_inputs.to(device), labels.to(device)

                # Obtaining the model outputs (normalized embeddings fed to the criterion)
                logits = net(multimodal_inputs)

                # Computing loss, scaled so the accumulated gradient is an average
                loss = criterion(logits, labels)
                loss = loss / iters_to_accumulate

                # Backpropagating; gradients add up in .grad until the next zero_grad()
                loss.backward()

                # NOTE(review): if nb_iterations is not a multiple of iters_to_accumulate,
                # the trailing batches of an epoch are not applied here; their gradients stay
                # in .grad and are folded into the first step of the next epoch (or dropped
                # after the last epoch). Left unchanged to preserve training behavior.
                if (it + 1) % iters_to_accumulate == 0:
                    # Optimization step on the accumulated gradients
                    opti.step()
                    # Adjust the learning rate based on the number of optimizer steps.
                    lr_scheduler.step()
                    # Clear gradients
                    net.zero_grad()

                # NOTE(review): this accumulates the *scaled* loss, so the value logged
                # below is the mean batch loss divided by iters_to_accumulate.
                running_loss += loss.item()

                if (it + 1) % print_every == 0:  # Print training loss information
                    print(file=print_file)
                    print("Iteration {}/{} of epoch {} complete. Loss : {} "
                          .format(it+1, nb_iterations, ep+1, running_loss / print_every), file=print_file)

                    running_loss = 0.0

    # Saving the model
    model_name = 'contrast_head_visualbert'
    path_to_model='saved_models/{}_lr_{}_ep_{}.pt'.format(model_name, lr, epochs)
    # Create the target directory first so a missing folder cannot lose a finished run.
    os.makedirs(os.path.dirname(path_to_model), exist_ok=True)
    torch.save(net.state_dict(), path_to_model)
    print("The model has been saved in {}".format(path_to_model))

    # Drop the last loss tensor before asking CUDA to release cached memory.
    del loss
    torch.cuda.empty_cache()
    return net

In [25]:
# Train the model
# Runs the full training loop. train_model returns the `net` it was given, so `model`
# refers to the same object as `contrast_model`.
model = train_model(contrast_model, criterion, optimizer, lr, scheduler, train_dataloader, epochs, iters_to_accumulate)

	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:766.)
  p.grad.add_(weight_decay, p.data)
100%|██████████| 23/23 [00:02<00:00, 10.22it/s]
100%|██████████| 23/23 [00:00<00:00, 55.25it/s]
100%|██████████| 23/23 [00:00<00:00, 54.11it/s]
100%|██████████| 23/23 [00:00<00:00, 39.21it/s]
100%|██████████| 23/23 [00:00<00:00, 54.09it/s]
100%|██████████| 23/23 [00:00<00:00, 59.49it/s]
100%|██████████| 23/23 [00:00<00:00, 61.71it/s]
100%|██████████| 23/23 [00:00<00:00, 68.92it/s]
100%|██████████| 23/23 [00:00<00:00, 82.42it/s]
100%|██████████| 23/23 [00:00<00:00, 66.55it/s]
100%|██████████| 23/23 [00:00<00:00, 75.39it/s]
100%|██████████| 23/23 [00:00<00:00, 73.09it/s]
100%|██████████| 23/23 [00:00<00:00, 49.64it/s]
100%|██████████| 23/23 [00:00<00:00, 53.34it/s]
100%|██████████| 23/23 [00:00<00:00, 53.42it/s]
100%|██████████| 23/23 [00:00<00:00, 

The model has been saved in saved_models/contrast_head_visualbert_lr_5_ep_1000.pt
