## Import packages

In [1]:
from __future__ import print_function
import torch.utils.data
from scipy import misc
from torch import optim
from torchvision.utils import save_image
import numpy as np
import pickle
import time
import random
import os
import torch
from torch import nn
from torch.nn import functional as F
from tqdm.auto import tqdm, trange
from torchvision import transforms
import pandas as pd
import torchvision.datasets as datasets
import torch.utils.data as data
import copy
from torch.autograd import Variable
from torch.utils.data import Dataset
from skimage import io
from PIL import Image

from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, confusion_matrix
from transformers import AutoTokenizer, BertTokenizer, BertModel, BertForSequenceClassification
from collections import namedtuple
import torchvision.models as models
from sentence_transformers import SentenceTransformer
from laserembeddings import Laser

## Enable gpu device

In [2]:
device = torch.device('cuda:0')

## Set random seed

In [3]:
SEED = 8888

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

## Load data

In [4]:
train_dir = os.path.join('../', 'data', 'TRAINING','images')
trial_dir = os.path.join('../', 'data', 'Users', 'fersiniel', 'Desktop', 'MAMI - TO LABEL/TRIAL DATASET', 'images')

# load training label
train_df = pd.read_csv('../data/TRAINING/training.csv', sep='\t')
# load trial label
trial_df = pd.read_csv('../data/Users/fersiniel/Desktop/MAMI - TO LABEL/TRIAL DATASET/trial.csv', sep='\t')
# load test label
test_df = pd.read_csv('../data/test/Test.csv', sep='\t')

In [3]:
train_df.describe()

Unnamed: 0,misogynous,shaming,stereotype,objectification,violence
count,10000.0,10000.0,10000.0,10000.0,10000.0
mean,0.5,0.1274,0.281,0.2202,0.0953
std,0.500025,0.333437,0.44951,0.414402,0.293644
min,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0
50%,0.5,0.0,0.0,0.0,0.0
75%,1.0,0.0,1.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0


In [4]:
train_df.head(5)

Unnamed: 0,file_name,misogynous,shaming,stereotype,objectification,violence,Text Transcription
0,1.jpg,0,0,0,0,0,Milk Milk.zip
1,10.jpg,1,0,0,0,1,"ROSES ARE RED, VIOLETS ARE BLUE IF YOU DON'T S..."
2,1000.jpg,0,0,0,0,0,BREAKING NEWS: Russia releases photo of DONALD...
3,10000.jpg,0,0,0,0,0,MAN SEEKING WOMAN Ignad 18 O
4,10006.jpg,0,0,0,0,0,Me explaining the deep lore of. J.R.R. Tolkein...


## Define image transform

In [66]:
# pretrained_size = 128
# pretrained_size = 256
pretrained_size = 224
pretrained_means = [0.485, 0.456, 0.406]
pretrained_stds= [0.229, 0.224, 0.225]

# train_transforms = transforms.Compose([
#                            transforms.Resize(pretrained_size),
#                            transforms.RandomRotation(5),
#                            transforms.RandomHorizontalFlip(0.5),
#                            transforms.RandomCrop(pretrained_size, padding = 10),
#                            transforms.ToTensor(),
#                            transforms.Normalize(mean = pretrained_means, 
#                                                 std = pretrained_stds)
#                        ])

train_transforms = transforms.Compose([
                           transforms.ToTensor()
                       ])

# trial_transforms = transforms.Compose([
#                            transforms.Resize(pretrained_size),
#                            transforms.CenterCrop(pretrained_size),
#                            transforms.ToTensor(),
#                            transforms.Normalize(mean = pretrained_means, 
#                                                 std = pretrained_stds)
#                        ])

trial_transforms = train_transforms

## Load pre-trained model

In [5]:
# Clip pretrained model for image encoding

clip_pretrained = SentenceTransformer('clip-ViT-B-32')

# Laser model for text encoding

laser_model = Laser()

## Self-define Dataset class

In [6]:
class MAMIDataset(Dataset):
    """MAMI dataset."""

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.df = pd.read_csv(csv_file, sep='\t')
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir,
                                self.df.iloc[idx, 0])
        meme = Image.open(img_name)#.convert("RGB")   # convert to RGB is important
        meme = torch.Tensor(clip_pretrained.encode(meme))
        labels = self.df.iloc[idx, 1:-1]   # multi-labels
        labels = np.array(labels)
        labels = labels.astype('long')
        
        text = self.df.iloc[idx, -1]   # Text transcription
        text = torch.Tensor(laser_model.embed_sentences(text, lang='en'))
#         text = "[CLS] " + text + " [SEP]"   # Add special tokens
        
#         tokenized_text = tokenizer.tokenize(text)
#         # Map the token strings to their vocabulary indeces.
#         indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
#         segments_ids = [1] * len(tokenized_text)

#         tokens_tensor = torch.tensor([indexed_tokens])
#         segments_tensor = torch.tensor([segments_ids])

#         text_ids = tokenizer(text, return_tensors="pt", padding='max_length', max_length=512, truncation=True)
        
        sample = {'meme': meme, 'labels': labels, 'text': text}

        if self.transform:
            sample['meme'] = self.transform(meme)

        return sample

In [7]:
class MAMITestset(Dataset):
    """MAMI dataset."""

    def __init__(self, csv_file, root_dir, transform=None):
        """
        Args:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.df = pd.read_csv(csv_file, sep='\t')
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir,
                                self.df.iloc[idx, 0])
        meme = Image.open(img_name)#.convert("RGB")   # convert to RGB is important
        meme = torch.Tensor(clip_pretrained.encode(meme))
        
        text = self.df.iloc[idx, -1]   # Text transcription
        text = torch.Tensor(laser_model.embed_sentences(text, lang='en'))
        
#         text = "[CLS] " + text + " [SEP]"   # Add special tokens
        
#         tokenized_text = tokenizer.tokenize(text)
#         # Map the token strings to their vocabulary indeces.
#         indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
#         segments_ids = [1] * len(tokenized_text)

#         tokens_tensor = torch.tensor([indexed_tokens])
#         segments_tensor = torch.tensor([segments_ids])
        
        sample = {'meme': meme, 'text': text}

        if self.transform:
            sample['meme'] = self.transform(meme)

        return sample

## Instantiate the train and trial dataset

In [8]:
trial_root_dir = '../data/Users/fersiniel/Desktop/MAMI - TO LABEL/TRIAL DATASET/'
# trial_data = MAMIDataset(trial_root_dir + 'trial.csv', trial_root_dir, trial_transforms)
trial_data = MAMIDataset(trial_root_dir + 'trial.csv', trial_root_dir)

train_root_dir = '../data/TRAINING/'
# train_data = MAMIDataset(train_root_dir + 'training.csv', train_root_dir, train_transforms)
train_data = MAMIDataset(train_root_dir + 'training.csv', train_root_dir)

In [9]:
test_root_dir = '../data/test/'
# train_data = MAMIDataset(train_root_dir + 'training.csv', train_root_dir, train_transforms)
test_data = MAMITestset(test_root_dir + 'Test.csv', test_root_dir)

## Train valid split

In [10]:
VALID_RATIO = 0.9

n_train_examples = int(len(train_data) * VALID_RATIO)
n_valid_examples = len(train_data) - n_train_examples

train_data, valid_data = data.random_split(train_data, 
                                           [n_train_examples, n_valid_examples])

In [11]:
valid_data = copy.deepcopy(valid_data)
# valid_data.dataset.transform = trial_transforms

In [12]:
print(f'Number of training examples: {len(train_data)}')
print(f'Number of validation examples: {len(valid_data)}')
print(f'Number of testing examples: {len(trial_data)}')

Number of training examples: 9000
Number of validation examples: 1000
Number of testing examples: 100


## Create batch iterators

In [13]:
BATCH_SIZE = 64

In [14]:
train_iterator = data.DataLoader(train_data, 
                                 shuffle = True, 
                                 batch_size = BATCH_SIZE)

valid_iterator = data.DataLoader(valid_data, 
                                 batch_size = BATCH_SIZE)

trial_iterator = data.DataLoader(trial_data, 
                                batch_size = BATCH_SIZE)

In [15]:
test_iterator = data.DataLoader(test_data, 
                                batch_size = BATCH_SIZE)

## Construct VAE class

In [17]:
class VAE(nn.Module):
    def __init__(self, zsize, output_dim=2):
        super(VAE, self).__init__()
        
        self.zsize = zsize
        self.fc1 = nn.Linear(zsize, zsize)   # 4 * 4 is the current size of the image
        self.fc2 = nn.Linear(zsize, zsize)

        ######
        # multi-tasks sub-networks
        self.fc_misogynous = nn.Linear(zsize, output_dim)
        self.fc_shaming = nn.Linear(zsize, output_dim)
        self.fc_stereotype = nn.Linear(zsize, output_dim)
        self.fc_objectification = nn.Linear(zsize, output_dim)
        self.fc_violence = nn.Linear(zsize, output_dim)
        
        
        # encoder layers
        self.enc_txt_fc = nn.Linear(1024, int(0.5 * zsize))
        self.enc_img_fc1 = nn.Linear(512, int(0.5 * zsize))
#         self.enc_img_fc2 = nn.Linear(1024, int(0.5 * zsize))
        
        # decoder layers
        self.dec_txt_fc = nn.Linear(zsize, 1024)
        self.dec_img_fc1 = nn.Linear(zsize, 512)
#         self.dec_img_fc2 = nn.Linear(1024, 2048)

        # batch normalizations
        self.enc_txt_bn = nn.BatchNorm1d(num_features=int(0.5 * zsize))
        self.enc_img_bn1 = nn.BatchNorm1d(num_features=int(0.5 * zsize))
#         self.enc_img_bn2 = nn.BatchNorm1d(num_features=int(0.5 * zsize))
        
        self.dec_txt_bn = nn.BatchNorm1d(num_features=1024)
        self.dec_img_bn1 = nn.BatchNorm1d(num_features=512)
#         self.dec_img_bn2 = nn.BatchNorm1d(num_features=2048)
        
        # dropout
        self.dropout_txt_enc = nn.Dropout(0.2)
        self.dropout_img_enc = nn.Dropout(0.2)
        self.dropout_txt_dec = nn.Dropout(0.2)
        self.dropout_img_dec = nn.Dropout(0.2)
        
        
    def img_encode(self, x_img):
#         _, x_img = self.resnet_pretrained(x_img)
        x_img = F.relu(self.dropout_img_enc(self.enc_img_bn1(self.enc_img_fc1(x_img))))
#         x_img = F.relu(self.enc_img_fc2(x_img))
        
        return x_img   # [bs, 2048]

    def txt_encode(self, x_txt):

        x_txt = x_txt.view(x_txt.shape[0], 1024)
        x_txt = F.relu(self.dropout_txt_enc(self.enc_txt_bn(self.enc_txt_fc(x_txt))))
        return x_txt   # [bs, 0.5 * zsize]

    def encode(self, x_img, x_txt):
        
        x_img = self.img_encode(x_img)
        
        x_txt = self.txt_encode(x_txt)
        
        # concate x_img and x_txt
        x = torch.cat((x_txt, x_img), 1)
        
        h1 = self.fc1(x)   # mu
        h2 = self.fc2(x)   # logvar
        return h1, h2
    
    def subtask_misogynous(self, z):
        
        h = self.fc_misogynous(z)
        return h
    
    def subtask_shaming(self, z):
        
        h = self.fc_shaming(z)
        return h
    
    def subtask_stereotype(self, z):

        h = self.fc_stereotype(z)
        return h
    
    def subtask_objectification(self, z):

        h = self.fc_objectification(z)
        return h
    
    def subtask_violence(self, z):

        h = self.fc_violence(z)
        return h

    def reparameterize(self, mu, logvar):
        if self.training:
            std = torch.exp(0.5 * logvar)
            eps = torch.randn_like(std)
            return eps.mul(std).add_(mu)
        else:
            return mu

    def decode(self, x):
#         x = x.view(x.shape[0], self.zsize)   # flatten

        # Decoding txt
        dec_x_txt = F.relu(self.dropout_txt_dec(self.dec_txt_bn(self.dec_txt_fc(x))))
        
        # Decoding img
        dec_x_img = F.relu(self.dropout_img_dec(self.dec_img_bn1(self.dec_img_fc1(x))))
#         dec_x_img = F.relu(self.dec_img_fc2(dec_x_img))
        
        return dec_x_img, dec_x_txt

    def forward(self, x_img, x_txt):
        mu, logvar = self.encode(x_img, x_txt)
        mu = mu.squeeze()
        logvar = logvar.squeeze()
        z = self.reparameterize(mu, logvar)

        y_misogynous = self.subtask_misogynous(z)
        y_shaming = self.subtask_shaming(z)
        y_stereotype = self.subtask_stereotype(z)
        y_objectification = self.subtask_objectification(z)
        y_violence = self.subtask_violence(z)
        
        y_pred = dict()
        y_pred["misogynous"] = y_misogynous
        y_pred["shaming"] = y_shaming
        y_pred["stereotype"] = y_stereotype
        y_pred["objectification"] = y_objectification
        y_pred["violence"] = y_violence
        
        dec_x_img, dec_x_txt = self.decode(z.view(-1, self.zsize))
        
        return dec_x_img, dec_x_txt, mu, logvar, y_pred

    def weight_init(self, mean, std):
        for m in self._modules:
            normal_init(self._modules[m], mean, std)


In [25]:
def normal_init(m, mean, std):
    if isinstance(m, nn.ConvTranspose2d) or isinstance(m, nn.Conv2d):
        m.weight.data.normal_(mean, std)
        m.bias.data.zero_()

# Training

In [26]:
def loss_function(recon_x_img, recon_x_txt, x_img, x_txt, mu, logvar):
    BCE_img = torch.mean((recon_x_img - x_img)**2)
    BCE_txt = torch.mean((recon_x_txt - x_txt)**2)

    # see Appendix B from VAE paper:
    # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
    # https://arxiv.org/abs/1312.6114
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    KLD = -0.5 * torch.mean(torch.mean(1 + logvar - mu.pow(2) - logvar.exp(), 1))
    return BCE_img, BCE_txt, KLD * 0.1

In [27]:
f = open("VAE_result.txt", "w")

In [28]:
def main():
    
    name_dict = dict()
    name_dict["misogynous"] = 0
    name_dict["shaming"] = 1
    name_dict["stereotype"] = 2
    name_dict["objectification"] = 3
    name_dict["violence"] = 4
    
    #batch_size = 32
    z_size = 512
#     z_size = 1024
    vae = VAE(z_size)
    vae.cuda()
    vae.train()
    vae.weight_init(mean=0, std=0.02)

    lr = 0.0001

    vae_optimizer = optim.Adam(vae.parameters(), lr=lr, betas=(0.5, 0.999), weight_decay=1e-5)
    
    criterion = nn.CrossEntropyLoss()
    criterion.to(device)
 
    train_epoch = 30

    
    dataloader = train_iterator
    
    f1_max = 0
    max_acc = 0
    
    for epoch in range(train_epoch):
        vae.train()

        rec_txt_loss = 0
        rec_img_loss = 0
        kl_loss = 0
        subtask_misogynous_loss = 0
        subtask_shaming_loss = 0
        subtask_stereotype_loss = 0
        subtask_objectification_loss = 0
        subtask_violence_loss = 0

        epoch_start_time = time.time()

        if (epoch + 1) % 8 == 0:
            vae_optimizer.param_groups[0]['lr'] /= 4
#             print("learning rate change!")
            f.write("learning rate change! The learning rate is %1.4f now\n" % (lr))

#         i = 0
        acc = 0
        num = 0
        for i, data in tqdm(enumerate(dataloader, 0), desc='iterations'):
        #for x in batches:
            vae.train()
            
            #inputs, classes = data
            img_inputs = data['meme']
            img_inputs = img_inputs.to(device)

            txt_inputs = data["text"]
            txt_inputs = txt_inputs.to(device)
            
            classes = data['labels']
            
            # multi-task labels
            classes_misogynous = classes[:, 0]
            classes_shaming = classes[:, 1]
            classes_stereotype = classes[:, 2]
            classes_objectification = classes[:, 3]
            classes_violence = classes[:, 4]
            
            img_inputs, txt_inputs, classes_misogynous = Variable(img_inputs), Variable(txt_inputs), Variable(classes_misogynous)
            classes_shaming = Variable(classes_shaming)
            classes_stereotype = Variable(classes_stereotype)
            classes_objectification = Variable(classes_stereotype)
            classes_violence = Variable(classes_violence)
        
            img_inputs = img_inputs.to(device)
            txt_inputs = txt_inputs.to(device)
            classes_misogynous = classes_misogynous.to(device)
            classes_shaming = classes_shaming.to(device)
            classes_stereotype = classes_stereotype.to(device)
            classes_objectification = classes_objectification.to(device)
            classes_violence = classes_violence.to(device)
            
            vae.zero_grad()
#             rec, mu, logvar = vae(x)
            rec_img, rec_txt, mu, logvar, y_pred = vae(img_inputs, txt_inputs)

            loss_re_img, loss_re_txt, loss_kl = loss_function(rec_img, rec_txt, img_inputs, txt_inputs, mu, logvar)
            loss_subtask_misogynous = criterion(y_pred["misogynous"], classes_misogynous)
            loss_subtask_shaming = criterion(y_pred["shaming"], classes_shaming)
            loss_subtask_stereotype = criterion(y_pred["stereotype"], classes_stereotype)
            loss_subtask_objectification = criterion(y_pred["objectification"], classes_objectification)
            loss_subtask_violence = criterion(y_pred["violence"], classes_violence)
            
            (loss_re_img + loss_re_txt + loss_kl + loss_subtask_misogynous \
             + loss_subtask_shaming + loss_subtask_stereotype + loss_subtask_objectification\
             + loss_subtask_violence).backward()
            
            vae_optimizer.step()
            rec_img_loss += loss_re_img.item()
            rec_txt_loss += loss_re_txt.item()
            
            kl_loss += loss_kl.item()
            subtask_misogynous_loss += loss_subtask_misogynous.item()
            subtask_shaming_loss += loss_subtask_shaming.item()
            subtask_stereotype_loss += loss_subtask_stereotype.item()
            subtask_objectification_loss += loss_subtask_objectification.item()
            subtask_violence_loss += loss_subtask_violence.item()
            
            # Calculate batch accuracy
            _, top_pred = y_pred["misogynous"].topk(1, 1)
            y = classes_misogynous.cpu()
            batch_size = y.shape[0]
            top_pred = top_pred.cpu().view(batch_size)
            acc += sum(top_pred == y).item()
            num += batch_size

            #############################################

            epoch_end_time = time.time()
            per_epoch_ptime = epoch_end_time - epoch_start_time

            # report losses and save samples each 60 iterations
            m = len(dataloader)
            i += 1
            if i % m == 0:
                rec_txt_loss /= m
                rec_img_loss /= m
                kl_loss /= m
                subtask_misogynous_loss /= m
                subtask_shaming_loss /= m
                subtask_stereotype_loss /= m
                subtask_objectification_loss /= m
                subtask_violence_loss /= m
                
#                 print('\n[%d/%d] - ptime: %.2f, rec img loss: %.9f, rec txt loss: %.9f, KL loss: %.9f, misogynous loss: %.9f, shaming loss: %.9f, stereotype loss: %.9f, objectification loss: %.9f, violence loss: %.9f' % (
#                     (epoch + 1), train_epoch, per_epoch_ptime, rec_img_loss, rec_txt_loss, kl_loss, subtask_misogynous_loss, subtask_shaming_loss, subtask_stereotype_loss, subtask_objectification_loss, subtask_violence_loss))

                f.write('\n[%d/%d] - ptime: %.2f, rec img loss: %.9f, rec txt loss: %.9f, KL loss: %.9f, misogynous loss: %.9f, shaming loss: %.9f, stereotype loss: %.9f, objectification loss: %.9f, violence loss: %.9f\n' % (
                    (epoch + 1), train_epoch, per_epoch_ptime, rec_img_loss, rec_txt_loss, kl_loss, subtask_misogynous_loss, subtask_shaming_loss, subtask_stereotype_loss, subtask_objectification_loss, subtask_violence_loss))
                rec_txt_loss = 0
                rec_img_loss = 0
                kl_loss = 0
                with torch.no_grad():
#                     test_loss, test_acc, test_accuracy, test_f1, test_recall, test_precision = evaluate(vae, valid_iterator, criterion, device, "misogynous")
                    test_loss, test_acc, test_accuracy, test_f1, test_recall, test_precision = evaluate(vae, valid_iterator, criterion, device)
                    f.write(f'Test subtask misogynous Loss: {test_loss["misogynous"]:.3f} | Test Acc @1: {test_acc["misogynous"]*100:6.2f}%\n')
                    f.write(f'Test subtask misogynous accuracy: {test_accuracy["misogynous"]*100:6.2f}%\n')
                    f.write(f'Test subtask misogynous f1: {test_f1["misogynous"]*100:6.2f}%\n')
                    f.write(f'Test subtask misogynous recall: {test_recall["misogynous"]*100:6.2f}%\n')
                    f.write(f'Test subtask misogynous precision: {test_precision["misogynous"]*100:6.2f}%\n')
                    
#                     test_loss, test_acc, test_accuracy, test_f1, test_recall, test_precision = evaluate(vae, trial_iterator, criterion, device, "shaming")
                    f.write(f'Test subtask shaming Loss: {test_loss["shaming"]:.3f} | Test Acc @1: {test_acc["shaming"]*100:6.2f}%\n')
                    f.write(f'Test subtask shaming accuracy: {test_accuracy["shaming"]*100:6.2f}%\n')
                    f.write(f'Test subtask shaming f1: {test_f1["shaming"]*100:6.2f}%\n')
                    f.write(f'Test subtask shaming recall: {test_recall["shaming"]*100:6.2f}%\n')
                    f.write(f'Test subtask shaming precision: {test_precision["shaming"]*100:6.2f}%\n')
                    
#                     test_loss, test_acc, test_accuracy, test_f1, test_recall, test_precision = evaluate(vae, trial_iterator, criterion, device, "stereotype")
                    f.write(f'Test subtask stereotype Loss: {test_loss["stereotype"]:.3f} | Test Acc @1: {test_acc["stereotype"]*100:6.2f}%\n')
                    f.write(f'Test subtask stereotype accuracy: {test_accuracy["stereotype"]*100:6.2f}%\n')
                    f.write(f'Test subtask stereotype f1: {test_f1["stereotype"]*100:6.2f}%\n')
                    f.write(f'Test subtask stereotype recall: {test_recall["stereotype"]*100:6.2f}%\n')
                    f.write(f'Test subtask stereotype precision: {test_precision["stereotype"]*100:6.2f}%\n')
                    
#                     test_loss, test_acc, test_accuracy, test_f1, test_recall, test_precision = evaluate(vae, trial_iterator, criterion, device, "objectification")
                    f.write(f'Test subtask objectification Loss: {test_loss["objectification"]:.3f} | Test Acc @1: {test_acc["objectification"]*100:6.2f}%\n')
                    f.write(f'Test subtask objectification accuracy: {test_accuracy["objectification"]*100:6.2f}%\n')
                    f.write(f'Test subtask objectification f1: {test_f1["objectification"]*100:6.2f}%\n')
                    f.write(f'Test subtask objectification recall: {test_recall["objectification"]*100:6.2f}%\n')
                    f.write(f'Test subtask objectification precision: {test_precision["objectification"]*100:6.2f}%\n')
                    
#                     test_loss, test_acc, test_accuracy, test_f1, test_recall, test_precision = evaluate(vae, trial_iterator, criterion, device, "violence")
                    f.write(f'Test subtask violence Loss: {test_loss["violence"]:.3f} | Test Acc @1: {test_acc["violence"]*100:6.2f}%\n')
                    f.write(f'Test subtask violence accuracy: {test_accuracy["violence"]*100:6.2f}%\n')
                    f.write(f'Test subtask violence f1: {test_f1["violence"]*100:6.2f}%\n')
                    f.write(f'Test subtask violence recall: {test_recall["violence"]*100:6.2f}%\n')
                    f.write(f'Test subtask violence precision: {test_precision["violence"]*100:6.2f}%\n')
                    
                    acc /= num
                    print(f'num_correct: {acc}\n')
                    print(f'total_num: {num}\n')
                    f.write(f'Training accuracy: {acc*100:6.2f}%\n')
                    
                    if test_f1["misogynous"]*100 >= f1_max:
                        
                        torch.save(vae.state_dict(), "VAEmodel-epoch-%d.pkl" % (epoch+1))
                        f.write("Epoch [%d/%d]: test f1 on misogynous improves, saving training results\n" % (epoch+1, train_epoch))
                        f1_max = test_f1["misogynous"]*100

        f.flush()

    f.write("Training finish!... save training results\n")
    return vae

In [29]:
def calculate_accuracy(y_pred, y):
    with torch.no_grad():
        batch_size = y.shape[0]
        _, top_pred = y_pred.topk(1, 1)
        top_pred = top_pred.t()
        correct = top_pred.eq(y.view(1, -1).expand_as(top_pred))
        correct_1 = correct[:1].reshape(-1).float().sum(0, keepdim = True)
        acc_1 = correct_1 / batch_size
    
    top_pred = top_pred.cpu().view(batch_size)
    y = y.cpu()
    
    accuracy = accuracy_score(y, top_pred)
    #print("accuracy: {}".format(accuracy))

    f1 = f1_score(y, top_pred)
#     print(top_pred)
    #print("f1: {}".format(f1))

    recall = recall_score(y, top_pred)
    #print("recall: {}".format(recall))

    precision = precision_score(y, top_pred)
    #print("precision: {}".format(precision))

    cm = confusion_matrix(y, top_pred)
    #print("cm: {}".format(cm))
    return acc_1, accuracy, f1, recall, precision, cm

In [30]:
def evaluate(model, iterator, criterion, device, subtask_name="misogynous"):
    
    epoch_loss = dict()
    epoch_loss["misogynous"] = 0
    epoch_loss["shaming"] = 0
    epoch_loss["stereotype"] = 0
    epoch_loss["objectification"] = 0
    epoch_loss["violence"] = 0
    
    epoch_acc = dict()
    epoch_acc["misogynous"] = 0
    epoch_acc["shaming"] = 0
    epoch_acc["stereotype"] = 0
    epoch_acc["objectification"] = 0
    epoch_acc["violence"] = 0
    
    epoch_accuracy = dict()
    epoch_accuracy["misogynous"] = 0
    epoch_accuracy["shaming"] = 0
    epoch_accuracy["stereotype"] = 0
    epoch_accuracy["objectification"] = 0
    epoch_accuracy["violence"] = 0
    
    epoch_f1 = dict()
    epoch_f1["misogynous"] = 0
    epoch_f1["shaming"] = 0
    epoch_f1["stereotype"] = 0
    epoch_f1["objectification"] = 0
    epoch_f1["violence"] = 0
    
    epoch_recall = dict()
    epoch_recall["misogynous"] = 0
    epoch_recall["shaming"] = 0
    epoch_recall["stereotype"] = 0
    epoch_recall["objectification"] = 0
    epoch_recall["violence"] = 0
    
    epoch_precision = dict()
    epoch_precision["misogynous"] = 0
    epoch_precision["shaming"] = 0
    epoch_precision["stereotype"] = 0
    epoch_precision["objectification"] = 0
    epoch_precision["violence"] = 0
    
    epoch_cm = dict()
    epoch_cm["misogynous"] = 0
    epoch_cm["shaming"] = 0
    epoch_cm["stereotype"] = 0
    epoch_cm["objectification"] = 0
    epoch_cm["violence"] = 0
    
    model.eval()
    
    name_dict = dict()
    name_dict["misogynous"] = 0
    name_dict["shaming"] = 1
    name_dict["stereotype"] = 2
    name_dict["objectification"] = 3
    name_dict["violence"] = 4
    
    with torch.no_grad():
        
        #for (x, y) in iterator:
        for i, data in tqdm(enumerate(iterator, 0), desc='iterations'):

            x_img = data['meme']
            x_img = x_img.to(device)
            
            x_txt = data['text']
            x_txt = x_txt.to(device)
            
            
            y = data['labels']
            
            x_img, x_txt = x_img.to(device), x_txt.to(device)

            _, _, _, _, y_pred = model(x_img, x_txt)
            

            
            for subtask_name, subtask_index in name_dict.items():
                subtask_y = y[:, subtask_index]
                subtask_y = subtask_y.to(device)
                loss = criterion(y_pred[subtask_name], subtask_y)
                acc, accuracy, f1, recall, precision, cm = calculate_accuracy(y_pred[subtask_name], subtask_y)
                
                epoch_loss[subtask_name] += loss.item()
                epoch_acc[subtask_name] += acc.item()
                epoch_accuracy[subtask_name] += accuracy.item()
                epoch_f1[subtask_name] += f1.item()
                epoch_recall[subtask_name] += recall.item()
                epoch_precision[subtask_name] += precision.item()
                # epoch_cm += cm.item()
                
                
    for subtask_name, subtask_index in name_dict.items():
        epoch_loss[subtask_name] /= len(iterator)
        epoch_acc[subtask_name] /= len(iterator)
        epoch_accuracy[subtask_name] /= len(iterator)
        epoch_f1[subtask_name] /= len(iterator)
        epoch_recall[subtask_name] /= len(iterator)
        epoch_precision[subtask_name] /= len(iterator)
                
        
    return epoch_loss, epoch_acc, epoch_accuracy, epoch_f1, epoch_recall, epoch_precision

In [None]:
vae = main()
criterion = nn.CrossEntropyLoss()
criterion.to(device)
test_loss, test_acc, test_accuracy, test_f1, test_recall, test_precision = evaluate(vae, trial_iterator, criterion, device, "misogynous")

f.write(f'Trial subtask misogynous Loss: {test_loss:.3f} | Test Acc @1: {test_acc*100:6.2f}%\n')
f.write(f'Trial subtask misogynous accuracy: {test_accuracy*100:6.2f}%\n')
f.write(f'Trial subtask misogynous f1: {test_f1*100:6.2f}%\n')
f.write(f'Trial subtask misogynous recall: {test_recall*100:6.2f}%\n')
f.write(f'Trial subtask misogynous precision: {test_precision*100:6.2f}%\n')

test_loss, test_acc, test_accuracy, test_f1, test_recall, test_precision = evaluate(vae, trial_iterator, criterion, device, "shaming")

f.write(f'Trial subtask shaming Loss: {test_loss:.3f} | Test Acc @1: {test_acc*100:6.2f}%\n')
f.write(f'Trial subtask shaming accuracy: {test_accuracy*100:6.2f}%\n')
f.write(f'Trial subtask shaming f1: {test_f1*100:6.2f}%\n')
f.write(f'Trial subtask shaming recall: {test_recall*100:6.2f}%\n')
f.write(f'Trial subtask shaming precision: {test_precision*100:6.2f}%\n')

f.close()

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='iterations', max=1.0, style=ProgressSty…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='iterations', max=1.0, style=ProgressSty…

  _warn_prf(average, modifier, msg_start, len(result))



num_correct: 0.7284444444444444

total_num: 9000




HBox(children=(FloatProgress(value=1.0, bar_style='info', description='iterations', max=1.0, style=ProgressSty…

# Testing

In [18]:
def test(model, iterator, device):
    
    name_dict = dict()
    name_dict["misogynous"] = 0
    name_dict["shaming"] = 1
    name_dict["stereotype"] = 2
    name_dict["objectification"] = 3
    name_dict["violence"] = 4
    
    y_test = dict()
    y_test["misogynous"] = []
    y_test["shaming"] = []
    y_test["stereotype"] = []
    y_test["objectification"] = []
    y_test["violence"] = []
    
    model.cuda()
    model.eval()
    
    with torch.no_grad():
        
        for i, data in tqdm(enumerate(iterator, 0), desc='iterations'):

            x_img = data['meme']
            x_img = x_img.to(device)
            
            x_txt = data['text']
            
            x_img, x_txt = x_img.to(device), x_txt.to(device)

            _, _, _, _, y_pred = model(x_img, x_txt)
            
            
            for subtask_name, subtask_index in name_dict.items():
                subtask_y = y_pred[subtask_name].cpu()
                for dp in subtask_y:
                    if dp[0] >= dp[1]:
                        y_test[subtask_name].append(0)
                    else:
                        y_test[subtask_name].append(1)
        
    return y_test

In [33]:
best_VAE = VAE(512)
best_VAE.load_state_dict(torch.load("VAEmodel-6.pkl"))

<All keys matched successfully>

In [34]:
y_test = test(best_VAE, test_iterator, device)

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='iterations', max=1.0, style=ProgressSty…




In [35]:
len(y_test["misogynous"])

1000

In [36]:
prediction_df = test_df.copy()

prediction_df["misogynous"] = y_test["misogynous"]
prediction_df["shaming"] = y_test["shaming"]
prediction_df["stereotype"] = y_test["stereotype"]
prediction_df["objectification"] = y_test["objectification"]
prediction_df["violence"] = y_test["violence"]

In [37]:
prediction_df = prediction_df.drop('Text Transcription', 1)
prediction_df

Unnamed: 0,file_name,misogynous,shaming,stereotype,objectification,violence
0,15236.jpg,1,0,0,0,0
1,15805.jpg,1,0,1,1,0
2,16254.jpg,1,0,0,0,0
3,16191.jpg,1,0,0,0,0
4,15952.jpg,1,0,0,0,0
...,...,...,...,...,...,...
995,15591.jpg,1,0,1,1,0
996,15049.jpg,1,0,0,0,0
997,15363.jpg,1,0,0,0,0
998,15199.jpg,1,0,1,1,0
