In [None]:
!rm -Rf sample_data

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Import Data

In [None]:
!cp /content/drive/MyDrive/Wipro/Final/Data/Memotion/images.zip /content/drive/MyDrive/Wipro/Final/Data/Memotion/csv/memotion_train_sentences.csv .

In [None]:
!unzip -q images.zip

In [None]:
!rm images.zip

In [None]:
!pip install timm
!pip install transformers

Collecting timm
  Downloading timm-0.4.12-py3-none-any.whl (376 kB)
[?25l[K     |▉                               | 10 kB 35.0 MB/s eta 0:00:01[K     |█▊                              | 20 kB 31.2 MB/s eta 0:00:01[K     |██▋                             | 30 kB 19.0 MB/s eta 0:00:01[K     |███▌                            | 40 kB 17.5 MB/s eta 0:00:01[K     |████▍                           | 51 kB 8.2 MB/s eta 0:00:01[K     |█████▏                          | 61 kB 9.2 MB/s eta 0:00:01[K     |██████                          | 71 kB 7.7 MB/s eta 0:00:01[K     |███████                         | 81 kB 8.6 MB/s eta 0:00:01[K     |███████▉                        | 92 kB 7.2 MB/s eta 0:00:01[K     |████████▊                       | 102 kB 7.9 MB/s eta 0:00:01[K     |█████████▋                      | 112 kB 7.9 MB/s eta 0:00:01[K     |██████████▍                     | 122 kB 7.9 MB/s eta 0:00:01[K     |███████████▎                    | 133 kB 7.9 MB/s eta 0:00:01[K    

# Model

In [None]:
import shutil
import torch
from torch.utils.data import DataLoader,Dataset
import transformers
from transformers import CLIPModel, CLIPConfig
import numpy as np
import matplotlib.pyplot as plt
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
import os 
import pandas as pd
import cv2 as cv
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import WeightedRandomSampler
from sklearn.metrics import  f1_score
from tqdm.autonotebook import tqdm

import tensorflow as tf
import tensorboard as tb
tf.io.gfile = tb.compat.tensorflow_stub.io.gfile


from tensorboard.plugins import projector

In [None]:
import numpy as np
import pandas as pd

train_df = pd.read_csv('memotion_train_sentences.csv')
train_df.head()

Unnamed: 0,image_name,text_corrected,humour,sarcasm,offensive,motivational,overall_sentiment
0,image_1.jpg,look there my friend lightyear now all sohalik...,3,1,0,0,2
1,image_2.jpeg,the best of #10 yearchallenge! completed in le...,0,1,0,1,2
2,image_3.JPG,sam thorne @strippin ( follow follow saw every...,2,0,0,0,2
3,image_4.png,10 year challenge - sweet dee edition,2,2,2,1,2
4,image_5.png,10 year challenge with no filter 47 hilarious ...,3,3,2,0,1


In [None]:
train_df.shape

(6992, 7)

In [None]:
train_df.columns

Index(['image_name', 'text_corrected', 'humour', 'sarcasm', 'offensive',
       'motivational', 'overall_sentiment'],
      dtype='object')

In [None]:
rem_images = ['image_1567.jpg','image_4924.jpg','image_5119.png','image_6357.jpg']
train_df.drop(train_df[train_df['image_name'].isin(rem_images)].index,inplace=True)
train_df.dropna(inplace=True)

In [None]:
train_df.shape

(6983, 7)

In [None]:
train_df.columns

Index(['image_name', 'text_corrected', 'humour', 'sarcasm', 'offensive',
       'motivational', 'overall_sentiment'],
      dtype='object')

In [None]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
device

device(type='cuda', index=0)

In [None]:
# CLIP Tokeniser
tokeniser = transformers.CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")

Downloading:   0%|          | 0.00/862k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/525k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/389 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/568 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.49M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/3.98k [00:00<?, ?B/s]

ftfy or spacy is not installed using BERT BasicTokenizer instead of ftfy.


In [None]:
class AvgMeter:
    def __init__(self, name="Metric"):
        self.name = name
        self.reset()

    def reset(self):
        self.avg, self.sum, self.count = [0] * 3

    def update(self, val, count=1):
        self.count += count
        self.sum += val * count
        self.avg = self.sum / self.count

    def __repr__(self):
        text = f"{self.name}: {self.avg:.4f}"
        return text

def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group["lr"]

In [None]:
'''
  Dataset class
'''
class MemotionData(Dataset):
    def __init__(self,root_dir,img_names,ocr_text,sentiments,tokeniser,transforms=None):
        
        self.img_names = img_names
        self.ocr_text = ocr_text
        self.sentiments = sentiments
        self.tokeniser = tokeniser
        self.root_dir = root_dir
        self.transforms = transforms
        
    def __len__(self):
        return len(self.img_names)
        
        
    def __getitem__(self,idx):
        
        # print(self.img_names[idx])

        img = cv.imread(os.path.join(self.root_dir,self.img_names[idx]))
        img = cv.cvtColor(img,cv.COLOR_BGR2RGB)
        
        if self.transforms:
            img = self.transforms(img)
        
        output_token_ids = self.tokeniser.encode_plus(
            self.ocr_text[idx],
            max_length=76,
            add_special_tokens=True,
            padding='max_length',
            return_tensors='pt',
            return_attention_mask=True,
            truncation = True
        )
        
        return {
            'img': torch.FloatTensor(img),
            'input_ids': output_token_ids['input_ids'],
            'att_mask': output_token_ids['attention_mask'],
            'sentiment': torch.tensor(self.sentiments[idx],dtype=torch.long)
        }
        


def create_dataset(df,tokeniser,folder,max_len=76):
    ds = MemotionData(
        root_dir = folder,
        img_names = df['image_name'].to_list(),
        ocr_text= df['text'].to_list(),
        sentiments = df['label'].to_list(),
        tokeniser = tokeniser,
        transforms = torchvision.transforms.Compose(
        [
                torchvision.transforms.ToPILImage(),
                torchvision.transforms.Resize((224,224)),
                torchvision.transforms.ToTensor(),
        ]
        )
    )
    return ds

In [None]:
'''
  To create model based on CLIP 
'''
class MemotionModel(nn.Module):
    def __init__(self,scratch=True):
        super(MemotionModel,self).__init__()
        self.pre_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
        self.scratch = scratch
        
        if scratch:
          for params in self.pre_model.parameters():
              params.requires_grad = True
        
        else:
          for params in self.pre_model.parameters():
              params.requires_grad = False
                
    def forward(self,x,input_ids,att_mask):
        img_embed =  self.pre_model.get_image_features(x)
        text_embed = self.pre_model.get_text_features(input_ids.squeeze(1),attention_mask=att_mask.squeeze(1))
        return img_embed, text_embed
        

def calc_loss(image_embeddings, text_embeddings, temperature=1.0):
    logits = (text_embeddings @ image_embeddings.T) / temperature
    images_similarity = image_embeddings @ image_embeddings.T
    texts_similarity = text_embeddings @ text_embeddings.T
    targets = F.softmax(
        (images_similarity + texts_similarity) / 2 * temperature, dim=-1
    )
    texts_loss = cross_entropy(logits, targets, reduction='none')
    images_loss = cross_entropy(logits.T, targets.T, reduction='none')
    loss =  (images_loss + texts_loss) / 2.0 # shape: (batch_size)
    return logits, targets, loss.mean()

def cross_entropy(preds, targets, reduction='none'):
    log_softmax = nn.LogSoftmax(dim=-1)
    loss = (-targets * log_softmax(preds)).sum(1)
    if reduction == "none":
        return loss
    elif reduction == "mean":
        return loss.mean()

# k-Fold

In [None]:
from sklearn.model_selection import KFold

X = [(i[0], i[1]) for i in train_df.values]
y = train_df['overall_sentiment'].values

In [None]:
X[0], y[0]

(('image_1.jpg',
  'look there my friend lightyear now all sohalikut trend play the 10 years challenge at facebook imgflip.com'),
 2)

In [None]:
kf = KFold(n_splits=5, shuffle=True, random_state=21)
kf

KFold(n_splits=5, random_state=21, shuffle=True)

In [None]:
X_splits = []
for train_index, test_index in kf.split(X):
    X_train_df = pd.DataFrame()
    X_test_df = pd.DataFrame()

    X_train_im, X_train_sen, X_test_im, X_test_sen, y_train, y_test = [], [], [], [], [], []
    for i in train_index:
        X_train_im.append(X[i][0])
        X_train_sen.append(X[i][0])
        y_train.append(y[i])
    for i in test_index:
        X_test_im.append(X[i][0])
        X_test_sen.append(X[i][0])
        y_test.append(y[i])

    X_train_df['image_name'] = X_train_im
    X_train_df['text'] = X_train_sen
    X_train_df['label'] = y_train

    X_test_df['image_name'] = X_test_im
    X_test_df['text'] = X_test_sen
    X_test_df['label'] = y_test

    X_splits.append((X_train_df, X_test_df))

In [None]:
X_splits[0][0].columns

Index(['image_name', 'text', 'label'], dtype='object')

In [None]:
X_splits[0][0].shape, X_splits[0][1].shape

((5586, 3), (1397, 3))

# Sentiment Analysis

In [None]:
basic_model = MemotionModel(scratch=False)

Downloading:   0%|          | 0.00/605M [00:00<?, ?B/s]

In [None]:
for param in basic_model.parameters():
    param.requires_grad = False

In [None]:
'''
  Return Weights for training
'''

def ret_sample_weights(df_new,train_dataset):

  class_counts = list(np.unique(df_new['overall_sentiment'],return_counts=True)[1])
  class_weights = [sum(class_counts)/c for c in class_counts]

  sample_weights = [0]*len(train_dataset)

  for idx,x in enumerate(train_dataset):
    class_weight = class_weights[x['sentiment']]
    sample_weights[idx] = class_weight

  return sample_weights


In [None]:
class SentiModel(nn.Module):
    def __init__(self, basic_model, num_class=3):
        super(SentiModel, self).__init__()
        self.basic_model = basic_model
        self.fc1 = nn.Linear(1024,256)
        self.fc2 = nn.Linear(256, 256)
        self.fc3 = nn.Linear(256, 256)
        self.fc4 = nn.Linear(256,num_class)
        self.softmax = nn.Softmax(dim=1)
        self.batchnorm = nn.BatchNorm1d(256)
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()

    def forward(self,x,input_ids,att_mask,y):        
        img_embed, text_embed =  self.basic_model(x,input_ids.squeeze(1),att_mask.squeeze(1))
        x = torch.cat((img_embed,text_embed),dim=1)
        x.requires_grad = True
        x = self.batchnorm(self.tanh(self.fc1(x)))
        x = self.batchnorm(self.tanh(self.fc2(x)))
        x = self.batchnorm(self.relu(self.fc3(x)))
        x = self.softmax(self.fc4(x))
        return x

In [None]:
def one_iteration(senti_model, train_loader, optimizer, criterion):
    for epoch in range(5):
        print('Epoch ',epoch+1)
        running_loss = 0.0
        correct = 0
        total = 0
        training_loss = []
        y_true = []
        y_predicted = []

        train_tqdm_object = tqdm(train_loader, total=len(train_loader))
        for i, x in enumerate(train_tqdm_object):
            optimizer.zero_grad()
            outputs = senti_model(x['img'].to(device),x['input_ids'].to(device),x['att_mask'].to(device),x['sentiment'].to(device))
                
            _, predicted = torch.max(outputs.data, 1)
            total += x['sentiment'].to(device).size(0)
            y_true.append(x['sentiment'].to(device))
            y_predicted.append(predicted)
            correct += (predicted.cpu() == x['sentiment']).sum().item()

            loss = criterion(outputs, x['sentiment'].to(device))
            loss.backward()
            optimizer.step()
            # print statistics
            running_loss += loss.item()
            training_loss.append(loss.item())
            if (i+1) % 50 == 0:    
                print('[%d, %5d] loss: %.5f' %
                    (epoch + 1, i + 1, running_loss / 50))
                running_loss = 0.0
        y_true = torch.cat(y_true,dim=0)
        y_predicted = torch.cat(y_predicted,dim=0)
        print(f'Train Acc: {100*correct/total}')
    return senti_model

In [None]:
def test_func(senti_model, test_loader):
    correct = 0
    total = 0
    y_true = []
    y_predicted = []

    with torch.no_grad():
        for x in test_loader:
            labels = x['sentiment'].to(device)
            outputs = senti_model(x['img'].to(device),x['input_ids'].to(device),x['att_mask'].to(device),x['sentiment'].to(device))
            _, predicted = torch.max(outputs.data, 1)
            y_true.append(x['sentiment'].to(device))
            y_predicted.append(predicted)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    y_true = torch.cat(y_true,dim=0)
    y_predicted = torch.cat(y_predicted,dim=0)
    score = f1_score(y_true.cpu().numpy(),y_predicted.cpu().numpy(),average='macro')
    acc = 100*correct/total
    print(f'F1 Score: {score}')
    print(f'Test Acc: {acc}')
    return score, acc

In [None]:
BATCH_SIZE = 256

def train_kfold():
    count = 0
    CROSS_VALIDATION_SCORES = {}

    for df_train, test_df in X_splits:
        count += 1
        # Create dataset
        print(f'\n\n----------------------------------------- TRAINING SPLIT {count} -----------------------------------------')

        train_dataset = create_dataset(df_train,tokeniser, 'train_images/')
        test_dataset = create_dataset(test_df,tokeniser, 'train_images/')

        sample_weights = ret_sample_weights(train_df,train_dataset)

        train_sampler = WeightedRandomSampler(sample_weights,num_samples=len(sample_weights),replacement=True)
        train_loader = DataLoader(train_dataset,batch_size=BATCH_SIZE,sampler=train_sampler)
        test_loader = DataLoader(test_dataset,batch_size=BATCH_SIZE)

        senti_model = SentiModel(basic_model).to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(senti_model.parameters(), lr=3e-7, weight_decay=1e-5)

        returned_senti_model = one_iteration(senti_model, train_loader, optimizer, criterion)
        torch.save(returned_senti_model.state_dict(),f'/content/drive/MyDrive/Wipro/Final/Task_A/kFold/kfold_memotion_sentences_clip_original_split_count_{count}.pt')

        del train_dataset, train_sampler, train_loader, senti_model, criterion, optimizer, 

        score, acc = test_func(returned_senti_model, test_loader)
        print(f'\nFinal Acc: {acc}')
        print(f'Final F1 Score: {score}')

        del test_dataset, test_loader
        torch.cuda.empty_cache()
        
        CROSS_VALIDATION_SCORES[count] = (returned_senti_model,score)

    return CROSS_VALIDATION_SCORES

In [None]:
CROSS_VALIDATION_SCORES = train_kfold()



----------------------------------------- TRAINING SPLIT 1 -----------------------------------------
Epoch  1


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 30.773361976369497
Epoch  2


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 32.40243465807376
Epoch  3


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 32.25921947726459
Epoch  4


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 32.63515932688865
Epoch  5


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 31.954887218045112
F1 Score: 0.2784925375373062
Test Acc: 31.424481030780242

Final Acc: 31.424481030780242
Final F1 Score: 0.2784925375373062


----------------------------------------- TRAINING SPLIT 2 -----------------------------------------
Epoch  1


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 32.45614035087719
Epoch  2


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 32.09810239885428
Epoch  3


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 31.471535982814178
Epoch  4


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 33.51235230934479
Epoch  5


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 32.13390619405657
F1 Score: 0.29492598358898575
Test Acc: 32.64137437365784

Final Acc: 32.64137437365784
Final F1 Score: 0.29492598358898575


----------------------------------------- TRAINING SPLIT 3 -----------------------------------------
Epoch  1


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 33.22592194772646
Epoch  2


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 33.046902971715
Epoch  3


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 33.583959899749374
Epoch  4


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 34.4969566774078
Epoch  5


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 35.08771929824562
F1 Score: 0.2895239786230978
Test Acc: 31.28131710808876

Final Acc: 31.28131710808876
Final F1 Score: 0.2895239786230978


----------------------------------------- TRAINING SPLIT 4 -----------------------------------------
Epoch  1


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 33.75693574369071
Epoch  2


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 35.403615536065864
Epoch  3


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 33.77483443708609
Epoch  4


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 34.526579559692145
Epoch  5


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 35.94057633792733
F1 Score: 0.326491598672861
Test Acc: 36.81948424068768

Final Acc: 36.81948424068768
Final F1 Score: 0.326491598672861


----------------------------------------- TRAINING SPLIT 5 -----------------------------------------
Epoch  1


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 32.09235725792017
Epoch  2


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 33.00519062108466
Epoch  3


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 32.18185072489708
Epoch  4


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 33.381063182387685
Epoch  5


  0%|          | 0/22 [00:00<?, ?it/s]

Train Acc: 33.34526579559692
F1 Score: 0.30034240246617583
Test Acc: 33.59598853868195

Final Acc: 33.59598853868195
Final F1 Score: 0.30034240246617583


In [None]:
avg_score = sum([i[-1] for i in CROSS_VALIDATION_SCORES.values()]) / 5

best_split = sorted(CROSS_VALIDATION_SCORES, key=lambda x: CROSS_VALIDATION_SCORES[x][-1], reverse=True)[0]
best_model, best_score = CROSS_VALIDATION_SCORES[best_split]

In [None]:
best_split, avg_score, best_score

(4, 0.2979553001776853, 0.326491598672861)

In [None]:
torch.save(best_model.state_dict(),f'/content/drive/MyDrive/Wipro/Final/Task_A/kFold/models/kfold_memotion_sentences_clip_original_best_split.pt')