In [None]:
!rm -Rf sample_data

# Import Data

In [None]:
!cp /content/drive/MyDrive/Wipro/Data/memotion1_images_combined.zip .

In [None]:
!unzip -q memotion1_images_combined

In [None]:
!rm memotion1_images_combined.zip

In [None]:
import os
len(os.listdir('images'))

8870

# Model

In [None]:
!pip install timm
!pip install transformers

Collecting timm
  Downloading timm-0.4.12-py3-none-any.whl (376 kB)
[?25l[K     |▉                               | 10 kB 29.8 MB/s eta 0:00:01[K     |█▊                              | 20 kB 36.5 MB/s eta 0:00:01[K     |██▋                             | 30 kB 43.4 MB/s eta 0:00:01[K     |███▌                            | 40 kB 28.5 MB/s eta 0:00:01[K     |████▍                           | 51 kB 18.2 MB/s eta 0:00:01[K     |█████▏                          | 61 kB 15.3 MB/s eta 0:00:01[K     |██████                          | 71 kB 14.0 MB/s eta 0:00:01[K     |███████                         | 81 kB 15.5 MB/s eta 0:00:01[K     |███████▉                        | 92 kB 14.5 MB/s eta 0:00:01[K     |████████▊                       | 102 kB 13.3 MB/s eta 0:00:01[K     |█████████▋                      | 112 kB 13.3 MB/s eta 0:00:01[K     |██████████▍                     | 122 kB 13.3 MB/s eta 0:00:01[K     |███████████▎                    | 133 kB 13.3 MB/s eta 0:00:0

In [None]:
import shutil
import torch
from torch.utils.data import DataLoader,Dataset
import transformers
from transformers import CLIPModel, CLIPConfig
import numpy as np
import matplotlib.pyplot as plt
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
import os 
import pandas as pd
import cv2 as cv
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import WeightedRandomSampler
from sklearn.metrics import  f1_score
from tqdm.autonotebook import tqdm

import tensorflow as tf
import tensorboard as tb
tf.io.gfile = tb.compat.tensorflow_stub.io.gfile


from tensorboard.plugins import projector

In [None]:
# CLIPModel(CLIPConfig())

In [None]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
device

device(type='cuda', index=0)

In [None]:
# CLIP Tokeniser
tokeniser = transformers.CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=862328.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=524657.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=389.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=568.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1485500.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=3984.0, style=ProgressStyle(description…

ftfy or spacy is not installed using BERT BasicTokenizer instead of ftfy.





In [None]:
class AvgMeter:
    """Running weighted average of a scalar metric.

    Attributes:
        name: Label used when the meter is printed.
        avg: Current weighted mean (``sum / count``).
        sum: Accumulated ``val * count`` over all updates.
        count: Accumulated sample count over all updates.
    """

    def __init__(self, name="Metric"):
        self.name = name
        self.reset()

    def reset(self):
        """Set the average, the weighted sum and the sample count back to 0."""
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, count=1):
        """Fold in a value ``val`` observed over ``count`` samples.

        The average is recomputed immediately, so ``avg`` is always current.
        """
        self.count += count
        self.sum += val * count
        self.avg = self.sum / self.count

    def __repr__(self):
        # "<name>: <avg to 4 decimals>"
        return "{}: {:.4f}".format(self.name, self.avg)

def get_lr(optimizer):
    """Return the ``"lr"`` entry of the optimizer's first parameter group.

    Only the first group is inspected. ``None`` is returned when
    ``optimizer.param_groups`` is empty.
    """
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return None
    return first_group["lr"]

In [None]:
'''
  Dataset class
'''

class MemotionData(Dataset):
    """Dataset pairing a meme image with its OCR text and sentiment label.

    Args:
        root_dir: Directory that contains the image files.
        img_names: Image file names, relative to ``root_dir``.
        ocr_text: Caption / OCR string for each image (same order).
        sentiments: Integer class label for each image (same order).
        tokeniser: Tokeniser exposing ``encode_plus``.
        transforms: Optional callable applied to the RGB image array.
        max_len: Length every text is padded / truncated to. Defaults to 76,
            the value that was previously hard-coded.
    """

    def __init__(self,root_dir,img_names,ocr_text,sentiments,tokeniser,transforms=None,max_len=76):

        self.img_names = img_names
        self.ocr_text = ocr_text
        self.sentiments = sentiments
        self.tokeniser = tokeniser
        self.root_dir = root_dir
        self.transforms = transforms
        self.max_len = max_len

    def __len__(self):
        """Number of samples, i.e. the number of image names."""
        return len(self.img_names)

    def __getitem__(self,idx):
        """Load, convert and tokenise the sample at position ``idx``.

        Returns:
            dict with keys ``img``, ``input_ids``, ``att_mask`` and
            ``sentiment`` (a ``torch.long`` tensor).

        Raises:
            IndexError: if ``idx`` is out of range (raised by the list lookup).
            FileNotFoundError: if the image cannot be read from disk.
        """
        img_path = os.path.join(self.root_dir,self.img_names[idx])

        # cv.imread signals failure by returning None rather than raising;
        # fail here with the path instead of deep inside cvtColor.
        img = cv.imread(img_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")

        # OpenCV loads BGR; convert to RGB before any transform.
        img = cv.cvtColor(img,cv.COLOR_BGR2RGB)

        if self.transforms:
            img = self.transforms(img)

        output_token_ids = self.tokeniser.encode_plus(
            self.ocr_text[idx],
            max_length=self.max_len,
            add_special_tokens=True,
            padding='max_length',
            return_tensors='pt',
            return_attention_mask=True,
            truncation = True
        )

        return {
            'img': torch.FloatTensor(img),
            'input_ids': output_token_ids['input_ids'],
            'att_mask': output_token_ids['attention_mask'],
            'sentiment': torch.tensor(self.sentiments[idx],dtype=torch.long)
        }
        


def create_dataset(df,tokeniser,max_len=76):
    """Build a ``MemotionData`` over ``./images/`` from a dataframe.

    Reads the ``image_name``, ``text_corrected`` and ``overall_sentiment``
    columns of ``df``. Images are converted to PIL, resized to 224x224 and
    turned into tensors.

    NOTE(review): ``max_len`` is accepted but never forwarded to the dataset,
    so it has no effect here - confirm whether it should be wired through.
    """
    preprocess = torchvision.transforms.Compose([
        torchvision.transforms.ToPILImage(),
        torchvision.transforms.Resize((224,224)),
        torchvision.transforms.ToTensor(),
    ])

    return MemotionData(
        root_dir='./images/',
        img_names=df['image_name'].to_list(),
        ocr_text=df['text_corrected'].to_list(),
        sentiments=df['overall_sentiment'].to_list(),
        tokeniser=tokeniser,
        transforms=preprocess,
    )

In [None]:
'''
  To create model based on CLIP 
'''
class MemotionModel(nn.Module):
    """Wrapper around a CLIP model that returns image and text embeddings.

    Args:
        scratch: When truthy every CLIP parameter is trainable; otherwise
            every parameter is frozen.

    NOTE(review): the backbone is built with ``CLIPModel(CLIPConfig())``, i.e.
    from a default config rather than a loaded checkpoint. With
    ``scratch=False`` that network is frozen as constructed - confirm whether
    pretrained weights were intended for that path.
    """
    def __init__(self,scratch=True):
        super().__init__()
        # Placed on the notebook-level `device` at construction time.
        self.pre_model = CLIPModel(CLIPConfig()).to(device)
        self.scratch = scratch

        # One flag for all weights: trainable iff `scratch` is truthy.
        trainable = True if scratch else False
        for weight in self.pre_model.parameters():
            weight.requires_grad = trainable

    def forward(self,x,input_ids,att_mask):
        """Return ``(image_embeddings, text_embeddings)`` for a batch.

        ``input_ids`` and ``att_mask`` have dimension 1 squeezed away before
        they are passed to the text encoder.
        """
        text_tokens = input_ids.squeeze(1)
        text_mask = att_mask.squeeze(1)
        img_embed = self.pre_model.get_image_features(x)
        text_embed = self.pre_model.get_text_features(text_tokens,attention_mask=text_mask)
        return img_embed, text_embed
        

def calc_loss(image_embeddings, text_embeddings, temperature=1.0):
    """Symmetric soft-target contrastive loss between image and text embeddings.

    Logits are the text-by-image dot products divided by ``temperature``.
    Soft targets are the row-wise softmax of the mean of the image-image and
    text-text similarity matrices, multiplied by ``temperature``. Cross entropy
    is taken along both directions (logits and their transpose) and averaged.

    Returns:
        ``(logits, targets, loss)`` where ``loss`` is the mean over the batch.
    """
    logits = torch.matmul(text_embeddings, image_embeddings.T) / temperature

    img_sim = torch.matmul(image_embeddings, image_embeddings.T)
    txt_sim = torch.matmul(text_embeddings, text_embeddings.T)
    targets = F.softmax((img_sim + txt_sim) / 2 * temperature, dim=-1)

    # Per-sample losses in both directions; one value per batch element.
    texts_loss = cross_entropy(logits, targets, reduction='none')
    images_loss = cross_entropy(logits.T, targets.T, reduction='none')
    per_sample = (images_loss + texts_loss) / 2.0

    return logits, targets, per_sample.mean()

def cross_entropy(preds, targets, reduction='none'):
    """Cross entropy between logits and soft (probability) targets.

    Args:
        preds: Logits; log-softmax is applied over the last dimension.
        targets: Target weights with the same shape as ``preds``.
        reduction: ``"none"`` returns one loss per row (summed over dim 1),
            ``"mean"`` returns the mean of those row losses.

    Raises:
        ValueError: if ``reduction`` is neither ``"none"`` nor ``"mean"``.
            Previously such a value made the function return ``None``.
    """
    if reduction not in ("none", "mean"):
        raise ValueError(
            f"reduction must be 'none' or 'mean', got {reduction!r}"
        )

    loss = (-targets * F.log_softmax(preds, dim=-1)).sum(1)
    return loss if reduction == "none" else loss.mean()

In [None]:
df = pd.read_csv('merged_data.csv')
df.head()

Unnamed: 0,image_name,text_ocr,text_corrected,humour,sarcasm,offensive,motivational,overall_sentiment,tagged_sentence
0,image_1.jpg,LOOK THERE MY FRIEND LIGHTYEAR NOW ALL SOHALIK...,LOOK THERE MY FRIEND LIGHTYEAR NOW ALL SOHALIK...,hilarious,general,not_offensive,not_motivational,positive,look friend lightyear sohalikut trend play 10 ...
1,image_2.jpeg,The best of #10 YearChallenge! Completed in le...,The best of #10 YearChallenge! Completed in le...,not_funny,general,not_offensive,motivational,positive,best 10 yearchallenge
2,image_3.JPG,Sam Thorne @Strippin ( Follow Follow Saw every...,Sam Thorne @Strippin ( Follow Follow Saw every...,very_funny,not_sarcastic,not_offensive,not_motivational,positive,sam thorne @ strippin follow follow saw everyo...
3,image_4.png,10 Year Challenge - Sweet Dee Edition,10 Year Challenge - Sweet Dee Edition,very_funny,twisted_meaning,very_offensive,motivational,positive,10 year challenge sweet dee edition
4,image_5.png,10 YEAR CHALLENGE WITH NO FILTER 47 Hilarious ...,10 YEAR CHALLENGE WITH NO FILTER 47 Hilarious ...,hilarious,very_twisted,very_offensive,not_motivational,neutral,10 year challenge filter 47 hilarious 10 year ...


In [None]:
df.shape

(8870, 9)

In [None]:
df.columns

Index(['image_name', 'text_ocr', 'text_corrected', 'humour', 'sarcasm',
       'offensive', 'motivational', 'overall_sentiment', 'tagged_sentence'],
      dtype='object')

In [None]:
'''
  Defining Labels
'''

def cvt_sentiment(senti):
    """Collapse a sentiment label into a 3-way class id.

    Returns 2 for ``'very_positive'`` / ``'positive'``, 0 for
    ``'very_negative'`` / ``'negative'`` and 1 for anything else.
    """
    positive = ('very_positive', 'positive')
    negative = ('very_negative', 'negative')
    return 2 if senti in positive else (0 if senti in negative else 1)
    

In [None]:
'''
  Remove unwanted images
'''

# Files excluded from the working frame.
rem_images = ['image_1567.jpg','image_4924.jpg','image_5119.png','image_6357.jpg']

# Work on a copy so the raw frame `df` stays untouched.
df_new = df.copy()
df_new['overall_sentiment'] = df_new['overall_sentiment'].apply(cvt_sentiment)

# Drop the excluded images by index label, then every row with a missing value.
unwanted_idx = df_new.index[df_new['image_name'].isin(rem_images)]
df_new = df_new.drop(index=unwanted_idx).dropna()

In [None]:
df.shape

(8870, 9)

In [None]:
(df_new['overall_sentiment'].value_counts())

2    5141
1    2729
0     782
Name: overall_sentiment, dtype: int64

# Training

In [None]:
'''
  Train, Val ( 80-20 )
'''

# A fixed seed makes the split (and everything downstream) repeatable across
# kernel restarts. Stratifying on the label keeps the class proportions the
# same in both parts, which matters because the classes are heavily imbalanced.
df_train,df_val = train_test_split(
    df_new,
    test_size=0.2,
    random_state=42,
    stratify=df_new['overall_sentiment'],
)
# df_val,df_test = train_test_split(df_test,test_size=0.5)

In [None]:
df_train.shape,df_val.shape

((6921, 9), (1731, 9))

In [None]:
# Create dataset

train_dataset = create_dataset(df_train,tokeniser)
val_dataset = create_dataset(df_val,tokeniser)

In [None]:
'''
  Return Weights for training
'''

def ret_sample_weights(df_new,train_dataset):
    """Per-sample weights that are inversely proportional to class frequency.

    Class frequencies are counted on ``df_new['overall_sentiment']``. Each
    class gets the weight ``total / class_count``, and every sample in
    ``train_dataset`` receives the weight of its class.

    Labels are read from ``train_dataset.sentiments`` when that attribute
    exists. This avoids loading and tokenising every sample just to look at
    its label. Otherwise the dataset is iterated and ``sample['sentiment']``
    is used.

    Weights are looked up by label value, so the labels do not need to be
    exactly ``0..k-1``.

    Returns:
        list of float, one weight per sample, in dataset order.
    """
    labels, counts = np.unique(df_new['overall_sentiment'], return_counts=True)
    total = counts.sum()
    class_weights = {
        int(label): float(total / count) for label, count in zip(labels, counts)
    }

    sentiments = getattr(train_dataset, 'sentiments', None)
    if sentiments is None:
        # Generic fallback: pull the label out of each produced sample.
        sentiments = (sample['sentiment'] for sample in train_dataset)

    return [class_weights[int(label)] for label in sentiments]


In [None]:
sample_weights = ret_sample_weights(df_new,train_dataset)

In [None]:
len(sample_weights)

6921

In [None]:
'''
  Loaders for training 
'''
# Number of samples per batch for both loaders.
BATCH_SIZE = 64

# Draw len(sample_weights) indices per epoch, with replacement, in proportion
# to the per-sample weights computed above.
train_sampler = WeightedRandomSampler(sample_weights,num_samples=len(sample_weights),replacement=True)
# The sampler decides the order, so no `shuffle` argument is passed.
train_loader = DataLoader(train_dataset,batch_size=BATCH_SIZE,sampler=train_sampler)
# test_loader = DataLoader(test_dataset,batch_size=BATCH_SIZE)
# Validation data is read in dataset order, without any sampler.
val_loader = DataLoader(val_dataset,batch_size=BATCH_SIZE)

In [None]:
for i in train_loader:
    print(i.keys())
    break

dict_keys(['img', 'input_ids', 'att_mask', 'sentiment'])


In [None]:
'''
  To check if labels are balanced
'''

# One slot per class id (0, 1, 2).
temp = [0,0,0]
for batch in train_loader:
  # Tally every label the weighted sampler drew for this batch.
  for label in batch['sentiment']:
    temp[int(label)] += 1
temp

[2297, 2262, 2362]

In [None]:
def train_epoch(model, train_loader, optimizer, lr_scheduler, step):
    """Run one optimisation pass over ``train_loader``.

    For every batch: move it to ``device``, compute both embeddings and the
    contrastive loss, backpropagate and step the optimizer. When ``step`` is
    ``"batch"`` the scheduler is stepped after every batch as well.

    Returns:
        The ``AvgMeter`` holding the sample-weighted mean training loss.
    """
    meter = AvgMeter()
    progress = tqdm(train_loader, total=len(train_loader))

    for batch in progress:
        # Every value of the batch dict is moved onto the training device.
        batch = {key: tensor.to(device) for key, tensor in batch.items()}

        optimizer.zero_grad()
        img_emb, txt_emb = model(batch['img'], batch['input_ids'], batch['att_mask'])
        _, _, loss = calc_loss(img_emb, txt_emb)
        loss.backward()
        optimizer.step()

        if step == "batch":
            lr_scheduler.step()

        # Weight the running mean by batch size (the last batch may be smaller).
        meter.update(loss.item(), batch["img"].size(0))
        progress.set_postfix(train_loss=meter.avg, lr=get_lr(optimizer))

    return meter


def valid_epoch(model, valid_loader):
    """Compute the sample-weighted mean loss over ``valid_loader``.

    No gradient handling happens here: the function neither disables autograd
    nor switches the model mode, so the caller is responsible for both.

    Returns:
        The ``AvgMeter`` holding the mean validation loss.
    """
    meter = AvgMeter()
    progress = tqdm(valid_loader, total=len(valid_loader))

    for batch in progress:
        batch = {key: tensor.to(device) for key, tensor in batch.items()}

        img_emb, txt_emb = model(batch['img'], batch['input_ids'], batch['att_mask'])
        _, _, loss = calc_loss(img_emb, txt_emb)

        meter.update(loss.item(), batch["img"].size(0))
        progress.set_postfix(valid_loss=meter.avg)

    return meter

In [None]:
# Training hyperparameters.
head_lr = 1e-3  # NOTE(review): not referenced in the cells visible here - confirm it is still needed
image_encoder_lr = 1e-4  # learning rate applied to all model parameters in the optimizer setup
text_encoder_lr = 1e-5  # NOTE(review): not referenced in the cells visible here - confirm it is still needed
weight_decay = 1e-3  # passed to AdamW
patience = 1  # ReduceLROnPlateau: `patience` argument
factor = 0.8  # ReduceLROnPlateau: `factor` argument
epochs = 5  # number of passes of the training loop

In [None]:
# NOTE(review): itertools is not used in the cells visible here.
import itertools

# Model with the default scratch=True, i.e. all CLIP parameters trainable.
model = MemotionModel().to(device)
# A single parameter group: every parameter shares image_encoder_lr.
params = [
    {"params": model.parameters(), "lr": image_encoder_lr},
]
optimizer = torch.optim.AdamW(params, weight_decay=weight_decay)
# mode="min": the scheduler monitors a quantity that should decrease; the
# training loop feeds it the validation loss.
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode="min", patience=patience, factor=factor
)
# "epoch": train_epoch only steps the scheduler itself when this is "batch".
step = "epoch"

# Lowest validation loss seen so far; any real loss beats the initial value.
best_loss = float('inf')

In [None]:
# Per-epoch mean losses, kept so the curves can be inspected or plotted later.
training_loss = []
validation_loss = []
for epoch in range(epochs):
    print(f"Epoch: {epoch + 1}")
    model.train()

    train_loss = train_epoch(model, train_loader, optimizer, lr_scheduler, step)
    print(f'train_loss: {train_loss}')
    training_loss.append(train_loss.avg)

    model.eval()
    with torch.no_grad():
        valid_loss = valid_epoch(model, val_loader)
    print(f'valid_loss: {valid_loss}')
    validation_loss.append(valid_loss.avg)

    # Keep the weights with the lowest validation loss seen so far.
    if valid_loss.avg < best_loss:
        best_loss = valid_loss.avg
        torch.save(model.state_dict(), "best.pt")
        print("Saved Best Model!")

    lr_scheduler.step(valid_loss.avg)

    # Periodic checkpoint every 5th epoch, written after the epoch has trained
    # so the file contains that epoch's weights. It used to be written before
    # training, which left the saved model one epoch behind.
    if (epoch + 1) % 5 == 0:
        torch.save(model.state_dict(),'./clip_scratch_memotion_sentences_basic_model.pt')

Epoch: 1


HBox(children=(FloatProgress(value=0.0, max=109.0), HTML(value='')))


train_loss: Metric: 4.7631


HBox(children=(FloatProgress(value=0.0, max=28.0), HTML(value='')))


valid_loss: Metric: 2.1230
Saved Best Model!
Epoch: 2


HBox(children=(FloatProgress(value=0.0, max=109.0), HTML(value='')))


train_loss: Metric: 2.1568


HBox(children=(FloatProgress(value=0.0, max=28.0), HTML(value='')))


valid_loss: Metric: 2.2622
Epoch: 3


HBox(children=(FloatProgress(value=0.0, max=109.0), HTML(value='')))


train_loss: Metric: 2.3646


HBox(children=(FloatProgress(value=0.0, max=28.0), HTML(value='')))


valid_loss: Metric: 2.2199
Epoch: 4


HBox(children=(FloatProgress(value=0.0, max=109.0), HTML(value='')))


train_loss: Metric: 2.1555


HBox(children=(FloatProgress(value=0.0, max=28.0), HTML(value='')))


valid_loss: Metric: 2.1252
Epoch: 5


HBox(children=(FloatProgress(value=0.0, max=109.0), HTML(value='')))


train_loss: Metric: 2.1252


HBox(children=(FloatProgress(value=0.0, max=28.0), HTML(value='')))


valid_loss: Metric: 2.1102
Saved Best Model!


In [None]:
!cp clip_scratch_memotion_sentences_basic_model.pt /content/drive/MyDrive/Wipro/Implementation/models/