In [2]:
import torch
import pandas as pd
import numpy as np
import torchdata
from torchtext.data.utils import get_tokenizer
from torchtext.vocab import build_vocab_from_iterator
from torchtext.datasets import AG_NEWS
from sklearn.model_selection import train_test_split
import warnings
import os
from skimage import color 
from skimage.io import imread, imshow
from skimage.transform import resize
import torchvision.transforms as transforms
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from torch import optim
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

In [2]:
#!pip install torchtext
#!pip install torchdata

### Setting up the path to the images folder

In [98]:
# The code below sets the path to the images folder and collects the names of all the image files in it

In [3]:
# Directory of the images folder (this notebook lives in the same parent folder).
workspace = "images"
# Names of all image files. os.listdir() returns entries in arbitrary,
# platform-dependent order, so the list is sorted to make every later step
# that walks it reproducible across machines and runs.
img_file_names = sorted(os.listdir(workspace))

# Preprocessing

In [4]:
# Load the provided annotation table from 'labels.csv' in the working directory.
dataset_file = pd.read_csv('labels.csv') #reading the provided data set
# Last expression of the cell, so the frame is rendered as the cell output.
dataset_file #displaying the dataset

Unnamed: 0.1,Unnamed: 0,image_name,text_ocr,text_corrected,humour,sarcasm,offensive,motivational,overall_sentiment
0,0,image_1.jpg,LOOK THERE MY FRIEND LIGHTYEAR NOW ALL SOHALIK...,LOOK THERE MY FRIEND LIGHTYEAR NOW ALL SOHALIK...,hilarious,general,not_offensive,not_motivational,very_positive
1,1,image_2.jpeg,The best of #10 YearChallenge! Completed in le...,The best of #10 YearChallenge! Completed in le...,not_funny,general,not_offensive,motivational,very_positive
2,2,image_3.JPG,Sam Thorne @Strippin ( Follow Follow Saw every...,Sam Thorne @Strippin ( Follow Follow Saw every...,very_funny,not_sarcastic,not_offensive,not_motivational,positive
3,3,image_4.png,10 Year Challenge - Sweet Dee Edition,10 Year Challenge - Sweet Dee Edition,very_funny,twisted_meaning,very_offensive,motivational,positive
4,4,image_5.png,10 YEAR CHALLENGE WITH NO FILTER 47 Hilarious ...,10 YEAR CHALLENGE WITH NO FILTER 47 Hilarious ...,hilarious,very_twisted,very_offensive,not_motivational,neutral
...,...,...,...,...,...,...,...,...,...
6987,6987,image_6988.jpg,Tuesday is Mardi Gras Wednesday is Valentine's...,Tuesday is Mardi Gras Wednesday is Valentine's...,very_funny,twisted_meaning,very_offensive,motivational,neutral
6988,6988,image_6989.jpg,MUST WATCH MOVIES OF 2017 ITI Chennai memes MA...,MUST WATCH MOVIES OF 2017 ITI Chennai memes MA...,funny,twisted_meaning,not_offensive,not_motivational,neutral
6989,6989,image_6990.png,LESS MORE TALKING PLANNING SODA JUNK FOOD COMP...,LESS MORE TALKING PLANNING SODA JUNK FOOD COMP...,funny,general,slight,not_motivational,positive
6990,6990,image_6991.jpg,When I VERY have time is a fantasy No one has ...,When I have time is a fantasy. no one has time...,not_funny,twisted_meaning,not_offensive,motivational,very_positive


In [5]:
# Separate the columns needed from the main dataset: the image file name and
# the corrected text.
# .copy() makes this an independent frame, so removing rows from it later does
# not trigger pandas' SettingWithCopyWarning.
selected_Col_p1 = dataset_file[["image_name","text_corrected"]].copy()
# Count the missing values per column.
selected_Col_p1.isnull().sum()

image_name        0
text_corrected    5
dtype: int64

In [6]:
# Row labels of the entries whose corrected text is missing.
missing_text_mask = selected_Col_p1['text_corrected'].isna()
null_txt_list = selected_Col_p1.loc[missing_text_mask].index.tolist()
null_txt_list

[119, 4799, 6781, 6784, 6786]

In [None]:
#The cell below finds the names of all images whose first array dimension (height) is greater than 2000, because they are treated as outliers.
#They are removed later on.

In [7]:
# Collect the file names of the oversized (outlier) images.
# A file qualifies when the first dimension of its loaded array exceeds 2000.
img_OLnames_list = [
    file_name
    for file_name in img_file_names
    if imread(workspace + "/" + file_name).shape[0] > 2000
]



In [8]:
# Find the row labels of the outlier images in the selected dataset.
# img_OLnames_list holds the outlier file names (ol = outliers);
# ol_img_list receives the row labels of those files.
#
# A single isin() lookup replaces the per-name scan. The previous version kept
# its intermediate result in a variable named `str`, which shadowed Python's
# built-in str for the rest of the notebook, and it raised IndexError for any
# outlier file that has no row in the dataset.
outlier_row_mask = selected_Col_p1['image_name'].isin(img_OLnames_list)
ol_img_list = selected_Col_p1.loc[outlier_row_mask].index.tolist()

In [9]:
# Every row label to discard: outlier images first, then rows with missing text.
remove_index = [*ol_img_list, *null_txt_list]

In [10]:
# Row count and per-column null counts before any rows are removed.
print("Length before removing ", len(selected_Col_p1))
selected_Col_p1.isnull().sum()

Length before removing  6992


image_name        0
text_corrected    5
dtype: int64

In [11]:
# Remove the rows with missing text and the outlier images.
# Re-binding the result (rather than drop(..., inplace=True)) avoids pandas'
# SettingWithCopyWarning, and errors="ignore" lets the cell be re-run after
# the rows are already gone instead of raising KeyError.
selected_Col_p1 = selected_Col_p1.drop(index=remove_index, errors="ignore")
print("Length after removing ", len(selected_Col_p1)) 
selected_Col_p1.isnull().sum()

Length after removing  6968


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


image_name        0
text_corrected    0
dtype: int64

In [100]:
# Overall-sentiment labels with the outlier and missing-text rows removed.
# drop() returns a new frame here, so the slice of dataset_file is never
# mutated in place (no SettingWithCopyWarning) and the cell can be re-run.
labels_Col_p1 = dataset_file[["overall_sentiment"]].drop(index=remove_index)

In [77]:
# Number of label rows left after the rows in remove_index were dropped.
len(labels_Col_p1)

6968

In [78]:
# Count of missing values in the remaining sentiment labels.
labels_Col_p1.isnull().sum()

overall_sentiment    0
dtype: int64

# For Text

In [15]:
tokenizer = get_tokenizer('basic_english')
# NOTE(review): train_iter is not referenced anywhere else in the visible notebook.
train_iter = AG_NEWS(split='train')

# Only the corrected-text column of selected_Col_p1 is tokenized and mapped to
# one number per word.
df_SCP1_tokenization = selected_Col_p1["text_corrected"]

def tokens(data):
    """Lazily yield the tokenizer's output for each text in `data`."""
    yield from map(tokenizer, data)

# Vocabulary built from the tokens of every corrected text.
vocab = build_vocab_from_iterator(tokens(df_SCP1_tokenization))




In [16]:
# One list of vocabulary indices per row: each sentence (text_corrected) is
# tokenized into words, then vocab maps every word to its number.
neumeric_array = [
    vocab(tokenizer(sentence))
    for sentence in df_SCP1_tokenization
]

In [17]:
# Convert the list of per-sentence index lists to a numpy array.
# The inner lists have different lengths, so dtype=object is required: without
# it NumPy emits a ragged-array deprecation warning, and newer releases raise
# ValueError. Each element stays a Python list, which the padding step relies on.
neumeric_array = np.array(neumeric_array, dtype=object)
len(neumeric_array)

  neumeric_array = np.array(neumeric_array)


6968

In [18]:
# Length of the longest index list in neumeric_array (0 when there are none).
max_len = max((len(token_ids) for token_ids in neumeric_array), default=0)
print(max_len)

225


In [19]:
# Bring every index list to the same length by appending zeros in place.
# Lists that are already max_len long get an empty extension and stay unchanged.
for token_ids in neumeric_array:
    token_ids.extend([0] * (max_len - len(token_ids)))

In [20]:
#For checking if padding and equalizing the size worked
#for i in neumeric_array:
#    print(len(i))

In [21]:
# One tensor per padded index list.
text_neumeric_tensor = [torch.tensor(token_ids) for token_ids in neumeric_array]

In [79]:
# Convert the overall_sentiment strings to the integer classes the model uses:
#   neutral -> 0, positive / very_positive -> 1, negative / very_negative -> 2
SENTIMENT_TO_CLASS = {
    'neutral': 0,
    'positive': 1,
    'very_positive': 1,
    'negative': 2,
    'very_negative': 2,
}

sentiment_values = labels_Col_p1['overall_sentiment']

# The previous loop appended any unrecognised value unchanged, and its bare
# `except` could never fire. Report unexpected labels here, before they reach
# the tensor conversion.
unknown_sentiments = {
    value for value in sentiment_values if value not in SENTIMENT_TO_CLASS
}
if unknown_sentiments:
    raise ValueError(f"Unexpected overall_sentiment values: {unknown_sentiments!r}")

final_lab = [SENTIMENT_TO_CLASS[value] for value in sentiment_values]

In [23]:
# One integer tensor per entry of final_lab (values 0, 1 or 2).
label_tensor = [torch.tensor(class_id, dtype=torch.int) for class_id in final_lab]

In [24]:
#to view tensors of label
#label_tensor

In [25]:
#to view tensors of text neumerics
#text_neumeric_tensor

In [26]:
# Split the text tensors and their labels: 30% goes to the test set, with a
# fixed random_state so the split is repeatable.
x_TENtext_train , x_TENtext_test , y_TENtext_train , y_TENtext_test = train_test_split(text_neumeric_tensor,label_tensor, test_size = 0.3, random_state = 60 )

# For Image

In [27]:
# Remove the names of the outlier images from the image list.
# Building a filtered list (instead of calling list.remove per name) keeps the
# cell re-runnable: remove() raises ValueError once a name is already gone.
outlier_names = set(img_OLnames_list)
img_file_names = [name for name in img_file_names if name not in outlier_names]
len(img_file_names)

6973

In [28]:
# Look up the image names of the rows that had no text in the dataset.
nullvalues_df = dataset_file.loc[null_txt_list, ["image_name"]]
null_name_list = nullvalues_df["image_name"].reset_index(drop=True)

In [29]:
# Remove the names of the images whose text is missing.
# Building a filtered list (instead of calling list.remove per name) keeps the
# cell re-runnable: remove() raises ValueError once a name is already gone.
names_without_text = set(null_name_list)
img_file_names = [name for name in img_file_names if name not in names_without_text]
len(img_file_names)

6968

In [32]:
# Create one flattened grayscale tensor per remaining dataset row.
# The loop walks selected_Col_p1["image_name"] rather than img_file_names:
# img_file_names comes from os.listdir(), whose order is unrelated to the
# dataset's row order, while the text and label tensors are built in row order.
# Iterating in row order keeps Img_tensor[k] paired with the k-th text and label.
Img_tensor = []
transform = transforms.ToTensor()
for image_name in selected_Col_p1["image_name"]:
    image = imread(os.path.join(workspace, image_name), as_gray=True)
    image = resize(image, (250,250))   # every image becomes 250x250
    tensor = transform(image)
    Img_tensor.append(tensor.flatten())

In [92]:
# img_tensor_df = pd.DataFrame({'Img_Tensors':[*Img_tensor]})
# Number of values in the first flattened image tensor.
len(Img_tensor[0])

62500

In [93]:
# img_tensor_df.to_csv('Img_tensors_df.csv' , index=False)

In [30]:
# Read the previously saved image tensors from 'Img_tensors_df.csv'.
# NOTE(review): the lines that would create and write this CSV are commented
# out in this notebook, so the read only succeeds if the file already exists.
img_tensor_df = pd.read_csv('Img_tensors_df.csv')

In [33]:
# NOTE(review): x_img is not passed to the split below; the split uses the
# in-memory Img_tensor list instead.
x_img = img_tensor_df['Img_Tensors']
# Same test_size and random_state as the text split. The training loop pairs
# x_img_*[i] with x_TENtext_*[i], so it presumably relies on both splits
# shuffling identically - TODO confirm.
x_img_train , x_img_test , y_img_train , y_img_test = train_test_split(Img_tensor,label_tensor , test_size= 0.3, random_state =60)

In [34]:
# Number of values in the first training image tensor.
len(x_img_train[0])

62500

# Neural Network (Part 1) 

In [38]:
class Neural_Network_Model(nn.Module):
    """Y-shaped fully connected classifier over a text vector and an image vector.

    The text input and the image input each pass through their own stack of
    five linear layers; the two results are concatenated and pushed through
    six further linear layers that end in `num_classes` raw scores. Sigmoid is
    applied after every layer except the last one of each stack.

    Args:
        text_dim: length of the text input vector. Defaults to 225.
        image_dim: length of the flattened image vector. Defaults to 250*250.
        num_classes: number of output scores. Defaults to 3.
    """

    def __init__(self, text_dim=225, image_dim=250*250, num_classes=3):
        super(Neural_Network_Model,self).__init__()

        # Text branch: text_dim -> 100
        self.linearText1 = nn.Linear(text_dim, 200)
        self.linearText2 = nn.Linear(200, 150)
        self.linearText3 = nn.Linear(150, 130)
        self.linearText4 = nn.Linear(130, 120)
        self.linearText5 = nn.Linear(120, 100)

        # Image branch: image_dim -> 700
        self.linearImg1 = nn.Linear(image_dim, 6000)
        self.linearImg2 = nn.Linear(6000, 4000)
        self.linearImg3 = nn.Linear(4000, 1000)
        self.linearImg4 = nn.Linear(1000, 900)
        self.linearImg5 = nn.Linear(900, 700)

        # Joint layers after concatenation: 100 + 700 = 800 -> num_classes
        self.linearCat1 = nn.Linear(800, 500)
        self.linearCat2 = nn.Linear(500, 350)
        self.linearCat3 = nn.Linear(350, 300)
        self.linearCat4 = nn.Linear(300, 250)
        self.linearCat5 = nn.Linear(250, 100)
        self.linearCat6 = nn.Linear(100, num_classes)

    @staticmethod
    def _sigmoid_stack(x, layers):
        """Apply `layers` in order, with a sigmoid after every layer except the last."""
        *hidden_layers, output_layer = layers
        for layer in hidden_layers:
            x = torch.sigmoid(layer(x))
        return output_layer(x)

    def forward(self,ArgTxt,ArgImg):
        """Return the raw class scores for one sample or a batch of samples.

        Args:
            ArgTxt: float tensor whose last dimension matches the text input size.
            ArgImg: float tensor whose last dimension matches the image input size,
                with the same leading dimensions as ArgTxt.

        Returns:
            Tensor of raw (un-normalised) scores along the last dimension.
        """
        sigTXT = self._sigmoid_stack(ArgTxt, (
            self.linearText1, self.linearText2, self.linearText3,
            self.linearText4, self.linearText5,
        ))
        sigIMG = self._sigmoid_stack(ArgImg, (
            self.linearImg1, self.linearImg2, self.linearImg3,
            self.linearImg4, self.linearImg5,
        ))

        # Join along the feature dimension. dim=-1 equals the previous default
        # (dim 0) for unbatched 1-D inputs and also works for batched input.
        txtNimg = torch.cat((sigTXT, sigIMG), dim=-1)

        # Extra layers after joining give the network a Y shape.
        Output_txtNimg = self._sigmoid_stack(txtNimg, (
            self.linearCat1, self.linearCat2, self.linearCat3,
            self.linearCat4, self.linearCat5, self.linearCat6,
        ))
        return Output_txtNimg
    

In [39]:
# Instantiate the Part 1 network defined in the previous cell.
model = Neural_Network_Model()

In [40]:
# Cross-entropy loss on the model's output, optimised with SGD at a learning
# rate of 0.01 over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [102]:
# Only the first 20 samples of each split are used, to keep a run short.
# min() guards against splits that hold fewer samples than that.
# Set these to len(x_img_train) / len(x_img_test) for a full run.
n_train_samples = min(20, len(x_img_train))
n_test_samples = min(20, len(x_img_test))

for epoch in range(1):  # number of passes over the dataset

    # ---- training: one optimiser step per sample ----
    model.train()
    for i in range(n_train_samples):
        image = x_img_train[i].float()            # image tensor
        text = x_TENtext_train[i].float()         # text tensor
        # Labels are taken from the image split; the text split is indexed
        # with the same i, so both splits are assumed to be ordered alike.
        label = y_img_train[i].type(torch.long)   # label tensor
        pred = model(text, image)
        loss = criterion(pred, label)             # calculate loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()                          # update weights

    # ---- testing ----
    model_predictions = []
    Ytest = []
    model.eval()
    # No gradients are needed for evaluation; skipping them avoids building an
    # autograd graph for every test sample.
    with torch.no_grad():
        for i in range(n_test_samples):
            image = x_img_test[i].float()         # image tensor
            text = x_TENtext_test[i].float()      # text tensor
            pred = model(text, image)

            model_predictions.append(int(torch.argmax(pred)))
            Ytest.append(int(y_img_test[i]))      # label tensor back to a plain int

    print("epoch",epoch+1)
    print("Accuracy: ",accuracy_score(Ytest,model_predictions)*100)
    print('F1 score :',f1_score(Ytest,model_predictions,average = 'macro')*100)


print('Finished Training')

epoch 1
Accuracy:  75.0
F1 score : 28.57142857142857
Finished Training


# Part - 2

In [103]:
#Below, the labels for humour, sarcasm, offensive and motivational are separated, and then the outliers and null values are removed

In [53]:
# One single-column frame per Part 2 target, with the outlier and missing-text
# rows (remove_index) removed.
# drop() returns a new frame, so the slices of dataset_file are never mutated
# in place (no SettingWithCopyWarning) and the cell can be re-run safely.
labels_humour = dataset_file[["humour"]].drop(index=remove_index)
labels_sarcasm = dataset_file[["sarcasm"]].drop(index=remove_index)
labels_offensive = dataset_file[["offensive"]].drop(index=remove_index)
labels_motivational = dataset_file[["motivational"]].drop(index=remove_index)


## Humour

In [96]:
# Number of rows per humour label.
labels_humour.value_counts()

humour    
funny         2442
very_funny    2231
not_funny     1645
hilarious      650
dtype: int64

In [69]:
# Convert the humour labels to numbers for the model:
# 'not_funny' -> 0, every other value -> 1.
final_lab_humour = [
    0 if humour == 'not_funny' else 1
    for humour in labels_humour['humour']
]

In [71]:
# Number of converted humour labels.
len(final_lab_humour)

6968

In [83]:
# One integer tensor per entry of final_lab_humour (values 0 or 1).
label_tensor_humour = [
    torch.tensor(class_id, dtype=torch.int) for class_id in final_lab_humour
]

## Sarcasm

In [95]:
# Number of rows per sarcasm label.
labels_sarcasm.value_counts()

sarcasm        
general            3495
twisted_meaning    1543
not_sarcastic      1537
very_twisted        393
dtype: int64

In [72]:
# Convert the sarcasm labels to numbers for the model:
# 'not_sarcastic' -> 0, every other value -> 1.
final_lab_sarcasm = [
    0 if sarcasm == 'not_sarcastic' else 1
    for sarcasm in labels_sarcasm['sarcasm']
]
        


In [80]:
# Number of converted sarcasm labels.
len(final_lab_sarcasm)

6968

In [84]:
# One integer tensor per entry of final_lab_sarcasm (values 0 or 1).
label_tensor_sarcasm = [
    torch.tensor(class_id, dtype=torch.int) for class_id in final_lab_sarcasm
]

## Offensive

In [94]:
# Number of rows per offensive label.
labels_offensive.value_counts()

offensive        
not_offensive        2704
slight               2583
very_offensive       1460
hateful_offensive     221
dtype: int64

In [73]:
# Convert the offensive labels to numbers for the model:
# 'not_offensive' -> 0, every other value -> 1.
final_lab_offensive = [
    0 if offensive == 'not_offensive' else 1
    for offensive in labels_offensive['offensive']
]

In [81]:
# Number of converted offensive labels.
len(final_lab_offensive)

6968

In [85]:
# One integer tensor per entry of final_lab_offensive (values 0 or 1).
label_tensor_offensive = [
    torch.tensor(class_id, dtype=torch.int) for class_id in final_lab_offensive
]

## Motivational

In [93]:
# Number of rows per motivational label.
labels_motivational.value_counts()

motivational    
not_motivational    4510
motivational        2458
dtype: int64

In [74]:
# Convert the motivational labels to numbers for the model:
# 'not_motivational' -> 0, every other value -> 1.
final_lab_motivational = [
    0 if motivational == 'not_motivational' else 1
    for motivational in labels_motivational['motivational']
]

In [82]:
# Number of converted motivational labels.
len(final_lab_motivational)

6968

In [86]:
# One integer tensor per entry of final_lab_motivational (values 0 or 1).
label_tensor_motivational = [
    torch.tensor(class_id, dtype=torch.int) for class_id in final_lab_motivational
]

# Neural Network (Part 2)

In [97]:
class Neural_Network_Model_p2(nn.Module):
    """Multi-task fully connected network with four output heads.

    A text vector and a flattened image vector each pass through their own
    stack of seven linear layers. The two 350-wide results are concatenated,
    pushed through seven shared layers down to 65 features, and then fed to
    four independent heads (humour, sarcasm, offensive, motivational).
    Sigmoid is applied after every layer except the last one of each stack.

    Fixes relative to the previous revision:
      * the constructor was spelled `_init_`, so Python never ran it and no
        layer was ever created;
      * forward() referenced `self.linearlinearP2Text*` and an undefined `h`;
      * linearP2Moti3 / linearP2Moti4 expected 40 inputs but received 10.

    Args:
        text_dim: length of the text input vector. Defaults to 12933
            (presumably the vocabulary size - TODO confirm; the padded index
            tensors built for Part 1 are only 225 long).
        image_dim: length of the flattened image vector. Defaults to 250*250
            because the images are resized to 250x250.
    """

    def __init__(self, text_dim=12933, image_dim=250*250):
        super(Neural_Network_Model_p2,self).__init__()

        # Text branch: text_dim -> 350
        self.linearP2Text1 = nn.Linear(text_dim, 9000)
        self.linearP2Text2 = nn.Linear(9000, 8000)
        self.linearP2Text3 = nn.Linear(8000, 7000)
        self.linearP2Text4 = nn.Linear(7000, 6000)
        self.linearP2Text5 = nn.Linear(6000, 5000)
        self.linearP2Text6 = nn.Linear(5000, 3500)
        self.linearP2Text7 = nn.Linear(3500, 350)

        # Image branch: image_dim -> 350
        # NOTE(review): with the defaults the first image layer alone holds
        # 62500 * 9000 weights; check the available memory before instantiating.
        self.linearP2Img1 = nn.Linear(image_dim, 9000)
        self.linearP2Img2 = nn.Linear(9000, 8000)
        self.linearP2Img3 = nn.Linear(8000, 6000)
        self.linearP2Img4 = nn.Linear(6000, 5000)
        self.linearP2Img5 = nn.Linear(5000, 3500)
        self.linearP2Img6 = nn.Linear(3500, 500)
        self.linearP2Img7 = nn.Linear(500, 350)

        # Shared layers after concatenation: 350 + 350 = 700 -> 65
        self.linearP2Cat1 = nn.Linear(700, 600)
        self.linearP2Cat2 = nn.Linear(600, 550)
        self.linearP2Cat3 = nn.Linear(550, 400)
        self.linearP2Cat4 = nn.Linear(400, 350)
        self.linearP2Cat5 = nn.Linear(350, 200)
        self.linearP2Cat6 = nn.Linear(200, 100)
        self.linearP2Cat7 = nn.Linear(100, 65)

        # Humour head: 65 -> 4 (the humour column has 4 distinct values)
        # NOTE(review): the label cells in this notebook reduce humour, sarcasm
        # and offensive to 0/1; confirm that 4 outputs per head is intended.
        self.linearP2Hum1 = nn.Linear(65,40)
        self.linearP2Hum2 = nn.Linear(40,30)
        self.linearP2Hum3 = nn.Linear(30,20)
        self.linearP2Hum4 = nn.Linear(20,10)
        self.linearP2Hum5 = nn.Linear(10,4)

        # Sarcasm head: 65 -> 4 (the sarcasm column has 4 distinct values)
        self.linearP2Sar1 = nn.Linear(65,40)
        self.linearP2Sar2 = nn.Linear(40,30)
        self.linearP2Sar3 = nn.Linear(30,20)
        self.linearP2Sar4 = nn.Linear(20,10)
        self.linearP2Sar5 = nn.Linear(10,4)

        # Offensive head: 65 -> 4 (the offensive column has 4 distinct values)
        self.linearP2Off1 = nn.Linear(65,40)
        self.linearP2Off2 = nn.Linear(40,30)
        self.linearP2Off3 = nn.Linear(30,20)
        self.linearP2Off4 = nn.Linear(20,10)
        self.linearP2Off5 = nn.Linear(10,4)

        # Motivational head: 65 -> 2 (the motivational column has 2 distinct values).
        # The widths now chain 40 -> 30 -> 20 -> 10 like the other heads.
        self.linearP2Moti1 = nn.Linear(65,40)
        self.linearP2Moti2 = nn.Linear(40,30)
        self.linearP2Moti3 = nn.Linear(30,20)
        self.linearP2Moti4 = nn.Linear(20,10)
        self.linearP2Moti5 = nn.Linear(10,2)

    @staticmethod
    def _sigmoid_stack(x, layers):
        """Apply `layers` in order, with a sigmoid after every layer except the last."""
        *hidden_layers, output_layer = layers
        for layer in hidden_layers:
            x = torch.sigmoid(layer(x))
        return output_layer(x)

    def forward(self,P2ArgTxT,P2ArgImg):
        """Return the raw scores of the four heads for one sample or a batch.

        Args:
            P2ArgTxT: float tensor whose last dimension matches the text input size.
            P2ArgImg: float tensor whose last dimension matches the image input
                size, with the same leading dimensions as P2ArgTxT.

        Returns:
            Tuple (humour, sarcasm, offensive, motivational) of raw
            (un-normalised) score tensors.
        """
        P2sigTXT = self._sigmoid_stack(P2ArgTxT, (
            self.linearP2Text1, self.linearP2Text2, self.linearP2Text3,
            self.linearP2Text4, self.linearP2Text5, self.linearP2Text6,
            self.linearP2Text7,
        ))
        P2sigIMG = self._sigmoid_stack(P2ArgImg, (
            self.linearP2Img1, self.linearP2Img2, self.linearP2Img3,
            self.linearP2Img4, self.linearP2Img5, self.linearP2Img6,
            self.linearP2Img7,
        ))

        # Join along the feature dimension. dim=-1 equals the previous default
        # (dim 0) for unbatched 1-D inputs and also works for batched input.
        P2txtNimg = torch.cat((P2sigTXT, P2sigIMG), dim=-1)

        # Shared layers after joining give the network a Y shape.
        P2txtNimg = self._sigmoid_stack(P2txtNimg, (
            self.linearP2Cat1, self.linearP2Cat2, self.linearP2Cat3,
            self.linearP2Cat4, self.linearP2Cat5, self.linearP2Cat6,
            self.linearP2Cat7,
        ))

        # Each head reads the same shared 65-wide representation.
        p2_humour = self._sigmoid_stack(P2txtNimg, (
            self.linearP2Hum1, self.linearP2Hum2, self.linearP2Hum3,
            self.linearP2Hum4, self.linearP2Hum5,
        ))
        p2_sarcasm = self._sigmoid_stack(P2txtNimg, (
            self.linearP2Sar1, self.linearP2Sar2, self.linearP2Sar3,
            self.linearP2Sar4, self.linearP2Sar5,
        ))
        p2_offensive = self._sigmoid_stack(P2txtNimg, (
            self.linearP2Off1, self.linearP2Off2, self.linearP2Off3,
            self.linearP2Off4, self.linearP2Off5,
        ))
        p2_motivational = self._sigmoid_stack(P2txtNimg, (
            self.linearP2Moti1, self.linearP2Moti2, self.linearP2Moti3,
            self.linearP2Moti4, self.linearP2Moti5,
        ))

        return p2_humour,p2_sarcasm,p2_offensive,p2_motivational