###### -----------------START--------------------------------------------

In [1]:
import json
import os
from tqdm.auto import tqdm
import matplotlib.pyplot as plt

In [2]:
train_file_path = '/home/aritra/cric/train_questions.json'
val_file_path = '/home/aritra/cric/val_questions.json'
test_file_path = '/home/aritra/cric/test_v1_questions.json'

In [3]:
# Training Set

with open(train_file_path, "r") as file:
     train_json = json.load(file)

In [4]:
# Validation Set

with open(val_file_path, "r") as file:
     val_json = json.load(file)

In [5]:
# Test Set

with open(test_file_path, "r") as file:
     test_json = json.load(file)

In [6]:
len(train_json)

365235

In [7]:
len(val_json)

43112

In [8]:
len(test_json)

86003

In [9]:
train_json[1099]['question']

'which brown animal walking in the field could be used for transporting people'

In [10]:
val_json[1099]['question']

'is there an object that is a type of public transports'

In [11]:
test_json[1099]['question']

'can the ceramic bird spread wings'

### ------------------------------Extracting Data of Training Set-------------------------------------------------------------------------------



In [12]:
questionList = []
answerList = []
imgList = []

In [13]:
train_json[2]['image_id']

1005

#### iter 1: from 0 , 149000 -> error1.txt -> 159
#### iter 2: from 150000 , 240000 -> error2.txt -> 34
#### iter 3: from 240000 , 365235 ->error3.txt -> 121

In [14]:
# verifying
indexToExclude = []

# Each error log holds one integer row index per line; read them in file order.
for errorFile in ('error1.txt', 'error2.txt', 'error3.txt'):
    with open(errorFile, 'r') as handle:
        indexToExclude.extend(int(row.strip()) for row in handle)

In [15]:
len(indexToExclude)

314

In [16]:
# Membership tests on a list rescan it for every row; a set answers in O(1).
excludedTrainRows = set(indexToExclude)

for i in tqdm(range(len(train_json))):

    # rows listed in the error files are left out of the training lists
    if i in excludedTrainRows:
        continue

    pointer = train_json[i]

    questionList.append(pointer['question'])
    answerList.append(pointer['answer'])
    imgList.append(pointer['image_id'])

  0%|          | 0/365235 [00:00<?, ?it/s]

In [17]:
len(questionList), len(answerList), len(imgList)

(364921, 364921, 364921)

In [18]:
questionList = questionList[0:5000]
answerList = answerList[0:5000]
imgList = imgList[0:5000]

In [19]:
len(list(set(answerList)))

596

### ---------------------------------------Map Creation--------------------------------------------------------

In [20]:
def findUnique(targetList):
    """Return the distinct items of ``targetList`` in first-occurrence order.

    Args:
        targetList: any iterable of items (answer strings in this notebook).

    Returns:
        A new list with duplicates removed; the relative order of the first
        occurrences is preserved.
    """
    items = list(targetList)

    try:
        # dicts keep insertion order, so this de-duplicates in O(n)
        # instead of rescanning the result list for every item.
        return list(dict.fromkeys(items))
    except TypeError:
        # Unhashable items (e.g. lists) cannot be dict keys: fall back to
        # the equality-based scan so such inputs keep working.
        uniqueList = []
        for word in items:
            if word not in uniqueList:
                uniqueList.append(word)
        return uniqueList

In [21]:
len(findUnique(answerList))

596

In [22]:
# creating word to number mapping

uniqueAnsList = findUnique(answerList)

mapping = {}
for word in uniqueAnsList:
    # ids are handed out consecutively from 0; a word keeps the id of its first occurrence
    mapping.setdefault(word, len(mapping))

# number of ids handed out so far
counter = len(mapping)

In [23]:
uniqueAnsList[0:5]

['no', 'small', 'picture', 'table', 'bookshelf']

In [24]:
# NOTE: despite its name this holds the largest class id (ids start at 0), i.e. one less
# than the number of classes; the one-hot cells size their vectors with numOfClasses+1.
numOfClasses = max(mapping.values())
numOfClasses

595

In [25]:
len(mapping)

596

In [26]:
# creating number to word mapping (inverse of `mapping`)

reverse_mapping = {classId: word for word, classId in mapping.items()}

### --------------------------------------Processing of Training Set--------------------------------------------------------------------

In [27]:
# converting answer labels of Train set into numbers (class ids looked up in `mapping`)
labels = [mapping[answer] for answer in answerList]

In [28]:
len(labels)

5000

## One Hot Encoding

In [29]:
scores = []

for answer in tqdm(answerList):

    # one-hot row: all zeros except a 1 at the answer's class id
    oneHot = [0] * (numOfClasses+1)
    oneHot[mapping[answer]] = 1

    scores.append(oneHot)

  0%|          | 0/5000 [00:00<?, ?it/s]

In [30]:
len(scores)

5000

In [31]:
filepath = '/home/aritra/cric/images/img/'
imgPathList = []

for imageId in tqdm(imgList):

    # image files are named "<image_id>.jpg" inside `filepath`
    imgPathList.append(os.path.join(filepath, str(imageId) + '.jpg'))

  0%|          | 0/5000 [00:00<?, ?it/s]

In [32]:
from datasets import load_dataset
from datasets import Dataset
import datasets
from PIL import Image
import torch

In [33]:
imgPathList[0:5]

['/home/aritra/cric/images/img/1000.jpg',
 '/home/aritra/cric/images/img/1005.jpg',
 '/home/aritra/cric/images/img/1005.jpg',
 '/home/aritra/cric/images/img/1005.jpg',
 '/home/aritra/cric/images/img/1008.jpg']

In [34]:
len(imgPathList)

5000

In [35]:
listToDictionary = {'questions':questionList, 'labels': labels, 'scores': scores, 'images':imgPathList}
modified_train_set = Dataset.from_dict(listToDictionary)

In [36]:
# mapping each filepath to images in the directory

modified_train_set = modified_train_set.cast_column("images", datasets.Image())

In [37]:
modified_train_set

Dataset({
    features: ['questions', 'labels', 'scores', 'images'],
    num_rows: 5000
})

### ------------------------------------------------Extracting Validation Set---------------------------------------------

In [38]:
questionList_val = []
answerList_val = []
imgList_val = []

In [39]:
# collecting the indices of validation rows with erroneous images

indexToExcludeVal = []

# Each file lists one integer row index per line; read them in file order.
for errorFile in ('error_validation.txt', 'error_validation2.txt'):
    with open(errorFile, 'r') as handle:
        indexToExcludeVal.extend(int(row.strip()) for row in handle)


In [40]:
# excluding the indices containing erroneous images

# Membership tests on a list rescan it for every row; a set answers in O(1).
excludedValRows = set(indexToExcludeVal)

for i in tqdm(range(len(val_json))):

    if i in excludedValRows:
        continue

    pointer = val_json[i]

    questionList_val.append(pointer['question'])
    answerList_val.append(pointer['answer'])
    imgList_val.append(pointer['image_id'])

  0%|          | 0/43112 [00:00<?, ?it/s]

43112 -> 43068 -> 33175

In [41]:
len(questionList_val), len(answerList_val), len(imgList_val)

(33175, 33175, 33175)

In [42]:
uniqueAnswerListVal = list(set(answerList_val))
len(uniqueAnswerListVal)

266

In [43]:
# check if all the unique answers of the validation set are present in the mapping

# positions of validation answers that have no class id in `mapping`
store = [position for position, word in enumerate(answerList_val) if word not in mapping]

n = len(store)                   # answers missing from the mapping
y = len(answerList_val) - n      # answers covered by the mapping

In [44]:
y

33175

### --------------------------------------------------------Processing Validation Set-------------------------------------------------------

In [45]:
# converting labels of val_set into numbers

labels_val = []

for i in range(len(answerList_val)):
    labels_val.append( mapping[ answerList_val[i] ] )

In [46]:
len(labels_val)

33175

In [47]:
scores_val = []

for i in tqdm(range(len(answerList_val))):
    
    s = [0] * (numOfClasses+1)
    s[ mapping[ answerList_val[i]] ] = 1
    
    scores_val.append(s)

  0%|          | 0/33175 [00:00<?, ?it/s]

In [48]:
len(scores_val)

33175

In [49]:
imgPathList_val = []
filepath = '/home/aritra/cric/images/img/'

for i in tqdm(range(len(imgList_val))):
    
    imgName = str(imgList_val[i]) + '.jpg'
    concatedPath = os.path.join(filepath,imgName)
    
    imgPathList_val.append(concatedPath)

  0%|          | 0/33175 [00:00<?, ?it/s]

In [50]:
imgPathList_val[0:5]

['/home/aritra/cric/images/img/1003.jpg',
 '/home/aritra/cric/images/img/1003.jpg',
 '/home/aritra/cric/images/img/1018.jpg',
 '/home/aritra/cric/images/img/1018.jpg',
 '/home/aritra/cric/images/img/1027.jpg']

In [51]:
# creating HF dataset to map images fast of Val_set

listToDictionary = {'questions':questionList_val, 'labels':labels_val, 'scores':scores_val, 'images':imgPathList_val}
modified_val_set = Dataset.from_dict(listToDictionary)

In [52]:
# mapping each filepath of Val Set to images in the directory

modified_val_set = modified_val_set.cast_column("images", datasets.Image())

### -------------------------------------------Extracting Test Set-------------------------------------------------


In [53]:
questionList_test = []
answerList_test = []
imgList_test = []

In [54]:
indexToExcludeTest = []

# Each file lists one integer row index per line; read them in file order.
for errorFile in ('error_testSet1.txt', 'errorTestSet2.txt'):
    with open(errorFile, 'r') as handle:
        indexToExcludeTest.extend(int(row.strip()) for row in handle)

In [55]:
len(indexToExcludeTest)

14150

In [56]:
# Membership tests on a list rescan it for every row (thousands of entries per
# test row here); a set answers in O(1).
excludedTestRows = set(indexToExcludeTest)

for i in tqdm(range(len(test_json))):

    if i in excludedTestRows:
        continue

    pointer = test_json[i]

    questionList_test.append(pointer['question'])
    answerList_test.append(pointer['answer'])
    imgList_test.append(pointer['image_id'])

  0%|          | 0/86003 [00:00<?, ?it/s]

86003 -> 71863

In [57]:
len(answerList_test)

71863

### -------------------------------------- Processing Test Set ----------------------------------------------------------------------------

In [58]:
# check if all the unique answers of the test set are present in the mapping

# positions of test answers that have no class id in `mapping`
store = [position for position, word in enumerate(answerList_test) if word not in mapping]

n = len(store)                    # answers missing from the mapping
y = len(answerList_test) - n      # answers covered by the mapping

In [59]:
y

71863

In [60]:
# converting answer labels of test_set into numbers
labels_test = []

for i in range(len(answerList_test)):
    labels_test.append( mapping[ answerList_test[i] ] )

In [61]:
len(labels_test)

71863

In [62]:
scores_test = []

for i in tqdm(range(len(answerList_test))):
    
    s = [0] * (numOfClasses+1)
    s[ mapping[ answerList_test[i]] ] = 1
    
    scores_test.append(s)

  0%|          | 0/71863 [00:00<?, ?it/s]

In [63]:
len(scores_test)

71863

In [64]:
imgPathList_test = []
filepath = '/home/aritra/cric/images/img/'

for i in tqdm(range(len(imgList_test))):
    
    imgName = str(imgList_test[i]) + '.jpg'
    concatedPath = os.path.join(filepath,imgName)
    
    imgPathList_test.append(concatedPath)

  0%|          | 0/71863 [00:00<?, ?it/s]

In [65]:
len(imgPathList_test)

71863

In [66]:
imgPathList_test[0:5]

['/home/aritra/cric/images/img/1004.jpg',
 '/home/aritra/cric/images/img/1004.jpg',
 '/home/aritra/cric/images/img/1004.jpg',
 '/home/aritra/cric/images/img/1004.jpg',
 '/home/aritra/cric/images/img/1004.jpg']

In [67]:
# creating HF dataset to map images fast of test_set

listToDictionary = {'questions':questionList_test, 'labels':labels_test, 'scores':scores_test, 'images':imgPathList_test}
modified_test_set = Dataset.from_dict(listToDictionary)

In [68]:
# mapping each filepath of test Set to images in the directory

modified_test_set = modified_test_set.cast_column("images", datasets.Image())

### -------------------------------End of Processing----------------------------------------------------------------------------

In [69]:
from transformers import ViltProcessor, ViltForQuestionAnswering

In [70]:
from transformers import ViltConfig
config = ViltConfig.from_pretrained("dandelin/vilt-b32-finetuned-vqa")

In [71]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [72]:
processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-mlm")

In [73]:
model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-mlm", id2label = reverse_mapping, label2id = mapping).to(device)

Some weights of ViltForQuestionAnswering were not initialized from the model checkpoint at dandelin/vilt-b32-mlm and are newly initialized: ['classifier.1.weight', 'classifier.3.weight', 'classifier.1.bias', 'classifier.0.weight', 'classifier.0.bias', 'classifier.3.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [74]:
from torch.utils.data import DataLoader
from datasets import Dataset
import numpy as np

In [75]:
class cric_dataset(torch.utils.data.Dataset):
    """Map-style PyTorch dataset that encodes one question/image row at a time.

    The base class is spelled out as ``torch.utils.data.Dataset`` because the
    bare name ``Dataset`` in this notebook is imported from the Hugging Face
    ``datasets`` package, whose constructor is never called by this class.

    Args:
        dataset: indexable collection whose rows are dicts with the keys
            ``"images"``, ``"questions"`` and ``"scores"``.
        processor: callable that turns an image plus a question into a dict
            of tensors with a leading batch axis of size 1.
    """

    def __init__(self, dataset, processor):
        self.processor = processor
        self.dataset = dataset

    def __len__(self):
        """Number of rows in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return the processor outputs for row ``idx`` plus a float ``labels`` tensor."""
        item = self.dataset[idx]

        encodings = self.processor(
            images=item["images"],
            text=item["questions"],
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )

        # Drop only the leading batch axis; a bare squeeze() would also remove
        # any other axis that happens to have size 1.
        encodings = {k: v.squeeze(0) for k, v in encodings.items()}

        # one-hot answer vector, as float32
        encodings['labels'] = torch.tensor(item['scores'], dtype=torch.float32)

        return encodings

In [76]:
train_dataset_object = cric_dataset(modified_train_set, processor)

In [77]:
val_dataset_object = cric_dataset(modified_val_set, processor)

In [78]:
test_dataset_object = cric_dataset(modified_test_set, processor)

In [79]:
def collate_fn(batch):
    """Merge a list of per-example dicts into one batch dict of tensors.

    Args:
        batch: list of dicts, each with the keys ``input_ids``,
            ``pixel_values``, ``attention_mask``, ``token_type_ids`` and
            ``labels``.

    Returns:
        dict with the stacked text tensors, the padded ``pixel_values`` with
        their ``pixel_mask`` (both produced by the global ``processor``), and
        the stacked ``labels``.
    """
    textFields = ('input_ids', 'attention_mask', 'token_type_ids')
    perField = {name: [sample[name] for sample in batch] for name in textFields}
    images = [sample['pixel_values'] for sample in batch]
    targets = [sample['labels'] for sample in batch]

    # create padded pixel values and the corresponding pixel mask
    padded = processor.image_processor.pad(images, return_tensors="pt")

    # assemble the new batch, keeping the original key order
    collated = {name: torch.stack(perField[name]) for name in textFields}
    collated['pixel_values'] = padded['pixel_values']
    collated['pixel_mask'] = padded['pixel_mask']
    collated['labels'] = torch.stack(targets, dim = 0)

    return collated


In [80]:
train_dataloader = DataLoader(train_dataset_object, collate_fn = collate_fn, shuffle = True, batch_size = 16)

In [81]:
batch = next(iter(train_dataloader))

In [82]:
for k,v in batch.items():
    print(k, v.shape)
    print()
    
#print(batch.keys())

input_ids torch.Size([16, 40])

attention_mask torch.Size([16, 40])

token_type_ids torch.Size([16, 40])

pixel_values torch.Size([16, 3, 608, 576])

pixel_mask torch.Size([16, 608, 576])

labels torch.Size([16, 596])



## Truncated Lq Loss

##### Using L'Hôpital's rule, it can be shown that the $L_q$ loss, $L_q(f(x), e_j) = \frac{1 - f_j(x)^q}{q}$, is equivalent to CCE in the limit $q \to 0$ and becomes the MAE/unhinged loss when $q = 1$. It is therefore a generalization of CCE and MAE; the truncated variant has two hyperparameters, $q$ and $k$.

Note that, when k → 0, the truncated Lq loss becomes the normal Lq loss.
k = 1/c

###### According to the paper, the best accuracy at a 20% noise rate was obtained with q = 0.8
###### At a 60% noise rate, one of the best accuracies was obtained with q = 0.8, with fast convergence (close to MAE)
###### At a 60% noise rate, another best accuracy was obtained with q = 1.0 (MAE), but with slow convergence

##### Experimental Setup:

LR: 0.01,
Momentum: 0.9,
Weight_Decay: 0.0001,
Optimizer: SGD

In [83]:
def generalized_cross_entropy(y_true, y_pred, q=0.8, k=0.001, eps=1e-7):
    """Lq (generalized cross entropy) loss, shifted by the constant Lq(k).

    2018 - nips - Generalized Cross Entropy Loss for Training Deep Neural
    Networks with Noisy Labels.

    For each row the probability assigned to the true class, ``p``, is
    ``sum(y_true * y_pred)`` over the class axis and the row loss is
    ``(1 - p**q) / q - (1 - k**q) / q``. The second term is a constant, so it
    does not change gradients; it makes the value negative whenever ``p > k``.

    Args:
        y_true: (batch, classes) one-hot targets.
        y_pred: (batch, classes) class probabilities (e.g. softmax output).
        q: exponent of the Lq loss; must be > 0. Defaults to the previously
            hard-coded 0.8.
        k: threshold whose Lq value is subtracted. Defaults to the previously
            hard-coded 0.001.
        eps: lower bound applied to ``p`` before the power. The derivative of
            ``p**q`` is unbounded at ``p = 0`` for ``q < 1``, which would put
            inf/NaN into the gradients if a probability underflows to zero.

    Returns:
        Scalar tensor: mean of the per-row losses.

    Raises:
        ValueError: if ``q`` is not positive.
    """
    if q <= 0:
        raise ValueError("q must be > 0")

    true_class_prob = torch.sum(y_true * y_pred, dim = 1).clamp(min = eps)

    lq = (1 - torch.pow(true_class_prob, q)) / q
    lq_at_k = (1 - (k ** q)) / q

    return torch.mean(lq - lq_at_k)

import torch
p = torch.tensor(
                    [
                        [0.2,0.5,0.3],
                        [0.1,0.1,0.8]
                    ]
)
t = torch.tensor(
                    [
                        [0, 0, 1],
                        [1, 0, 0]
                    ]
)

In [84]:
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01)

## Model Training Loop

In [None]:
# Fine-tune the model for 7 passes over train_dataloader using the GCE loss.
for epoch in tqdm(range(7)):  

    model.train()
    
    for batch_idx, batch in enumerate(train_dataloader):
        
        # move every tensor of the batch onto the selected device
        batch = {k:v.to(device) for k,v in batch.items()}
        y_true = batch['labels']
        
        outputs = model(**batch)
        
        # the loss works on per-class probabilities, so normalise the logits over the class axis
        y_pred = torch.softmax(outputs.logits, dim = 1)
        
        #print(y_true)
        #print(y_pred)
     
        loss = generalized_cross_entropy(y_true, y_pred)
        #print(loss)        
        
        # standard step: clear old gradients, backpropagate, update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        print(batch_idx,"-> GCE.Loss:", loss.item())
        
        # every 150 batches (batch 0 excluded) report accuracy on the val/test subsets.
        # NOTE(review): calculateAccuracyTest / calculateAccuracyVal are defined in cells
        # further down this notebook, so those cells must be run before this one.
        if (batch_idx != 0 ) and (batch_idx % 150 == 0):
            
            model.eval()
            
            acc_score_test = calculateAccuracyTest()
            acc_score_val, validationLoss = calculateAccuracyVal()
            
            print(f'\nValidation Accuracy: {acc_score_val}, Test Accuracy: {acc_score_test} \n')
            
            # switch back to training mode after the evaluation
            model.train()

  0%|          | 0/7 [00:00<?, ?it/s]

0 -> GCE.Loss: -0.002442486584186554
1 -> GCE.Loss: -0.0013354122638702393
2 -> GCE.Loss: -0.0019480735063552856
3 -> GCE.Loss: -0.0019735917448997498
4 -> GCE.Loss: -0.001247987151145935
5 -> GCE.Loss: -0.0017781183123588562
6 -> GCE.Loss: -0.001027137041091919
7 -> GCE.Loss: -0.0015478506684303284
8 -> GCE.Loss: -0.0018725097179412842
9 -> GCE.Loss: -0.0022353678941726685
10 -> GCE.Loss: -0.0023505762219429016
11 -> GCE.Loss: -0.0018247738480567932
12 -> GCE.Loss: -0.0020718425512313843
13 -> GCE.Loss: -0.0030510053038597107
14 -> GCE.Loss: -0.0030230507254600525
15 -> GCE.Loss: -0.0035108253359794617
16 -> GCE.Loss: -0.00341913104057312
17 -> GCE.Loss: -0.003985218703746796
18 -> GCE.Loss: -0.002863071858882904
19 -> GCE.Loss: -0.0030927881598472595
20 -> GCE.Loss: -0.004137992858886719
21 -> GCE.Loss: -0.002262301743030548
22 -> GCE.Loss: -0.004064664244651794
23 -> GCE.Loss: -0.0026146993041038513
24 -> GCE.Loss: -0.004046298563480377
25 -> GCE.Loss: -0.0030619576573371887
26 -> G

216 -> GCE.Loss: -0.4634559154510498
217 -> GCE.Loss: -0.15124577283859253
218 -> GCE.Loss: -0.22920545935630798
219 -> GCE.Loss: -0.4631769061088562
220 -> GCE.Loss: -0.30731815099716187
221 -> GCE.Loss: -0.22927403450012207
222 -> GCE.Loss: -0.30729007720947266
223 -> GCE.Loss: -0.2292766571044922
224 -> GCE.Loss: -0.38543301820755005
225 -> GCE.Loss: -0.30707505345344543
226 -> GCE.Loss: -0.3852681815624237
227 -> GCE.Loss: -0.5412495732307434
228 -> GCE.Loss: -0.3852483630180359
229 -> GCE.Loss: -0.15109099447727203
230 -> GCE.Loss: -0.5413094162940979
231 -> GCE.Loss: -0.15106305480003357
232 -> GCE.Loss: -0.3853345811367035
233 -> GCE.Loss: -0.5413899421691895
234 -> GCE.Loss: -0.2292691171169281
235 -> GCE.Loss: -0.5409880876541138
236 -> GCE.Loss: -0.2292684018611908
237 -> GCE.Loss: -0.22877439856529236
238 -> GCE.Loss: -0.5414581298828125
239 -> GCE.Loss: -0.1512691080570221
240 -> GCE.Loss: -0.07314638793468475
241 -> GCE.Loss: -0.30719006061553955
242 -> GCE.Loss: -0.229271

## Reports & Results

In [85]:
# This function returns the Validation Loss and accuracy on the Validation Set

def calculateAccuracyVal(start=100, stop=400):
    """Evaluate the global ``model`` on validation rows ``start`` .. ``stop - 1``.

    Uses the notebook globals ``model``, ``val_dataset_object``, ``device``,
    ``reverse_mapping`` and ``answerList_val``.

    Args:
        start: first validation index to evaluate (default 100, as before).
        stop: index one past the last evaluated row (default 400, as before).

    Returns:
        ``(accuracy_percent, mean_loss)``. The loss is ``outputs.loss`` of
        the model averaged over the evaluated rows (previously only the loss
        of the last row was returned). ``(0.0, 0.0)`` for an empty range.
    """
    matchScore, loopCounter = 0, 0
    lossTotal = 0.0

    # evaluate in eval mode, then put the model back in the mode it was in
    wasTraining = model.training
    model.eval()

    try:
        # inference only: without no_grad every forward pass keeps an autograd graph alive
        with torch.no_grad():
            for index in range(start, stop):

                loopCounter += 1

                val_example = val_dataset_object[index]
                val_example = {k: v.unsqueeze(0).to(device) for k, v in val_example.items()}
                val_outputs = model(**val_example)

                lossTotal += val_outputs.loss.item()

                # argmax over the raw logits picks the same class as argmax over
                # sigmoid(logits), without ties caused by sigmoid saturating
                val_ans = reverse_mapping[torch.argmax(val_outputs.logits).item()]

                # accuracy score
                if answerList_val[index] == val_ans:
                    matchScore += 1
    finally:
        model.train(wasTraining)

    if loopCounter == 0:
        return (0.0, 0.0)

    accuracyVal = (matchScore / loopCounter) * 100
    return (accuracyVal, lossTotal / loopCounter)

In [86]:
# This function returns accuracy on the Test Set

def calculateAccuracyTest(start=100, stop=400):
    """Return the top-1 accuracy (percent) of the global ``model`` on test rows ``start`` .. ``stop - 1``.

    Uses the notebook globals ``model``, ``test_dataset_object``, ``device``,
    ``reverse_mapping`` and ``answerList_test``. Leaves the model in eval mode.

    Args:
        start: first test index to evaluate (default 100, as before).
        stop: index one past the last evaluated row (default 400, as before).

    Returns:
        Accuracy in percent; 0.0 for an empty range.
    """
    matchScore, loopCounter = 0, 0
    model.eval()

    # inference only: without no_grad every forward pass keeps an autograd graph alive
    with torch.no_grad():
        for index in range(start, stop):

            loopCounter += 1

            test_example = test_dataset_object[index]
            test_example = {k: v.unsqueeze(0).to(device) for k, v in test_example.items()}
            test_logits = model(**test_example).logits

            # argmax over the raw logits picks the same class as argmax over
            # sigmoid(logits), without ties caused by sigmoid saturating
            test_ans = reverse_mapping[torch.argmax(test_logits).item()]

            # accuracy score
            if answerList_test[index] == test_ans:
                matchScore += 1

    print(f'\nTotal Questions {loopCounter}')
    print(f'\nCorrectly classified {matchScore}')

    if loopCounter == 0:
        return 0.0

    return ((matchScore/loopCounter)*100)

In [None]:
calculateAccuracyTest()

In [None]:
# This function returns report on the Test Set

misclassifiedIndex = []
def generateReport(start=16080, stop=17000):
    """Print a per-question report for test rows ``start`` .. ``stop - 1`` and return the accuracy.

    For every row it prints the question, the reference answer, the predicted
    answer and the (up to) four highest sigmoid scores with their labels.
    Indices of wrong predictions are collected in the global
    ``misclassifiedIndex``, which is emptied first so repeated calls do not
    accumulate stale entries.

    NOTE(review): training applies softmax to the logits while the scores
    printed here use sigmoid, so they are not the training-time
    probabilities — confirm which is intended.

    Args:
        start: first test index to report (default 16080, as before).
        stop: index one past the last reported row (default 17000, as before).

    Returns:
        Accuracy in percent; 0.0 for an empty range.
    """
    matchScore, loopCounter = 0, 0
    misclassifiedIndex.clear()
    model.eval()

    # inference only: without no_grad every forward pass keeps an autograd graph alive
    with torch.no_grad():
        for index in range(start, stop):

            loopCounter += 1
            print(f'\n{questionList_test[index]} ? Ans: {answerList_test[index]}\n')

            example = test_dataset_object[index]
            example = {k: v.unsqueeze(0).to(device) for k, v in example.items()}
            outputs = model(**example)

            logits = outputs.logits
            predicted_classes = torch.sigmoid(logits)
            ans = reverse_mapping[torch.argmax(predicted_classes).item()]

            print('Predicted Ans:', ans,'\n')

            # never ask for more candidates than there are classes
            topCount = min(4, predicted_classes.shape[-1])
            probs, classes = torch.topk(predicted_classes, topCount)

            # squeeze(0) removes only the batch axis, so a single candidate stays a list
            for prob, class_idx in zip(probs.squeeze(0).tolist(), classes.squeeze(0).tolist()):
                print(prob, model.config.id2label[class_idx])

            # accuracy score
            if answerList_test[index] == ans:
                matchScore += 1
                print('Correct Prediction at index:', index)

            else:
                misclassifiedIndex.append(index)
                print('Wrong Prediction at index:', index)

    if loopCounter == 0:
        return 0.0

    return ((matchScore/loopCounter)*100)

In [None]:
generateReport()

In [None]:
misclassifiedIndex

In [None]:
i = Image.open(imgPathList_test[16087])
i.thumbnail((500,500))
i

### Finding Color Questions Indices

In [None]:
# extracting the list of colors from the previously stored text files
# (one color name per line; surrounding whitespace is stripped)

with open('./text_files/colors.txt', 'r') as file:
    colors = [entry.strip() for entry in file]

In [None]:
colors[0:5]

In [None]:
# adding leading and trailing space in the colors

colors_spaces = [' '+ color + ' ' for color in colors] 

In [None]:
colors_spaces[0:5]

In [None]:
def isContainColor(targetString, colorList=None):
    """Tell whether ``targetString`` mentions any of the given colours.

    The colour entries are wrapped in spaces (``' red '``) so that only whole
    words match. The question is wrapped in spaces too before searching;
    otherwise a colour that is the first or the last word of the question
    could never match.

    Args:
        targetString: the question text.
        colorList: optional iterable of colour tokens to look for; defaults
            to the notebook-level ``colors_spaces`` list.

    Returns:
        True if at least one token occurs in the padded question, else False.
    """
    if colorList is None:
        colorList = colors_spaces

    padded = ' ' + targetString + ' '

    return any(color in padded for color in colorList)

In [None]:
# This function identifies the color question for which the result is misclassified

misclassifiedIndex = []
matchScore = 0

def findAccuracyColorQuestions(start=0, stop=71860):
    """Measure the accuracy of the global ``model`` on colour-related test questions.

    A question counts as colour-related when it contains the substring
    ``'color'`` or when ``isContainColor`` reports a colour word. The number of
    correct answers is stored in the global ``matchScore`` and the indices of
    wrong answers in the global ``misclassifiedIndex``; both are reset at the
    start so that calling the function twice does not double count.

    Args:
        start: first test index to inspect (default 0, as before).
        stop: index one past the last inspected row (default 71860, as before).

    Returns:
        Accuracy in percent over the colour questions found; 0.0 if none
        were found.
    """
    global matchScore

    matchScore = 0
    misclassifiedIndex.clear()
    questionCount = 0
    model.eval()

    print('***** Question About Colors ************')

    # inference only: without no_grad every forward pass keeps an autograd graph alive
    with torch.no_grad():
        for index in tqdm(range(start, stop)):

            currQuestion = questionList_test[index]

            if not (('color' in currQuestion) or isContainColor(currQuestion)):
                continue

            questionCount += 1

            example = test_dataset_object[index]
            example = {k: v.unsqueeze(0).to(device) for k, v in example.items()}
            logits = model(**example).logits

            # argmax over the raw logits picks the same class as argmax over sigmoid(logits)
            ans = reverse_mapping[torch.argmax(logits).item()]

            # accuracy score
            if answerList_test[index] == ans:
                matchScore += 1
            else:
                misclassifiedIndex.append(index)

    print(f'\nTotal {questionCount} questions found')
    print(f'\nCorrectly Classified {matchScore}')
    print(f'\nMistakenly Classified {len(misclassifiedIndex)}')

    if questionCount == 0:
        return 0.0

    return ((matchScore/questionCount)*100)

In [None]:
findAccuracyColorQuestions()

In [None]:
len(misclassifiedIndex)

In [None]:
rightClassified = matchScore
missClassified = len(misclassifiedIndex)

In [None]:
import matplotlib.pyplot as plt

In [None]:
labels = ['Rightly Classified', 'MisClassified']
values = [rightClassified, missClassified]

In [None]:
plt.figure(figsize=(10, 5))

plt.xlabel(f"Model Misclassified Total {len(misclassifiedIndex)} Color based Questions")
plt.ylabel("Values")
plt.title(f"{rightClassified + missClassified} Color Based Question-Answers present in the dataset")

plt.bar(labels, values, color='lightgreen', edgecolor='black', width=0.2)
plt.grid(True)

# Show the plot
plt.show()

## Find Yes/No Questions

In [None]:
# This function identifies the yes/no type questions for which the result is misclassified

misclassifiedIndex = []

def findYesNoQuestions(start=0, stop=71860):
    """Measure the accuracy of the global ``model`` on test questions answered 'yes' or 'no'.

    A row is selected when its reference answer is exactly ``'yes'`` or
    ``'no'``. The previous substring test (``'no' in answer``) also selected
    answers that merely contain those letters, such as "snow" or "piano".
    Indices of wrong predictions are collected in the global
    ``misclassifiedIndex``, which is emptied first.

    Args:
        start: first test index to inspect (default 0, as before).
        stop: index one past the last inspected row (default 71860, as before).

    Returns:
        Accuracy in percent over the yes/no questions found; 0.0 if none
        were found.
    """
    matchScore, questionCount = 0, 0
    misclassifiedIndex.clear()
    model.eval()

    print('********* Yes/No Questions ************')

    # inference only: without no_grad every forward pass keeps an autograd graph alive
    with torch.no_grad():
        for index in tqdm(range(start, stop)):

            currAnswer = answerList_test[index]

            if currAnswer not in ('yes', 'no'):
                continue

            questionCount += 1

            example = test_dataset_object[index]
            example = {k: v.unsqueeze(0).to(device) for k, v in example.items()}
            logits = model(**example).logits

            # argmax over the raw logits picks the same class as argmax over sigmoid(logits)
            ans = reverse_mapping[torch.argmax(logits).item()]

            # check if answer is correct or not
            if currAnswer == ans:
                matchScore += 1
            else:
                misclassifiedIndex.append(index)

    print(f'\nTotal {questionCount} questions found')
    print(f'\nCorrectly Classified {matchScore}')
    print(f'\nMistakenly Classified {len(misclassifiedIndex)}')

    if questionCount == 0:
        return 0.0

    return ((matchScore/questionCount)*100)

In [None]:
findYesNoQuestions()

In [None]:
rightClassified

In [None]:
missClassified

In [None]:
# NOTE(review): rightClassified / missClassified are assigned only after the colour-question
# run (rightClassified = matchScore, missClassified = len(misclassifiedIndex)).
# findYesNoQuestions keeps its matchScore local, so unless both are reassigned first these
# are still the colour-question counts, not the yes/no counts — confirm before plotting.
labels = ['Rightly Classified', 'MisClassified']
values = [rightClassified, missClassified]

In [None]:
plt.figure(figsize=(10, 5))

plt.xlabel(f"Model Misclassified Total {missClassified} Yes/No Questions")
plt.ylabel("Values")
plt.title(f"25171 Yes/No Question-Answers present in the dataset")

plt.bar(labels, values, color='lightgreen', edgecolor='black', width=0.2)
plt.grid(True)

# Show the plot
plt.show()

### Find Common-sense Reasoning Questions

In [None]:
# This function roughly identifies the common-sense based questions for which the result is misclassified

misclassifiedIndex = []

def findCSRQuestions(start=0, stop=100):
    """Measure the accuracy of the global ``model`` on the remaining ("common-sense") test questions.

    A row is evaluated when it is neither a yes/no question (reference answer
    exactly ``'yes'`` or ``'no'``) nor a colour question (contains
    ``'color'`` or a colour word according to ``isContainColor``). Indices of
    wrong predictions are collected in the global ``misclassifiedIndex``,
    which is emptied first.

    Fixes over the previous version: the ``missClassified += 1`` statement
    raised ``UnboundLocalError`` on the first wrong prediction (the name was
    never declared ``global``), and the yes/no filter was a substring test
    that also matched answers such as "snow".

    Args:
        start: first test index to inspect (default 0, as before).
        stop: index one past the last inspected row (default 100, as before).

    Returns:
        Accuracy in percent over the selected questions; 0.0 if none were
        selected.
    """
    matchScore, questionCount = 0, 0
    misclassifiedIndex.clear()
    model.eval()

    print('********* Commonsense Reasoning Questions ************')

    # inference only: without no_grad every forward pass keeps an autograd graph alive
    with torch.no_grad():
        for index in tqdm(range(start, stop)):

            currAnswer = answerList_test[index]
            currQuestion = questionList_test[index]

            isYesNo = currAnswer in ('yes', 'no')
            isColor = ('color' in currQuestion) or isContainColor(currQuestion)

            if isYesNo or isColor:
                continue

            questionCount += 1

            example = test_dataset_object[index]
            example = {k: v.unsqueeze(0).to(device) for k, v in example.items()}
            logits = model(**example).logits

            # argmax over the raw logits picks the same class as argmax over sigmoid(logits)
            ans = reverse_mapping[torch.argmax(logits).item()]

            # check if answer is correct or not
            if currAnswer == ans:
                matchScore += 1
            else:
                misclassifiedIndex.append(index)

    print(f'\nTotal {questionCount} questions found')
    print(f'\nCorrectly Classified {matchScore}')
    print(f'\nMistakenly Classified {len(misclassifiedIndex)}')

    if questionCount == 0:
        return 0.0

    return ((matchScore/questionCount)*100)

In [None]:
findCSRQuestions()

In [None]:
missClassified

In [None]:
rightClassified

In [None]:
# NOTE(review): rightClassified / missClassified are assigned only after the colour-question
# run; findCSRQuestions keeps its matchScore local, so unless both are reassigned first these
# are not the common-sense counts — confirm before plotting.
labels = ['Rightly Classified', 'MisClassified']
values = [rightClassified, missClassified]

In [None]:
plt.figure(figsize=(10, 5))

plt.xlabel(f"Model Misclassified Total {missClassified} commonsense-based Questions")
plt.ylabel("Values")
plt.title(f"27277 Common-sense-based Questions present in the dataset")

plt.bar(labels, values, color='lightgreen', edgecolor='black', width=0.2)
plt.grid(True)

# Show the plot
plt.show()

In [None]:
# This function collects the indices of the color questions from the test set

colorQuestionIndices = []

def storeColorQuestionIndex(start=0, stop=500):
    """Collect the indices of colour-related test questions into ``colorQuestionIndices``.

    A question counts as colour-related when it contains the substring
    ``'color'`` or when ``isContainColor`` reports a colour word. The global
    list is emptied first so that repeated calls do not add duplicates.

    Args:
        start: first test index to inspect (default 0, as before).
        stop: index one past the last inspected row (default 500, as before).
    """
    colorQuestionIndices.clear()
    print('********* Storing Color Questions Indices ************')

    for index in tqdm(range(start, stop)):

        currQuestion = questionList_test[index]

        if ('color' in currQuestion) or (isContainColor(currQuestion)):
            colorQuestionIndices.append(index)

    print(f'\nTotal {len(colorQuestionIndices)} color questions found')

In [None]:
storeColorQuestionIndex()

In [None]:
len(colorQuestionIndices)

In [None]:
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')

In [None]:
def remove_stopwords(wordList):
    """Return the words of ``wordList`` that are not English stop words.

    The comparison is case-insensitive (each word is lower-cased before the
    lookup); the returned words keep their original casing and order.

    The stop-word set is built from ``stopwords.words('english')`` once and
    cached on the function object, because this helper is called once per
    question and rebuilding the set on every call is wasted work.

    Args:
        wordList: iterable of word strings.

    Returns:
        list of the words that are not stop words.
    """
    stop_words = getattr(remove_stopwords, '_stop_words', None)

    if stop_words is None:
        stop_words = frozenset(stopwords.words('english'))
        remove_stopwords._stop_words = stop_words

    return [word for word in wordList if word.lower() not in stop_words]

In [None]:
# this function counts how often each non-stop-word occurs in the questions whose
# indices are listed in misclassifiedIndex (used for the word-frequency histogram)

frquencyMap = {}

def collectWords():
    """Add the word counts of the questions in ``misclassifiedIndex`` to ``frquencyMap``.

    Each question is split on whitespace, stop words are dropped with
    ``remove_stopwords`` and the counter of every remaining word is increased
    by one.
    """
    for index in tqdm(misclassifiedIndex):

        keptWords = remove_stopwords(questionList_test[index].split())

        for word in keptWords:
            # a missing word starts at 0, then gets incremented
            frquencyMap[word] = frquencyMap.get(word, 0) + 1


In [None]:
collectWords()

In [None]:
len(frquencyMap)

In [None]:
# convert to list of tuples

frquencyList = [(key,val) for key,val in frquencyMap.items()]

In [None]:
frquencyList[0:5]

In [None]:
frquencyList.sort(key = lambda x:x[1], reverse = True)

In [None]:
frquencyList[0:5]

In [None]:
import seaborn as sns

In [None]:
# top-30 words

frquencyList = frquencyList[0:30]
labels = [ val[0] for val in frquencyList]
frequncies = [ val[1] for val in frquencyList]

In [None]:
sns.set(style="whitegrid")
plt.figure(figsize=(10, 6))
ax = sns.barplot(x=frequncies, y=labels, palette="viridis")

# Customize the plot
ax.set(xlabel="Frequency", ylabel="Words", title="Word Frequency")
plt.tight_layout()

plt.show()

In [None]:
    def hasFrequentWord(question, words=('object', 'used', 'usually', 'type')):
        """Return True if any of ``words`` occurs (as a substring) in ``question``.

        Replaces a dangling ``if (...) in currQuestion:`` fragment that had no
        body (a SyntaxError) and whose condition,
        ``('object' or 'used' or 'usually' or 'type') in currQuestion``, only
        ever tested ``'object'`` because ``or`` returns its first truthy
        operand.
        """
        return any(word in question for word in words)


## Experimenting With Accuracy By Removing Most Frequent Words

In [None]:
len(questionList_test)

In [None]:
rowsAffected = 0
droppedWords = ('object','used','usually','type')

for index in tqdm(range(0,71860)):

    question = questionList_test[index]

    # only questions that mention 'object' or 'used' are rewritten
    if ('object' not in question) and ('used' not in question):
        continue

    # rebuild the question without the dropped words (whole-token comparison)
    keptTokens = [token for token in question.split() if token not in droppedWords]
    questionList_test[index] = ' '.join(keptTokens)

    rowsAffected += 1

print(f'\nTotal {rowsAffected} questions affected')

In [None]:
listToDictionary = {'questions':questionList_test, 'labels':labels_test, 'scores':scores_test, 'images':imgPathList_test}
word_removed_test_set = Dataset.from_dict(listToDictionary)

In [None]:
word_removed_test_set

In [None]:
word_removed_test_set = word_removed_test_set.cast_column("images", datasets.Image())

In [None]:
index = 1038
word_removed_test_set[index]['questions'], modified_test_set[index]['questions']

In [None]:
word_removed_test_set_object = cric_dataset(word_removed_test_set, processor)

In [None]:
# This function returns report on the Test Set

misclassifiedIndex = []
def removeWordsAndGenReport(start=0, stop=71860):
    """Measure the accuracy of the global ``model`` on ``word_removed_test_set_object``.

    Predictions are compared with ``answerList_test``; indices of wrong
    predictions are collected in the global ``misclassifiedIndex``, which is
    emptied first so repeated calls do not accumulate stale entries. The
    former top-4 loop was removed: it computed ``topk`` for every row but
    printed nothing.

    Args:
        start: first test index to evaluate (default 0, as before).
        stop: index one past the last evaluated row (default 71860, as before).

    Returns:
        Accuracy in percent; 0.0 for an empty range.
    """
    matchScore, loopCounter = 0, 0
    misclassifiedIndex.clear()
    model.eval()

    # inference only: without no_grad every forward pass keeps an autograd graph alive
    with torch.no_grad():
        for index in tqdm(range(start, stop)):

            loopCounter += 1

            example = word_removed_test_set_object[index]
            example = {k: v.unsqueeze(0).to(device) for k, v in example.items()}
            logits = model(**example).logits

            # argmax over the raw logits picks the same class as argmax over sigmoid(logits)
            ans = reverse_mapping[torch.argmax(logits).item()]

            # accuracy score
            if answerList_test[index] == ans:
                matchScore += 1
            else:
                misclassifiedIndex.append(index)

    print(f'\nTotal {loopCounter} questions found')
    print(f'\nCorrectly Classified {matchScore}')
    print(f'\nMistakenly Classified {len(misclassifiedIndex)}')

    if loopCounter == 0:
        return 0.0

    return ((matchScore/loopCounter)*100)

In [None]:
removeWordsAndGenReport()