#VQA - 10685642_10717531_10703095




In [None]:
from IPython.core.interactiveshell import InteractiveShell
# IPython option: with "all", every top-level expression in a cell is echoed,
# not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import os
import tensorflow as tf
import numpy as np


# Fix the random seeds so that runs are repeatable.
# TensorFlow's global seed covers weight initialisation and dropout; NumPy is
# seeded as well because the batch order is shuffled with np.random.shuffle.
SEED = 1234
tf.random.set_seed(SEED)
np.random.seed(SEED)

In [None]:
# Working directory of the notebook process (computed again before the
# experiment folders are created).
cwd = os.getcwd()

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset archive is read from it
# and the training checkpoints are written to it.
drive.mount('/content/drive')

In [None]:
# Extract the dataset archive stored on Google Drive into the current directory.
!unzip /content/drive/My\ Drive/ANNDL/anndl-2020-vqa.zip

In [None]:
# List the contents of the extracted dataset folder.
!ls /content/VQA_Dataset/


In [None]:
from PIL import Image
import numpy as np
import pandas as pd  
import json
import cv2
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import matplotlib.pyplot as plt

# Dataset locations, all relative to the folder the archive was extracted to.
base_path = 'VQA_Dataset/'
imgs_path = os.path.join(base_path, 'Images')
train_json_path = os.path.join(base_path, 'train_questions_annotations.json')
test_json_path = os.path.join(base_path, 'test_questions.json')

# Fraction of the annotated questions used for training (the rest is validation).
dataset_split = 0.9
# Size the images are resized to, and the batch size used for training.
img_h = 256
img_w = 256
batch_size = 64

# Answer vocabulary. The position of a label in this tuple IS its class index,
# so the order must not change.
_ANSWER_LABELS = (
    '0', '1', '2', '3', '4', '5',
    'apple', 'baseball', 'bench', 'bike', 'bird', 'black', 'blanket', 'blue',
    'bone', 'book', 'boy', 'brown',
    'cat', 'chair', 'couch',
    'dog',
    'floor', 'food', 'football',
    'girl', 'grass', 'gray', 'green',
    'left', 'log',
    'man', 'monkey bars',
    'no', 'nothing',
    'orange',
    'pie', 'plant', 'playing',
    'red', 'right', 'rug',
    'sandbox', 'sitting', 'sleeping', 'soccer', 'squirrel', 'standing',
    'stool', 'sunny',
    'table', 'tree',
    'watermelon', 'white', 'wine', 'woman',
    'yellow', 'yes',
)

# Maps each answer string to its integer class index.
classes = {label: index for index, label in enumerate(_ANSWER_LABELS)}

N_CLASSES = len(classes)



#Custom Data Generator

In [None]:
class DataGenerator(tf.keras.utils.Sequence):
    """Batch generator that feeds ``[questions, images]`` (and labels) to Keras.

    Every sample is addressed by a question ID from ``list_IDs``. The image
    name, the encoded question and the answer of a sample are read at the same
    position in ``image_path``, ``train_input_questions`` and
    ``train_input_answer`` respectively.

    Args:
        list_IDs: Question IDs, one per sample (must be hashable).
        train_input_answer: Per-sample labels; only read when ``to_fit`` is true.
        image_path: Per-sample image names without the ``.png`` suffix,
            resolved against the module-level ``imgs_path``.
        train_input_questions: Per-sample padded token sequences, each of
            length ``max_length``.
        max_length: Length of one padded question.
        to_fit: When true, ``__getitem__`` returns ``(X, y)``; otherwise only ``X``.
        batch_size: Number of samples per batch.
        dim: ``(height, width)`` the images are resized to.
        n_channels: Number of image channels.
        n_classes: Stored on the instance; not used by the generator itself.
        shuffle: Whether the sample order is reshuffled at the end of each epoch.
    """

    def __init__(self, list_IDs,train_input_answer, image_path, train_input_questions, max_length, to_fit=True,
                 batch_size=16, dim=(img_h, img_w), n_channels=3, n_classes=N_CLASSES, shuffle=True):
        super().__init__()
        self.list_IDs = list_IDs
        self.train_input_answer = train_input_answer
        self.train_input_questions = train_input_questions
        self.image_path = image_path
        self.to_fit = to_fit
        self.batch_size = batch_size
        self.dim = dim
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.img_h = dim[0]
        self.img_w = dim[1]
        self.max_length = max_length
        # ID -> position of its first occurrence. This replaces the per-sample
        # ``list_IDs.index(ID)`` scan (linear in the dataset size) with a
        # constant-time lookup that returns the same position.
        self._position_of = {}
        for position, sample_id in enumerate(list_IDs):
            self._position_of.setdefault(sample_id, position)
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch (a trailing partial batch is dropped)."""
        return len(self.list_IDs) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index``: ``(X, y)`` when fitting, otherwise ``X``."""
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        X = self._generate_X(list_IDs_temp)

        if self.to_fit:
            y = self._generate_y(list_IDs_temp)
            return X, y
        return X

    def on_epoch_end(self):
        """Reset the sample order and reshuffle it when shuffling is enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def _generate_X(self, list_IDs_temp):
        """Build the inputs for the given IDs as ``[questions, images]``."""
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        X2 = np.empty((self.batch_size, self.max_length))

        for i, ID in enumerate(list_IDs_temp):
            position = self._position_of[ID]
            X[i,] = self._load_image(self.image_path[position], self.img_w, self.img_h)
            X2[i,] = self.train_input_questions[position]

        # Questions first, images second: the order of the model's two inputs.
        return [X2, X]

    def _generate_y(self, list_IDs_temp):
        """Build the ``(batch_size, 1)`` integer label array for the given IDs."""
        y = np.empty((self.batch_size, 1), dtype=int)
        for i, ID in enumerate(list_IDs_temp):
            y[i] = self.train_input_answer[self._position_of[ID]]

        return y

    def _load_image(self, image_path, img_w, img_h):
        """Read ``<imgs_path>/<image_path>.png``, resize it and scale it to [0, 1].

        Raises:
            FileNotFoundError: If the file is missing or cannot be decoded.
        """
        full_path = os.path.join(imgs_path, image_path + '.png')
        image = cv2.imread(full_path)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError('Could not read image: ' + full_path)
        # NOTE(review): cv2.imread returns channels in BGR order and no
        # conversion is applied here - confirm this is what the pretrained
        # image encoder expects.
        image = cv2.resize(image, (img_w, img_h))
        image = image / 255.
        return image


#Reading Data

In [None]:
def readTrainJson(data, first, last):
    """Extract the training samples whose position in ``data`` is in ``[first, last)``.

    Each question is split on single spaces and every ``?`` is removed from
    its words; each answer is translated to its class index through the
    module-level ``classes`` table.

    Returns:
        A tuple ``(images, questions, answers, quest_id)`` of parallel lists:
        image IDs, tokenised questions, answer class indices and the question
        keys converted to ``int``.
    """
    images, questions, answers, quest_id = [], [], [], []

    for key in list(data)[first:last]:
        entry = data[key]
        image_id = entry['image_id']
        words = [word.replace("?", "") for word in entry['question'].split(" ")]
        answer = entry['answer']

        quest_id.append(int(key))
        images.append(image_id)
        questions.append(words)
        answers.append(classes[answer])

    return images, questions, answers, quest_id

def readTestJson(data, first, last):
    """Extract the test samples whose position in ``data`` is in ``[first, last)``.

    Each question is split on single spaces and every ``?`` is removed from
    its words.

    Returns:
        A tuple ``(images, questions, quest_id)`` of parallel lists: image
        IDs, tokenised questions and the question keys converted to ``int``.
    """
    images, questions, quest_id = [], [], []

    for key in list(data)[first:last]:
        entry = data[key]
        image_id = entry['image_id']
        words = [word.replace("?", "") for word in entry['question'].split(" ")]

        quest_id.append(int(key))
        images.append(image_id)
        questions.append(words)

    return images, questions, quest_id


def readTrainJsonToPandas(data, first, last):
    """Build ``[image_path, question, answer]`` rows for a DataFrame.

    Covers the samples whose position in ``data`` is in ``[first, last)``.
    The image path is ``<imgs_path>/<image_id>.png``; question and answer are
    kept as the raw strings found in ``data``.
    """
    rows = []
    for key in list(data)[first:last]:
        entry = data[key]
        image_id, quest, ans = entry['image_id'], entry['question'], entry['answer']
        rows.append([os.path.join(imgs_path, image_id + '.png'), quest, ans])
    return rows

def readTestJsonToPandas(data, first, last):
    """Build ``[image_path, question]`` rows for a DataFrame.

    Covers the samples whose position in ``data`` is in ``[first, last)``.
    The image path is ``<imgs_path>/<image_id>.png``; the question is kept as
    the raw string found in ``data``.
    """
    rows = []
    for key in list(data)[first:last]:
        entry = data[key]
        image_id, quest = entry['image_id'], entry['question']
        rows.append([os.path.join(imgs_path, image_id + '.png'), quest])
    return rows

#Description Generator


In [None]:
# Read the train JSON file (the `with` block closes the file on exit, so no
# explicit close is needed).
with open(train_json_path, 'r') as f:
    train_data = json.load(f)

# Read the test JSON file.
with open(test_json_path, 'r') as f:
    test_data = json.load(f)


# Positional train/validation split: the first `dataset_split` share of the
# annotated questions is used for training, the remainder for validation.
TOT_QUESTIONS = len(train_data)
TRAIN_QUESTIONS = int(TOT_QUESTIONS*dataset_split)
VALID_QUESTIONS = TOT_QUESTIONS-TRAIN_QUESTIONS

# Transform the annotated data from JSON into a pandas DataFrame.
labels=['im_path','ques','answ']
totalList = readTrainJsonToPandas(train_data, 0, TOT_QUESTIONS)
total_datafram = pd.DataFrame.from_records(totalList, columns=labels)

# Extract images, questions, answers and question IDs from the train and test files.
train_images, train_questions, train_answers,train_questions_id = readTrainJson(train_data, 0, TRAIN_QUESTIONS)
valid_images, valid_questions, valid_answers,valid_questions_id = readTrainJson(train_data, TRAIN_QUESTIONS, TOT_QUESTIONS)
test_images, test_questions, test_questions_id = readTestJson(test_data, 0, len(test_data))


In [None]:
# Build the question vocabulary. The guard keeps an existing tokenizer when
# the cell is re-run instead of fitting a second time.
if 'tokenizer' not in globals():
    tokenizer = tf.keras.preprocessing.text.Tokenizer()

    # The file only needs to stay open while it is parsed.
    with open(train_json_path, 'r') as f:
        data = json.load(f)

    for question in data:
        # Same whitespace split and '?' stripping as in readTrainJson/readTestJson.
        quest = [word.replace("?", "") for word in data[question]['question'].split(" ")]
        # `quest` is a list of words, so every word is fitted as its own text.
        tokenizer.fit_on_texts(quest)

# +1 because the tokenizer never assigns index 0 to a word.
words_number = len(tokenizer.word_index) + 1

In [None]:
# Extend the vocabulary with the test questions BEFORE anything is encoded.
# fit_on_texts rebuilds tokenizer.word_index from the accumulated word
# frequencies, so fitting after the train/validation questions were encoded
# (as was done previously) could leave the test questions encoded with
# different word indices than the data the model is trained on.
tokenizer.fit_on_texts(test_questions)

# Encode and left-pad every split with the same word index; the padded length
# is the longest training question.
sequences = tokenizer.texts_to_sequences(train_questions)
max_length = max(len(sequence) for sequence in sequences)
train_input_questions = pad_sequences(sequences, maxlen=max_length)

sequences = tokenizer.texts_to_sequences(valid_questions)
valid_input_questions = pad_sequences(sequences, maxlen=max_length)

sequences = tokenizer.texts_to_sequences(test_questions)
test_input_questions = pad_sequences(sequences, maxlen=max_length)

# Vocabulary size used for the embedding layer (+1 for the unused index 0).
words_number = len(tokenizer.word_index) + 1

In [None]:
# Create the generators

words_number = len(tokenizer.word_index) + 1

# Training and validation batches carry labels and use the global batch size.
training_generator = DataGenerator(
    list_IDs=train_questions_id,
    train_input_answer=train_answers,
    image_path=train_images,
    train_input_questions=train_input_questions,
    max_length=max_length,
    batch_size=batch_size,
    dim=(img_h, img_w),
    n_classes=N_CLASSES,
)
validation_generator = DataGenerator(
    list_IDs=valid_questions_id,
    train_input_answer=valid_answers,
    image_path=valid_images,
    train_input_questions=valid_input_questions,
    max_length=max_length,
    batch_size=batch_size,
    dim=(img_h, img_w),
    n_classes=N_CLASSES,
)
# Test batches: one sample each, in file order, without labels. The question
# IDs are passed in the answer slot only as a placeholder (never read because
# to_fit=False).
test_generator = DataGenerator(
    list_IDs=test_questions_id,
    train_input_answer=test_questions_id,
    image_path=test_images,
    train_input_questions=test_input_questions,
    max_length=max_length,
    to_fit=False,
    batch_size=1,
    dim=(img_h, img_w),
    n_classes=N_CLASSES,
    shuffle=False,
)
 

#CNN & RNN
The image features produced by the CNN and the question encoding produced by the RNN are concatenated and passed to a dense classifier.

In [None]:

#inc_model = tf.keras.applications.Xception(input_shape=(img_h, img_w, 3), include_top=False, weights='imagenet')
#inc_model = tf.keras.applications.VGG19(input_shape=(img_h, img_w, 3), include_top=False, weights='imagenet')

# inp_size mirrors the image height.
inp_size = img_h

# ImageNet-pretrained InceptionResNetV2 without its classification head.
inc_model = tf.keras.applications.InceptionResNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(img_h, img_w, 3),
)
# Freeze every layer except the last 40, which stay trainable.
for frozen_layer in inc_model.layers[:-40]:
    frozen_layer.trainable = False

global_average_layer = tf.keras.layers.GlobalAveragePooling2D()

# Image encoder: dropout on the input, backbone, global average pooling,
# dropout, flatten.
vqa = tf.keras.models.Sequential([
    tf.keras.layers.Dropout(0.2),
    inc_model,
    global_average_layer,
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Flatten(),
])

# Symbolic image input and its encoding, consumed by the merged model.
image_input = tf.keras.layers.Input(shape=(img_h, img_w, 3))
encoded_image = vqa(image_input)

#-------------------------------------------------------------------------------------------------

# # Define RNN for language input
# question_input = tf.keras.layers.Input(shape=[max_length])
# embedded_question = tf.keras.layers.Embedding(input_dim=words_number, output_dim=1024, input_length=max_length)(question_input)
# encoded_question = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(1024, dropout=0.4, recurrent_dropout=0.2))(embedded_question)

# # Combine CNN and RNN to create the final model
# merged = tf.keras.layers.concatenate([encoded_question, encoded_image])
# output = tf.keras.layers.Dense(1024, activation='relu')(merged)
# output = tf.keras.layers.Dropout(0.5)(output)
# output = tf.keras.layers.Dense(512, activation='relu')(output)
# output = tf.keras.layers.Dropout(0.3)(output)
# output = tf.keras.layers.Dense(256, activation='relu')(output)
# output = tf.keras.layers.Dropout(0.3)(output)
# output = tf.keras.layers.Dense(128, activation='relu')(output)
# output = tf.keras.layers.Dropout(0.5)(output)
# output = tf.keras.layers.Dense(len(classes), activation='softmax')(output)
# model = tf.keras.models.Model(inputs=[question_input, image_input], outputs=output)


#-------------------------------------------------------------------------------------------------

# inc_model = tf.keras.applications.Xception(input_shape=(img_h, img_w, 3), include_top=False, weights='imagenet')
# for i in range(len(inc_model.layers)):
#     inc_model.layers[i].trainable = False    
# vqa = tf.keras.models.Sequential()
# vqa.add(inc_model)
# vqa.add(tf.keras.layers.Flatten())

# #vqa.add(tf.keras.layers.Dense(inp_size))
# image_input = tf.keras.layers.Input(shape=(img_h, img_w, 3))
# encoded_image = vqa(image_input)


# Define RNN for language input
question_input = tf.keras.layers.Input(shape=[max_length])
# input_length now matches the declared input shape (it was hard-coded to 100,
# which disagreed with Input(shape=[max_length])).
embedded_question = tf.keras.layers.Embedding(input_dim=words_number, output_dim=512, input_length=max_length)(question_input)
# Single LSTM encoder. A Bidirectional LSTM used to be built just before this
# line, but its output was overwritten immediately and never reached the
# model, so that dead layer has been removed.
encoded_question = tf.keras.layers.LSTM(512)(embedded_question)

# 3 layers of LSTM:

# encoded_question = tf.keras.layers.LSTM(512, return_sequences=True)(embedded_question)
# encoded_question = tf.keras.layers.LSTM(512, return_sequences=True)(encoded_question)
# encoded_question = tf.keras.layers.LSTM(512)(encoded_question)


# Combine CNN and RNN to create the final model: both encodings are
# concatenated, passed through one hidden dense layer and classified with a
# softmax over the answer classes.
merged = tf.keras.layers.concatenate([encoded_question, encoded_image])
output = tf.keras.layers.Dense(2048, activation='relu')(merged)

output = tf.keras.layers.Dense(len(classes), activation='softmax')(output)
# Input order [question, image] matches the [X2, X] list the generator yields.
model = tf.keras.models.Model(inputs=[question_input, image_input], outputs=output)

vqa.summary()
model.summary()

#Optimization Parameters


In [None]:
# Optimization params

# Labels are integer class indices, hence the sparse categorical cross-entropy.
loss = tf.keras.losses.SparseCategoricalCrossentropy()

# Metric reported during training and validation.
metrics = ['accuracy']

# RMSprop with a learning rate of 2e-4
# (alternatives kept for reference: lr = 1e-4, Adam).
lr = 0.0002
optimizer = tf.keras.optimizers.RMSprop(rho=0.9, learning_rate=lr)

# Compile Model
model.compile(loss=loss, optimizer=optimizer, metrics=metrics)


In [None]:
import os
from datetime import datetime

cwd = os.getcwd()

# Root folder for all experiments. exist_ok=True replaces the separate
# "check, then create" steps, so an already existing folder is simply reused.
exps_dir = os.path.join(cwd, 'drive/My Drive/Keras4/', 'multiclass_segmentation_experiments')
os.makedirs(exps_dir, exist_ok=True)

# Timestamp that makes the folder of this run unique.
now = datetime.now().strftime('%b%d_%H-%M-%S')

model_name = 'CNN'

exp_dir = os.path.join(exps_dir, model_name + '_' + str(now))
os.makedirs(exp_dir, exist_ok=True)

callbacks = []

# Model checkpoint
# ----------------
ckpt_dir = os.path.join(exp_dir, 'ckpts')
os.makedirs(ckpt_dir, exist_ok=True)

# Saves the weights only, and only for epochs that improve the validation loss.
ckpt_callback = tf.keras.callbacks.ModelCheckpoint(filepath=os.path.join(ckpt_dir, 'cp_{epoch:02d}.ckpt'), monitor='val_loss',
                                                   verbose=1, save_best_only=True, mode='min',
                                                   save_weights_only=True)  # False to save the model directly
callbacks.append(ckpt_callback)

# Visualize Learning on Tensorboard
# ---------------------------------
tb_dir = os.path.join(exp_dir, 'tb_logs')
os.makedirs(tb_dir, exist_ok=True)

# By default shows losses and metrics for both training and validation
tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tb_dir,
                                             profile_batch=0,
                                             histogram_freq=0)  # if 1 shows weights histograms
callbacks.append(tb_callback)

# Early Stopping
# --------------
# Stops training after 4 consecutive epochs without a better validation loss.
early_stop = True
if early_stop:
    es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=4)
    callbacks.append(es_callback)


In [None]:
# Train for at most 25 epochs, validating after each one; the callbacks take
# care of checkpointing, TensorBoard logging and early stopping.
model.fit(
    x=training_generator,
    epochs=25,
    validation_data=validation_generator,
    callbacks=callbacks,
)

Epoch 1/25

Epoch 00001: val_loss improved from inf to 1.33609, saving model to /content/drive/My Drive/Keras4/multiclass_segmentation_experiments/CNN_Jan31_19-55-21/ckpts/cp_01.ckpt
Epoch 2/25

Epoch 00002: val_loss improved from 1.33609 to 1.19694, saving model to /content/drive/My Drive/Keras4/multiclass_segmentation_experiments/CNN_Jan31_19-55-21/ckpts/cp_02.ckpt
Epoch 3/25

Epoch 00003: val_loss improved from 1.19694 to 1.06842, saving model to /content/drive/My Drive/Keras4/multiclass_segmentation_experiments/CNN_Jan31_19-55-21/ckpts/cp_03.ckpt
Epoch 4/25

Epoch 00004: val_loss improved from 1.06842 to 1.05818, saving model to /content/drive/My Drive/Keras4/multiclass_segmentation_experiments/CNN_Jan31_19-55-21/ckpts/cp_04.ckpt
Epoch 5/25

Epoch 00005: val_loss improved from 1.05818 to 1.01184, saving model to /content/drive/My Drive/Keras4/multiclass_segmentation_experiments/CNN_Jan31_19-55-21/ckpts/cp_05.ckpt
Epoch 6/25

Epoch 00006: val_loss improved from 1.01184 to 1.00151, s

<tensorflow.python.keras.callbacks.History at 0x7fd06e255e80>

In [None]:
# Restore the weights saved at epoch 6 of the run shown above.
# NOTE(review): the path is hard-coded to that run's timestamped folder; a new
# training run writes to a different folder, so update it before re-use.
model.load_weights('/content/drive/My Drive/Keras4/multiclass_segmentation_experiments/CNN_Jan31_19-55-21/ckpts/cp_06.ckpt')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fd04c1d9ef0>

In [None]:
# Class probabilities for every test question, in the generator's fixed
# (unshuffled) order. Model.predict accepts the Sequence directly, just like
# model.fit does; predict_generator is deprecated.
pred = model.predict(test_generator)



# Prepare Prediction

In [None]:
import os
from datetime import datetime

csv_fname = 'submission.csv'

# Write one "Id,Category" row per prediction: the question ID and the index of
# the highest-scoring answer class.
with open(csv_fname, 'w') as f:
    f.write('Id,Category\n')
    for question_id, scores in zip(test_questions_id, pred):
        f.write(f'{question_id},{np.argmax(scores)}\n')
    

12

10

9

9

9

8

9

9

10

8

9

9

10

8

10

10

10

10

10

10

9

10

10

9

9

8

9

9

9

9

10

9

9

9

8

9

10

8

9

10

9

9

8

7

9

10

10

10

9

9

9

8

10

10

10

9

8

8

8

9

10

9

10

10

10

9

10

9

9

10

9

10

10

9

7

10

9

10

10

9

10

9

9

10

9

9

9

9

9

10

9

10

10

9

9

9

10

8

10

9

9

9

9

10

9

10

9

10

9

9

8

9

10

10

9

9

10

9

10

8

10

9

10

7

10

10

9

9

8

8

9

10

9

9

10

9

8

9

9

9

10

8

10

9

10

9

8

9

9

9

9

9

9

8

10

9

9

8

10

9

8

10

8

9

9

10

10

9

9

9

9

9

9

9

10

10

10

9

10

9

9

8

10

10

10

9

8

9

9

10

10

8

9

9

10

9

10

9

10

9

8

10

9

9

10

10

10

9

9

8

9

8

9

9

8

9

9

10

9

8

10

9

9

8

7

8

10

9

9

10

10

10

9

10

9

8

9

10

10

10

9

9

9

10

9

9

10

10

8

8

9

10

9

10

9

9

10

9

9

9

10

9

9

9

10

9

10

10

9

9

9

9

10

8

8

9

10

10

9

9

9

9

9

10

10

8

8

9

8

9

8

9

10

9

9

9

10

8

9

8

9

9

8

9

9

8

8

10

9

10

9

10

10

10

10

9

9

9

10

10

10

8

9

9

10

9

10

9

8

9

8

10

9

9

9

9

8

10

10

9

9

8

10

9

10

9

10

10

9

10

9

10

8

10

10

9

9

9

10

9

9

9

9

10

9

9

9

10

9

9

9

9

9

8

9

9

9

10

9

10

9

10

9

9

8

9

9

10

10

9

9

9

8

10

9

9

9

8

8

9

9

8

9

9

9

9

10

9

9

9

9

9

9

10

9

9

8

10

9

10

9

10

9

8

9

10

9

9

8

9

9

9

10

9

9

10

9

9

9

9

9

10

9

9

8

9

9

9

9

9

9

10

9

9

10

9

10

9

9

9

10

10

9

8

10

10

9

8

8

8

9

8

10

8

10

10

10

10

10

8

10

10

10

9

10

10

8

10

10

9

10

10

9

9

9

9

9

9

10

9

9

10

10

10

8

9

10

10

9

9

10

10

10

8

9

10

10

8

9

10

9

9

9

10

10

9

9

10

10

10

7

10

10

8

9

9

9

10

9

9

10

10

9

10

10

10

10

9

10

9

10

10

10

9

10

9

9

10

10

10

7

10

10

10

9

9

9

10

8

7

10

10

10

9

9

9

9

9

9

10

10

8

10

10

9

10

10

8

9

In [None]:
import pandas as pd 

# Reload the submission file as a sanity check.
data = pd.read_csv("submission.csv")

# count is a method: without the call parentheses only the bound-method repr
# was displayed. Calling it reports the number of non-null values per column.
data.count()

<bound method DataFrame.count of           Id  Category
0     169491        40
1      33711        34
2     100051         4
3      15271        57
4      13291         1
...      ...       ...
6367  278372        33
6368  264751         2
6369  236751         3
6370  265472        57
6371  218312        54

[6372 rows x 2 columns]>

In [None]:
from google.colab import files
# Download the generated submission file from the Colab runtime through the browser.
files.download('submission.csv') 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>