In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_files:
    print(file_path)

# Any results you write to the current directory are saved as output.

In [None]:
import tensorflow as tf
# Report which TensorFlow release this notebook is running against.
print(tf.version.VERSION)

In [None]:
# Image height and width (default `dim` of the data generator).
img_h, img_w = 224, 224

# Settings for the question / language side of the model.
h_size, seq_length = 128, 100
vocabulary = 500
num_first_LSTM_layers = 3

# Number of answer classes (size of the final softmax layer).
num_classes = 13

In [None]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [None]:
import os
import tensorflow as tf
import numpy as np

# Seed TensorFlow and NumPy with the same value so that
# experiments are reproducible from run to run.
SEED = 12
for set_seed in (tf.random.set_seed, np.random.seed):
    set_seed(SEED)

# Current working directory of the notebook process.
cwd = os.getcwd()

# Enable on-demand GPU memory allocation on every visible GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth has to be configured identically on all GPUs.
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Raised when the GPUs were already initialised before this cell ran.
        print(err)

In [None]:
import numpy as np
import keras
from keras.preprocessing.image import load_img, img_to_array

class DataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` producing batches of (images, questions, labels).

    Samples are taken from ``json_dict['questions']``. For every sample the
    image is loaded from ``path`` + ``image_filename`` and resized to ``dim``,
    the question is returned as the raw string found in the annotations, and
    the answer is mapped to an integer class label.
    """

    def __init__(self, path, json_dict, batch_size=32, dim=(img_h, img_w), n_channels=3,
                 n_classes=13, shuffle=True):
        """Store the configuration and build the initial sample order.

        Args:
            path: Directory that contains the image files.
            json_dict: Parsed annotations; ``json_dict['questions']`` is a
                sequence of dicts with the keys 'image_filename', 'question'
                and 'answer'.
            batch_size: Number of samples per batch.
            dim: (height, width) every image is resized to.
            n_channels: Number of channels of the image array.
            n_classes: Number of answer classes (stored, not used internally).
            shuffle: Reshuffle the sample order at the end of every epoch.
        """
        super().__init__()
        self.dim = dim
        self.batch_size = batch_size
        self.path = path
        self.questions = json_dict['questions']
        self.n_questions = len(self.questions)
        self.list_IDs = self.__generate_IDs()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches per epoch (a trailing partial batch is dropped)."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(X_image, X_question, y)``."""
        # Positions (into list_IDs) that belong to this batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Translate positions into question IDs
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        # The method is public (`data_generation`); the previous
        # `self.__data_generation` was name-mangled to an attribute that does
        # not exist and raised AttributeError on every batch.
        # NOTE(review): Keras interprets a 3-tuple coming from a Sequence as
        # (inputs, targets, sample_weights). Before passing this generator to
        # `fit`, the batch probably has to be regrouped as
        # ([X_image, X_question], y) - confirm with the training code.
        return self.data_generation(list_IDs_temp)

    def __generate_IDs(self):
        """Return the question IDs ``0 .. n_questions - 1`` as a list."""
        return list(range(self.n_questions))

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def data_generation(self, list_IDs_temp):
        """Load the samples identified by ``list_IDs_temp``.

        Args:
            list_IDs_temp: Iterable of question IDs (indices into the
                question list).

        Returns:
            Tuple ``(X_image, X_question, y)``: an array of shape
            ``(n_samples, *dim, n_channels)``, the list of raw question
            strings and an integer array of class labels.

        Raises:
            ValueError: If an answer is neither 'yes', 'no' nor an integer.
        """
        # Imported here so the class does not depend on a later notebook cell
        # having imported PIL already.
        from PIL import Image

        # Size the buffers by the IDs actually requested; capping at
        # batch_size overflowed the arrays when more IDs were passed.
        size = len(list_IDs_temp)
        X_image = np.empty((size, *self.dim, self.n_channels))
        X_question = []
        y = np.empty((size), dtype=int)

        # PIL expects (width, height) whereas `dim` is (height, width).
        target_size = (self.dim[1], self.dim[0])

        for i, ID in enumerate(list_IDs_temp):
            question = self.questions[ID]

            # Image: load, resize to the configured size, convert to an array
            image_path = os.path.join(self.path, question['image_filename'])
            PIL_image = load_img(image_path)
            # LANCZOS is the filter formerly exposed as ANTIALIAS
            # (the ANTIALIAS alias was removed in Pillow 10).
            PIL_image = PIL_image.resize(target_size, Image.LANCZOS)
            X_image[i,] = img_to_array(PIL_image)

            X_question.append(question['question'])

            # Label: 'no' -> 11, 'yes' -> 12, anything else is a number
            # that is used directly as the class index
            answer = question['answer']
            if answer == 'no':
                y[i] = 11
            elif answer == 'yes':
                y[i] = 12
            else:
                y[i] = int(answer)

        return X_image, X_question, y

In [None]:
import json

# Parse the training annotations into a Python dict.
with open('/kaggle/input/ann-and-dl-vqa/dataset_vqa/train_data.json') as train_file:
    json_dict = json.loads(train_file.read())

In [None]:
# Generator over the training questions, using the default batch settings.
dg = DataGenerator(path='/kaggle/input/ann-and-dl-vqa/dataset_vqa/train/', json_dict=json_dict)

In [None]:
# Load the samples with question IDs 1 to 5 and show the resulting tuple.
sample_ids = list(range(1, 6))
data = dg.data_generation(sample_ids)
data

In [None]:
# Raw annotation entries 1 to 5, i.e. the same IDs that were passed to data_generation.
json_dict['questions'][1:6]

In [None]:
# Shape of the first image array of the sample batch.
data[0][0].shape

In [None]:
%matplotlib inline

import time
import matplotlib.pyplot as plt
from IPython.display import clear_output

from PIL import Image

# Show the five loaded sample images side by side.
fig, ax = plt.subplots(1, 5, figsize=(16,16))
for idx, axis in enumerate(ax):
    # The generator returns float arrays; cast to uint8 for display.
    axis.imshow(np.uint8(data[0][idx]))

In [None]:
# Load VGG19 with ImageNet weights, including its fully connected top layers.
# `pooling` only has an effect when include_top=False, and its "disabled"
# value is the None object, not the string 'None'.
transfer = tf.keras.applications.vgg19.VGG19(include_top=True, weights='imagenet', input_shape=(224, 224, 3), pooling=None)

In [None]:
# Mark the pre-trained VGG19 model as non-trainable.
transfer.trainable = False

In [None]:
# Print the layer-by-layer summary of the VGG19 model.
transfer.summary()

In [None]:
# Image branch: apply the VGG19 layers, looked up by name, to a new input
# tensor. The chain ends at `fc2`, whose output is the image feature vector.
image_input = tf.keras.layers.Input(shape=(224, 224, 3))

vgg_layer_names = [
    "block1_conv1", "block1_conv2", "block1_pool",
    "block2_conv1", "block2_conv2", "block2_pool",
    "block3_conv1", "block3_conv2", "block3_conv3", "block3_conv4", "block3_pool",
    "block4_conv1", "block4_conv2", "block4_conv3", "block4_conv4", "block4_pool",
    "block5_conv1", "block5_conv2", "block5_conv3", "block5_conv4", "block5_pool",
    "flatten", "fc1",
]

x = image_input
for layer_name in vgg_layer_names:
    x = transfer.get_layer(layer_name)(x)
image_output = transfer.get_layer("fc2")(x)

In [None]:
# NOTE(review): nothing in this cell executes. It is a disabled alternative
# that wrapped `transfer` in a Sequential model followed by a Flatten layer.
#transfer.trainable = False
#image_model = tf.keras.Sequential()
#image_model.add(transfer)
#image_model.add(tf.keras.layers.Flatten())
#image_model.summary()

In [None]:
# NOTE(review): disabled Sequential language model. Every line is commented
# out; the continuation line of the LSTM call used to be left uncommented,
# which made the whole cell fail with a syntax error.
#language_model = tf.keras.Sequential()
##################################################################################################################
# THE VERSION BELOW IS CORRECT, BUT IT NEEDS THE VOCABULARY
#model.add(tf.keras.layers.LSTM(units=h_size, batch_input_shape=[None, seq_length, len(vocabulary)],
#                               return_sequences=True, stateful=False))
# THE VERSION BELOW IS NOT CORRECT, BUT IT CAN BE TESTED
#for i in range(num_first_LSTM_layers):
#  language_model.add(tf.keras.layers.LSTM(units=h_size, batch_input_shape=[None, seq_length, 1000],
#                               return_sequences=True, stateful=False))
###############################################################################################################
#language_model.add(tf.keras.layers.LSTM(units=h_size, return_sequences=False, stateful=False))
#language_model.summary()

In [None]:
# Question branch: integer token ids -> embeddings -> single LSTM feature vector.
# The input length is the sequence length (`seq_length`); it previously used
# `h_size`, which the disabled language model uses as the LSTM unit count.
# NOTE(review): input_dim (10000) is not tied to the `vocabulary` constant -
# confirm against the tokenizer once it exists.
question_input = tf.keras.layers.Input(shape=(seq_length,), dtype='int32')
embedded_question = tf.keras.layers.Embedding(input_dim=10000, output_dim=256, input_length=seq_length)(question_input)
language_output = tf.keras.layers.LSTM(256)(embedded_question)

In [None]:
# Join the image features and the question features along the last axis.
combined = tf.keras.layers.Concatenate()([image_output, language_output])

In [None]:
# Classification head on top of the merged features.
# The hidden layer uses ReLU: softmax belongs only on the output layer, and
# on a hidden layer it squashes the features into a probability distribution.
dense_1 = tf.keras.layers.Dense(units = 128, activation='relu')(combined)
output = tf.keras.layers.Dense(units = num_classes, activation='softmax')(dense_1)
model = tf.keras.models.Model(inputs=[image_input, question_input], outputs=output)

In [None]:
# The data generator emits integer class indices, not one-hot vectors,
# so the sparse variant of categorical cross-entropy is the matching loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.summary()

In [None]:
# Render the architecture of the combined model as a graph.
tf.keras.utils.plot_model(model=model)