In [None]:
# Fetch the project repository into ./vqamd_floyd (shell command; needs git and network access).
!git clone https://github.com/sominwadhwa/vqamd_floyd

In [None]:
# Run the repository's setup script from the current working directory.
# NOTE(review): the script's contents are not visible here; presumably it prepares data and
# dependencies — confirm. Uncomment the %cd line if the script lives inside the cloned repo.
#%cd vqamd_floyd/
!bash run_me_first_on_floyd.sh

In [None]:
import sys, warnings
warnings.filterwarnings("ignore")
from random import shuffle, sample
import pickle as pk
import gc

import numpy as np
import pandas as pd
import scipy.io
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD
from keras.utils import np_utils, generic_utils
from progressbar import Bar, ETA, Percentage, ProgressBar    
from keras.models import model_from_json
from sklearn.preprocessing import LabelEncoder
import spacy
#from spacy.en import English
import src
from src.utils import *
from src.features import *

In [None]:
# NOTE(review): this cell sits after the cell that already executes `import src`, so on a fresh
# kernel it cannot help that import. Presumably `src` is meant to be the package shipped with the
# cloned repository — confirm that installing a PyPI distribution named `src` is really intended.
!pip install src

In [None]:
!python -c "import tensorflow as tf; print(tf.GIT_VERSION, tf.VERSION)"

In [None]:
def _read_lines(path):
    """Return the lines of the UTF-8 encoded text file at `path`.

    The file is read as bytes and decoded explicitly, and the handle is closed
    as soon as the content has been read.
    """
    with open(path, "rb") as handle:
        return handle.read().decode('utf8').splitlines()


# Parallel lists: entry i of each list belongs to the same training example.
training_questions = _read_lines("preprocessed/v1/ques_train.txt")
answers_train      = _read_lines("preprocessed/v1/answer_train.txt")
images_train       = _read_lines("preprocessed/v1/images_coco_id.txt")

# The ID map is read in text mode with the platform default encoding, as before.
with open('preprocessed/v1/coco_vgg_IDMap.txt') as id_map_file:
    img_ids = id_map_file.read().splitlines()

# Location of the VGG feature matrix (.mat) loaded in a later cell.
vgg_path           = "/content/vqamd_floyd/vgg_feats.mat"

In [None]:
# Show five random (image id, question, answer) triples as a sanity check of the loaded data.
sample(
    list(zip(images_train, training_questions, answers_train)),
    k=5,
)

In [None]:
# One-time setup if the model is missing:  !python -m spacy download en_core_web_sm
# Keep a reference to the loaded pipeline: the training and evaluation cells pass `nlp`
# to the question featurizers, so discarding the return value leaves `nlp` undefined.
# NOTE(review): word2vec_dim is set to 300 in this notebook — confirm that the vectors
# produced by this model actually have that width.
nlp = spacy.load('en_core_web_sm')
print ("Loaded WordVec")

In [None]:
%time vgg_features = scipy.io.loadmat('/content/vqamd_floyd/vgg_feats.mat')
img_features = vgg_features['feats']
id_map = dict()
print ("Loaded VGG Weights")

In [None]:
# Force a garbage-collection pass after the large loads above; the cell displays the
# number of unreachable objects found.
gc.collect()

In [None]:
# Number of most frequently occurring answers in COCOVQA to keep (covering >85% of the total data).
upper_lim = 1000

# freq_answers returns the three parallel lists restricted to those answers.
filtered = freq_answers(training_questions, answers_train, images_train, upper_lim)
training_questions, answers_train, images_train = filtered

# The three lengths are printed side by side so a mismatch is easy to spot.
print (*(len(rows) for rows in (training_questions, answers_train, images_train)))

In [None]:
# Map each answer string to an integer class id; the number of distinct answers
# becomes the width of the model's softmax output.
lbl = LabelEncoder()
lbl.fit(answers_train)
nb_classes = len(lbl.classes_)

# Persist the fitted encoder so the evaluation cells can decode predictions.
# The context manager guarantees the pickle is flushed and the handle closed.
with open('preprocessed/v1/label_encoder_mlp.sav', 'wb') as encoder_file:
    pk.dump(lbl, encoder_file)

In [None]:
# Hyperparameters of the MLP baseline built further down in this notebook.
num_hidden_units  = 1024    # units in every Dense hidden layer
num_hidden_layers = 3       # Dense/Activation/Dropout stacks added after the input Dense layer
batch_size        = 256     # training batch size (reassigned to 128 before evaluation)
dropout           = 0.5     # rate passed to every Dropout layer
activation        = 'tanh'  # activation used inside the hidden stacks
img_dim           = 4096    # image feature width — presumably the VGG feature length; confirm
word2vec_dim      = 300     # question feature width — presumably the word-vector length; confirm

In [None]:
# Training schedule: total number of passes over the training data, and how often
# (in epochs) the training loop writes a weights checkpoint.
num_epochs = 200
log_interval = 25

In [None]:
# Each line of img_ids is whitespace separated: the first token is the key,
# the second token (as an int) is the value stored in id_map.
id_map.update(
    (tokens[0], int(tokens[1]))
    for tokens in (line.split() for line in img_ids)
)

In [None]:
# MLP baseline: the input is the question vector concatenated with the image features.
input_width = word2vec_dim + img_dim

model = Sequential()

# Input projection.
# NOTE(review): this first Dense layer is followed directly by Dropout, with no
# Activation in between — confirm that a linear first layer is intended.
model.add(Dense(num_hidden_units, input_dim=input_width, kernel_initializer='uniform'))
model.add(Dropout(dropout))

# Hidden stacks: Dense -> Activation -> Dropout, repeated num_hidden_layers times.
for i in range(num_hidden_layers):
    for layer in (
        Dense(num_hidden_units, kernel_initializer='uniform'),
        Activation(activation),
        Dropout(dropout),
    ):
        model.add(layer)

# Classification head: one softmax unit per answer class.
model.add(Dense(nb_classes, kernel_initializer='uniform'))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
model.summary()

In [None]:
# Serialise the architecture (not the weights) to JSON so the evaluation cells can
# rebuild the model. The context manager flushes and closes the file deterministically.
model_dump = model.to_json()
with open('baseline_mlp.json', 'w') as json_file:
    json_file.write(model_dump)

In [None]:
import os  # local import: only this cell needs it, to create the checkpoint directory

# Checkpoints are written below "weights/"; make sure the directory exists before the
# first save so that a long training run does not fail at its first checkpoint.
os.makedirs("weights", exist_ok=True)

for k in range(num_epochs):
    # Apply one shared random permutation to the three parallel lists so that
    # question i, answer i and image i still describe the same example.
    index_shuffle = list(range(len(training_questions)))
    shuffle(index_shuffle)
    training_questions = [training_questions[i] for i in index_shuffle]
    answers_train = [answers_train[i] for i in index_shuffle]
    images_train = [images_train[i] for i in index_shuffle]

    progbar = generic_utils.Progbar(len(training_questions))

    # grouped() comes from the project's helpers; presumably it yields fixed-size
    # batches and pads the last one with `fillvalue` — confirm in src.utils.
    batches = zip(
        grouped(training_questions, batch_size, fillvalue=training_questions[-1]),
        grouped(answers_train, batch_size, fillvalue=answers_train[-1]),
        grouped(images_train, batch_size, fillvalue=images_train[-1]),
    )
    for ques_batch, ans_batch, im_batch in batches:
        # NOTE(review): training featurizes questions with get_questions_sum while the
        # evaluation cell uses get_questions_matrix — confirm both produce the same features.
        X_ques_batch = get_questions_sum(ques_batch, nlp)
        X_img_batch = get_images_matrix(im_batch, id_map, img_features)
        # Model input: question features followed by image features, column-wise.
        X_batch = np.hstack((X_ques_batch, X_img_batch))
        Y_batch = get_answers_sum(ans_batch, lbl)
        loss = model.train_on_batch(X_batch, Y_batch)
        progbar.add(batch_size, values=[('train loss', loss)])

    # Periodic checkpoint (epochs 0, log_interval, 2*log_interval, ...).
    if k % log_interval == 0:
        model.save_weights("weights/MLP_epoch_{:02d}.hdf5".format(k))

# Always keep the weights of the last epoch. The guard avoids relying on the loop
# variable when no epoch was run (num_epochs == 0).
if num_epochs > 0:
    model.save_weights("weights/MLP_epoch_{:02d}.hdf5".format(num_epochs - 1))

In [None]:
# Rebuild the architecture from the JSON written earlier; the context manager closes
# the file as soon as it has been read.
with open('baseline_mlp.json') as json_file:
    model = model_from_json(json_file.read())

# In case you wish to evaluate the model you just trained, uncomment the following line of code & comment out the subsequent one -
#model.load_weights('weights/MLP_epoch_25.hdf5')
model.load_weights('/floyd/input/vqa_data/weights/MLP_epoch_25.hdf5')
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

print ("Model Loaded with Weights")
model.summary()

In [None]:
# Validation split: three parallel lists (image ids, questions, answers), one entry per line.
# Files are read as bytes and decoded as UTF-8; each handle is closed right after reading.
with open('preprocessed/v1/val_images_coco_id.txt', 'rb') as val_imgs_file:
    val_imgs = val_imgs_file.read().decode('utf-8').splitlines()
with open('preprocessed/v1/ques_val.txt', 'rb') as val_ques_file:
    val_ques = val_ques_file.read().decode('utf-8').splitlines()
with open('preprocessed/v1/answer_val.txt', 'rb') as val_ans_file:
    val_ans = val_ans_file.read().decode('utf-8').splitlines()

In [None]:
# Restore the label encoder pickled by the training section; the handle is closed after loading.
# Unpickling executes code embedded in the file, so only load encoders this notebook produced.
with open('preprocessed/v1/label_encoder_mlp.sav', 'rb') as encoder_file:
    label_encoder = pk.load(encoder_file)

In [None]:
# Evaluation uses its own batch size; this overrides the value used for training.
batch_size = 128

# Decoded answer strings predicted for the validation set, filled by the evaluation loop.
y_pred = []

# Console progress bar: label, percentage, bar and estimated time remaining.
widgets = [
    'Evaluating ',
    Percentage(),
    ' ',
    Bar(marker='#', left='[', right=']'),
    ' ',
    ETA(),
]
pbar = ProgressBar(widgets=widgets)

In [None]:
# Materialise the batches so the progress bar receives a sized sequence rather than a
# bare zip iterator (which has no len(), so the Percentage/ETA widgets have no total).
# grouped() presumably pads the final batch with `fillvalue` — confirm in src.utils.
eval_batches = list(zip(
    grouped(val_ques, batch_size, fillvalue=val_ques[0]),
    grouped(val_ans, batch_size, fillvalue=val_ans[0]),
    grouped(val_imgs, batch_size, fillvalue=val_imgs[0]),
))

batch_predictions = []
for qu_batch, an_batch, im_batch in pbar(eval_batches):
    # NOTE(review): evaluation uses get_questions_matrix while training uses
    # get_questions_sum — confirm both produce the same question features.
    X_q_batch = get_questions_matrix(qu_batch, nlp)
    X_i_batch = get_images_matrix(im_batch, id_map, img_features)
    X_batch = np.hstack((X_q_batch, X_i_batch))
    # Class id = index of the largest softmax output. This is what predict_classes
    # computed for a multi-class softmax head, and it also works on Keras versions
    # where predict_classes has been removed.
    y_predict = np.argmax(model.predict(X_batch, verbose=0), axis=-1)
    batch_predictions.extend(label_encoder.inverse_transform(y_predict))

# Keep exactly one prediction per validation question: anything beyond len(val_ques)
# comes from padding of the last batch. Assigning in place (instead of extending)
# also makes re-running this cell safe — y_pred is not doubled.
y_pred[:] = batch_predictions[:len(val_ques)]

In [None]:

    # NOTE(review): this cell appears to be truncated. The `try:` (and any enclosing loop)
    # that this `except` belongs to is missing, and `correct_val`, `total` and `f1` are not
    # defined in any visible cell. As written the cell cannot run — restore the missing part.
    # When restoring, prefer catching a specific exception over a bare `except: pass`,
    # which silently hides every error.
    except:
        pass

# Report accuracy as a percentage rounded to two decimals, then close the results file.
print ("Accuracy: ", round((correct_val/total)*100,2))
#f1.write('Final Accuracy is ' + str(round(correct_val/total),2)*100)
f1.close()

In [None]:
# Change the kernel's working directory to src/. This persists for all later cells, so
# relative paths used earlier in the notebook will no longer resolve if cells are re-run.
%cd src

In [None]:
# Run test.py in a separate Python process, from the current working directory.
!python test.py