In [None]:
# Fetch the project repository; git places it in ./vqamd_floyd.
!git clone https://github.com/sominwadhwa/vqamd_floyd

In [None]:
# Run the repository's setup script.
# NOTE(review): the %cd below is commented out, so the script is looked up in the current
# working directory -- confirm that is where run_me_first_on_floyd.sh actually lives.
#%cd vqamd_floyd/
!bash run_me_first_on_floyd.sh

In [None]:
import gc
import os
import pickle as pk
import sys, warnings
from random import shuffle, sample
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import scipy.io
import spacy
#from spacy.en import English
from keras.layers import LSTM, Merge, Reshape
from keras.layers.core import Dense, Dropout, Activation
from keras.models import Sequential
from keras.models import model_from_json
from keras.optimizers import SGD
from keras.utils import np_utils, generic_utils
from progressbar import Bar, ETA, Percentage, ProgressBar
from sklearn.preprocessing import LabelEncoder

import src
from src.utils import *
from src.features import *

In [None]:
# NOTE(review): this installs a package named `src` from the package index, and it runs
# only after the previous cell has already executed `import src`. Presumably the `src`
# that is wanted is the project's own directory (a later cell does `%cd src`) -- confirm
# whether this cell is needed at all.
!pip install src

In [None]:
# Print the TensorFlow git version and release version as seen by a fresh `python` process.
!python -c "import tensorflow as tf; print(tf.GIT_VERSION, tf.VERSION)"

In [None]:
# Load the pre-processed training data as lists of lines. The question, answer and image-id
# lists are zipped together position by position in later cells.
# Every file is read inside a `with` block so its handle is closed as soon as it is consumed.
with open("preprocessed/v1/ques_train.txt", "rb") as fh:
    training_questions = fh.read().decode('utf8').splitlines()
with open("preprocessed/v1/answer_train.txt", "rb") as fh:
    answers_train = fh.read().decode('utf8').splitlines()
with open("preprocessed/v1/images_coco_id.txt", "rb") as fh:
    images_train = fh.read().decode('utf8').splitlines()
# Each line of the ID map holds two whitespace-separated fields (parsed into `id_map` later).
with open('preprocessed/v1/coco_vgg_IDMap.txt') as fh:
    img_ids = fh.read().splitlines()
# Location of the VGG feature file that is loaded with scipy.io.loadmat.
vgg_path = "/content/vqamd_floyd/vgg_feats.mat"

In [None]:
# Sanity check: display five randomly chosen (image id, question, answer) triples.
sample(list(zip(images_train, training_questions, answers_train)), 5)

In [None]:
#!python -m spacy download en_core_web_sm
# Load the spaCy English pipeline and bind it to `nlp`: the training and evaluation loops
# call `nlp(question)` and pass `nlp` to the tensor-building helper, so the loaded object
# must be kept rather than discarded. (`spacy` itself is imported in the import cell.)
nlp = spacy.load('en_core_web_sm')
print ("Loaded WordVec")

In [None]:
%time vgg_features = scipy.io.loadmat('/content/vqamd_floyd/vgg_feats.mat')
img_features = vgg_features['feats']
id_map = dict()
print ("Loaded VGG Weights")

In [None]:
# Force a garbage-collection pass after loading the large feature file.
gc.collect()

In [None]:
upper_lim = 1000 #Number of most frequently occurring answers in COCOVQA (Covering >80% of the total data)
training_questions, answers_train, images_train = freq_answers(training_questions,
                                                               answers_train, images_train, upper_lim)

# Question lengths are computed here, AFTER filtering, so that they exist (the name was
# previously read without ever being assigned) and line up one-to-one with the questions.
# The whitespace word count is used as a cheap stand-in for the spaCy token count.
# NOTE(review): confirm this proxy is close enough for batching purposes.
training_questions_len = [len(question.split()) for question in training_questions]

# Sort all lists together by question length (shortest first): the training loop sizes
# each batch from the LAST question of the batch, i.e. it expects that one to be the longest.
training_questions_len, training_questions, answers_train, images_train = (
    list(column) for column in zip(*sorted(zip(training_questions_len,
                                               training_questions,
                                               answers_train,
                                               images_train))))
print (len(training_questions), len(answers_train),len(images_train))

In [None]:
# Fit a label encoder on the training answers and record how many distinct classes it found.
lbl = LabelEncoder()
lbl.fit(answers_train)
nb_classes = len(list(lbl.classes_))
# Persist the fitted encoder for the evaluation cells; the `with` block guarantees the
# file is flushed and closed before anything tries to read it back.
with open('preprocessed/v1/label_encoder_lstm.sav','wb') as encoder_file:
    pk.dump(lbl, encoder_file)

In [None]:
# Data / input dimensions.
# batch_size: number of samples handed to each train_on_batch call.
# img_dim: length of the image feature vector fed to the image branch.
# word2vec_dim: per-token vector size fed to the first LSTM layer.
batch_size               =      256
img_dim                  =     4096
word2vec_dim             =      300
#max_len                 =       30 # Required only when using Fixed-Length Padding

# Network shape: hidden units and layer counts for the MLP head and the LSTM stack,
# followed by the dropout rate and activation meant for the MLP layers.
num_hidden_nodes_mlp     =     1024
num_hidden_nodes_lstm    =      512
num_layers_mlp           =        3
num_layers_lstm          =        3
dropout                  =       0.5
activation_mlp           =     'tanh'

In [None]:
# Change the following based on your usage, THESE WILL DIRECTLY AFFECT THE DURATION OF NETWORK TRAINING
# num_epochs: number of full passes over the training data made by the training loop.
# log_interval: model weights are saved every `log_interval` epochs.
num_epochs               =         300 
log_interval             =         15

In [None]:
# Fill the lookup table: every line of `img_ids` carries two whitespace-separated fields;
# the first becomes the key and the second, converted to int, becomes the value.
id_map.update(
    (fields[0], int(fields[1]))
    for fields in (mapping_line.split() for mapping_line in img_ids)
)

In [None]:
# Image branch: a single Reshape layer whose input and target shapes are both (img_dim,),
# i.e. the image feature vector passes through unchanged.
passthrough_layer = Reshape(input_shape=(img_dim,), target_shape=(img_dim,))

image_model = Sequential()
image_model.add(passthrough_layer)
image_model.summary()

In [None]:
# Question branch: a stack of LSTM layers over variable-length sequences of word vectors.
# The first layer fixes the input shape, `num_layers_lstm - 2` middle layers keep returning
# full sequences, and the final layer returns only its last output.
lstm_stack = [LSTM(output_dim=num_hidden_nodes_lstm,
                   return_sequences=True,
                   input_shape=(None, word2vec_dim))]

middle_layers_left = num_layers_lstm - 2
while middle_layers_left > 0:
    lstm_stack.append(LSTM(output_dim=num_hidden_nodes_lstm, return_sequences=True))
    middle_layers_left -= 1

lstm_stack.append(LSTM(output_dim=num_hidden_nodes_lstm, return_sequences=False))

language_model = Sequential()
for lstm_layer in lstm_stack:
    language_model.add(lstm_layer)

language_model.summary()

In [None]:
# Combined model: concatenate the question and image branches along axis 1, then apply
# `num_layers_mlp` blocks of Dense -> activation -> dropout and a softmax classifier.
model = Sequential()
model.add(Merge([language_model, image_model], mode='concat', concat_axis=1))
for i in range(num_layers_mlp):
    model.add(Dense(num_hidden_nodes_mlp, init='uniform'))
    # Activation and dropout rate come from the hyper-parameter cell instead of being
    # repeated here as literals, so changing the configuration actually changes the model.
    model.add(Activation(activation_mlp))
    model.add(Dropout(dropout))
# One output unit per retained answer (the `upper_lim` most frequent answers).
model.add(Dense(upper_lim))
model.add(Activation("softmax"))

In [None]:
# Serialise the architecture to JSON so the evaluation cells can rebuild the model.
# The `with` block closes the file, so the JSON is fully written before it is read back.
model_dump = model.to_json()
with open('lstm_structure'  + '.json', 'w') as structure_file:
    structure_file.write(model_dump)

In [None]:
# Compile with categorical cross-entropy loss and the RMSprop optimizer, then print the layer summary.
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
model.summary()

In [None]:
# Make sure the checkpoint directory exists up front, so a missing folder cannot make
# save_weights fail only after an epoch of training has already been spent.
if not os.path.isdir("weights"):
    os.makedirs("weights")

for k in range(num_epochs):
    progbar = generic_utils.Progbar(len(training_questions))
    # Walk the three parallel lists in lock-step, `batch_size` items at a time; the last,
    # shorter batch is padded with the final element of each list.
    batches = zip(grouped(training_questions, batch_size,
                          fillvalue=training_questions[-1]),
                  grouped(answers_train, batch_size,
                          fillvalue=answers_train[-1]),
                  grouped(images_train, batch_size,
                          fillvalue=images_train[-1]))
    for ques_batch, ans_batch, im_batch in batches:
        # Sequence length for this batch is taken from its last question, which relies
        # on the training data having been sorted by question length beforehand.
        timestep = len(nlp(ques_batch[-1]))
        X_ques_batch = get_questions_tensor_timeseries(ques_batch, nlp, timestep)
        #print (X_ques_batch.shape)
        X_img_batch = get_images_matrix(im_batch, id_map, img_features)
        Y_batch = get_answers_sum(ans_batch, lbl)
        loss = model.train_on_batch([X_ques_batch, X_img_batch], Y_batch)
        progbar.add(batch_size, values=[('train loss', loss)])
    # Checkpoint every `log_interval` epochs and always after the final epoch. Doing the
    # final save inside the loop avoids referencing `k` when num_epochs is 0 and avoids
    # writing the same file twice when the last epoch is also a logging epoch.
    if k % log_interval == 0 or k == num_epochs - 1:
        model.save_weights("weights/LSTM" + "_epoch_{:02d}.hdf5".format(k))

In [None]:
# Rebuild the network from its JSON description; the `with` block closes the file handle.
with open('lstm_structure.json') as structure_file:
    model = model_from_json(structure_file.read()) #fully trained model & weights present at /floyd/input/vqa_data/weights/
# In case you wish to evaluate the model you just trained, uncomment the following line of code & comment out the subsequent one -
#model.load_weights('weights/LSTM_epoch_00.hdf5')
model.load_weights('/floyd/input/vqa_data/weights/LSTM_epoch_45.hdf5')
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

print ("Model Loaded with Weights")
model.summary()

In [None]:
# Load the validation image ids, questions and answers as parallel lists of lines.
# Each file is read inside a `with` block so its handle is closed right away.
with open('preprocessed/v1/val_images_coco_id.txt','rb') as val_file:
    val_imgs = val_file.read().decode('utf-8').splitlines()
with open('preprocessed/v1/ques_val.txt','rb') as val_file:
    val_ques = val_file.read().decode('utf-8').splitlines()
with open('preprocessed/v1/answer_val.txt','rb') as val_file:
    val_ans = val_file.read().decode('utf-8').splitlines()

In [None]:
# Restore the label encoder saved during training; the `with` block closes the file handle.
# Only unpickle files you produced yourself: pickle.load can execute arbitrary code.
with open('preprocessed/v1/label_encoder_lstm.sav','rb') as encoder_file:
    label_encoder = pk.load(encoder_file)

In [None]:
# Accumulator for the predicted answer strings, filled by the evaluation loop.
y_pred = []
# NOTE: this rebinds the `batch_size` that was set to 256 in the hyper-parameter cell;
# re-running the training cell after this one would train with batches of 128.
batch_size = 128 

#print ("Word2Vec Loaded!")

# Progress bar layout: label, percentage, '#'-filled bar and estimated time remaining.
widgets = ['Evaluating ', Percentage(), ' ', Bar(marker='#',left='[',right=']'), ' ', ETA()]
pbar = ProgressBar(widgets=widgets)

In [None]:
# Predict an answer for every validation question, `batch_size` items at a time.
# The last, shorter batch is padded with the first element of each list, so `y_pred` can
# end up longer than `val_ques`; the scoring cell zips the lists and ignores the surplus.
for qu_batch,an_batch,im_batch in pbar(zip(grouped(val_ques, batch_size,
                                                   fillvalue=val_ques[0]),
                                           grouped(val_ans, batch_size,
                                                   fillvalue=val_ans[0]),
                                           grouped(val_imgs, batch_size,
                                                   fillvalue=val_imgs[0]))):
    # The validation questions are not sorted by length (and the final batch ends with
    # padding), so the last question is not necessarily the longest one. Size the tensor
    # by the longest question in the batch instead.
    # NOTE(review): presumably the helper zero-pads shorter questions up to `timesteps`
    # and cuts longer ones off -- confirm against src.
    timesteps = max(len(nlp(question)) for question in qu_batch)
    X_ques_batch = get_questions_tensor_timeseries(qu_batch, nlp, timesteps)
    X_i_batch = get_images_matrix(im_batch, id_map, img_features)
    X_batch = [X_ques_batch, X_i_batch]
    y_predict = model.predict_classes(X_batch, verbose=0)
    # Map predicted class indices back to answer strings.
    y_pred.extend(label_encoder.inverse_transform(y_predict))
In [None]:
# Score the predictions and write one record per question to res.txt.
# A prediction counts as correct when it equals at least one of the ';'-separated
# ground-truth answers for that question.
correct_val = 0.0
total = 0
skipped_records = 0  # records that could not be written to res.txt

with open('res.txt','w') as f1:
    for pred, truth, ques, img in zip(y_pred, val_ans, val_ques, val_imgs):
        # Compare against each individual answer (`_truth`), not the whole joined string.
        if any(pred == _truth for _truth in truth.split(';')):
            correct_val += 1
        total += 1

        # Writing the record is best effort: an un-encodable character must not abort the
        # evaluation, but only encoding problems are tolerated and they are counted.
        try:
            record = '\n'.join([str(ques), str(img), str(pred), str(truth)]) + '\n\n'
            f1.write(record)
        except UnicodeError:
            skipped_records += 1

# Guard against an empty validation set instead of dividing by zero.
accuracy = round((correct_val/total)*100,2) if total else 0.0
print ("Accuracy: ", accuracy)
if skipped_records:
    print ("Records not written to res.txt: ", skipped_records)

In [None]:
# Change the notebook's working directory to src/ (relative paths in later cells resolve from there).
%cd src

In [None]:
# Run test.py from the current working directory in a separate Python process.
!python test.py