[View in Colaboratory](https://colab.research.google.com/github/SwapnilSParkhe/keras-flask-deploy-webapp/blob/master/Developing_Model.ipynb)

# Developing Model:

 - Building Model Data (ADS) and Architecture 
 - Fitting built Model to Data
 - Evaluating Model Performance

**Checking GPU status**

In [13]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 5257096417000077520, name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 11288900404
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 7908338663757240352
 physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7"]

## Analytical Dataset (ADS) Creation

**Image ID or Identifiers**

In [14]:
#Uploading relevant files from local to cloud (using google.colab lib)

#Library for Uploading data from local to cloud
from google.colab import files

#Upload train image text
files.upload()   #upload files 

#Upload valid image text
files.upload()   #upload files

#Upload test image text
files.upload()   #upload files

{}

In [0]:
#Importing file: reading the content into Py file
def import_file(input_file):
    file=open(input_file,'r')   #creating a bridge btwn OS and Py files
    content=file.read()   #reading content via the bridge
    file.close()   #closing the bridge
    return content

imported_train=import_file('Flickr_8k.trainImages.txt') 
imported_valid=import_file('Flickr_8k.devImages.txt') 
imported_test=import_file('Flickr_8k.testImages.txt')    

#Creating a set of image-IDs
def create_img_set(file):
    imgID_set=list()
    for item in file.split('\n'):   #accessing line by line
        if len(item)<1:   #rejecting empty spaces
            continue
        imgID=item.split('.')[0]   #only taking imgID (rejecting 'jpg')
        imgID_set.append(imgID)   #appending imgIDs to imgID_set
    return set(imgID_set)

imgID_trainset=create_img_set(imported_train)
imgID_validset=create_img_set(imported_valid)
imgID_testset=create_img_set(imported_test)

**Importing previously created files (from PreprocessingData NoteBook): Img desc and Img features**

In [4]:
#Uploading relevant files from local to cloud (using google.colab lib)
#Note: Preprocessing Desc and Feat was done solely on training data files

#Library for Uploading data from local to cloud
from google.colab import files

#Upload cleaned organised text file (from Text precprosssing step)
files.upload()   #upload files 

#Upload features file (from Image preprocessing step)
files.upload()   #upload files

Saving cln_orgnse_text.txt to cln_orgnse_text.txt


Saving features.pkl to features.pkl


In [0]:
#Importing image desc files for this image data germane to training set
def import_prepro_desc(prepro_file, dataset):
    file=import_file(prepro_file)
    desc=dict()
    for item in file.split('\n'):
        tokens=item.split()   #splitting by whitespaces
        image_ID,image_desc=tokens[0],tokens[1:]   #separating ID, desc
        if image_ID in dataset:   #inner join imgID & training imgID 
            if image_ID not in desc:   #new list for new image_ID key 
                desc[image_ID]=list()
            desc_='startseq ' + ' '.join(image_desc)+' endseq' # tokens
            desc[image_ID].append(desc_)
    return desc

desc_train=import_prepro_desc('cln_orgnse_text.txt',imgID_trainset)
desc_valid=import_prepro_desc('cln_orgnse_text.txt',imgID_validset)
desc_test=import_prepro_desc('cln_orgnse_text.txt',imgID_testset)

#Importing image features for this image data germane to training set
from pickle import load
def import_features(feature_file, dataset):
    all_features = load(open(feature_file, 'rb'))  #load all features
    features = {k: all_features[k] for k in dataset} #inner join
    return features

feature_train=import_features('features.pkl',imgID_trainset) #used later
feature_valid=import_features('features.pkl',imgID_validset) #used later
feature_test=import_features('features.pkl',imgID_testset) #used later

**Training data manipulations: Creating a custom Tokeizer function: Tokenizing descriptions**

In [24]:
#Creating a simple list of desc from dict of desc
def dict2list(input_dict):
    desc_list=list()
    for key in input_dict.keys():
        [desc_list.append(d) for d in input_dict[key]]
    return desc_list

desc_train_list=dict2list(desc_train)

#tokeinizing (could be improved by filetring english stopwords later)
#Note: turning each text into sequence of integers (integer: token ID)
from keras.preprocessing.text import Tokenizer
from pickle import dump
def tokenize(input_list):
    tokenizer=Tokenizer()
    tokenizer.fit_on_texts(input_list)
    return tokenizer

tokenizer=tokenize(desc_train_list) #to be used later
vocab_size=len(tokenizer.word_index)+1 #to be used later
print("Vocab Size:",vocab_size)
dump(tokenizer, open('tokenizer.pkl', 'wb')) #to be used later
from google.colab import files
files.download('tokenizer.pkl') #Download Tokenizer (used later)

#Length of the description with the most words
def max_length(desc_list):
    max_len=max([len(item.split()) for item in desc_list])
    return max_len
max_length = max_length(desc_train_list) #to be used later
print('Description Length', max_length)

#Longest desc check
def longest_desc(desc_list):
    max_length=max([len(item.split()) for item in desc_list])
    print("Max_len:",max_length)
    print("Desc:", [item for item in desc_list if len(item.split())==max_length])

longest_desc(desc_train_list)

Vocab Size: 7266
Description Length 33
Max_len: 33
Desc: ['startseq an man wearing green sweatshirt and blue vest is holding up dollar bills in front of his face while standing on busy sidewalk in front of group of men playing instruments endseq']


**LSTM's Analytical Dataset: Input(ImageID and Seq_item)-Ouput(SeqWord) data**

In [0]:
#Creating ADS for LSTM: Input(Image_ID and Seq_item)-Ouput(SeqWord)
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
import numpy as np
def create_ADS(tokenizer, max_length, desc_list, img):
    X_img_ID, X_desc_item, y=list(), list(), list()
    for desc in desc_list:
        seq=tokenizer.texts_to_sequences([desc])[0] #encoding seq
        for i in range(1,len(seq)):#split seq into multi X,y pairs
            in_seq, out_seq=seq[:i], seq[i] #desc input-output pair
            in_seq=pad_sequences([in_seq], maxlen=max_length)[0]
            out_seq=to_categorical([out_seq], num_classes=vocab_size)[0]
            X_img_ID.append(img) #appending  img IDs
            X_desc_item.append(in_seq)  #multi X-y pairs encoding
            y.append(out_seq)   #oneHot encoded version of output word
    return np.array(X_img_ID), np.array(X_desc_item), np.array(y)

#Progressive Data Loading: Generate data (yield one photo’s data/batch) 
#Note: intended to be used in a call to model.fit_generator()
def generate_data(tokenizer, max_length, desc_dict, img):
    while 1:   #loop for ever over images
        for key, desc_list in desc_dict.items(): #access image feature
            img_ = img[key][0]  #image ID
            in_img,in_seq,out_word=create_ADS(tokenizer,
                                              max_length,
                                              desc_list,img_)
            yield [[in_img, in_seq], out_word]

## Defining the Model Architecture (Merge Model of Embeddings+LSTMs with CNN penultimate layer)
**Note:** Combines both the encoded form (features) of the image input with the encoded form (context) of the text description generated so far; Combination of these two encoded inputs is then used by a very simple decoder model to generate the next word in the sequence

![Merge Model of Image Captioning](https://i.pinimg.com/originals/35/8b/dc/358bdc11e71f8c78632560c7c819919d.png)

**Importing relevant libraries**

In [0]:
from keras.layers import Input, Dropout, Dense #feat. encoding
from keras.layers import Embedding, Dropout, LSTM #desc. encoding
from keras.layers.merge import add #decoding
from keras.models import Model #Model-Input-Output architecture

In [29]:
def build_model_arch(vocab_size, max_length):
    #Encoder Models (Img-Feat and Desc Encoding)
    #1.Image feature extractor model
    feat_input=Input(shape=(4096,))
    feat_1=Dropout(0.5)(feat_input)
    feat_2=Dense(300, activation='relu')(feat_1)

    #2.Embedding+LSTM sequence model
    desc_input=Input(shape=(max_length,))
    desc_1=Embedding(vocab_size, 300, mask_zero=True)(desc_input)
    desc_2=Dropout(0.5)(desc_1)
    desc_3=LSTM(300)(desc_2)

    #Decoder Model ('adding' above encoding model layers; with FFNs)
    deco_1=add([feat_2, desc_3]) #adding element wise for both vectors
    deco_2=Dense(300, activation='relu')(deco_1)
    output=Dense(vocab_size, activation='softmax')(deco_2)

    #Creating Model-Input-Output architecture; Compiling (with loss, opt.)
    model=Model(inputs=[feat_input, desc_input], outputs=output)
    model.compile(loss='categorical_crossentropy', optimizer='adam')

    #Summarizing and Plotting model
    print(model.summary())
    return model

model = build_model_arch(vocab_size, max_length)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 33)           0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 4096)         0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 33, 300)      2179800     input_4[0][0]                    
__________________________________________________________________________________________________
dropout_3 (Dropout)             (None, 4096)         0           input_3[0][0]                    
__________________________________________________________________________________________________
dropout_4 

## Fitting Model to train data (validation data to balance bias-variance)

In [0]:
#Defining checkpoint callback; specifying model hyperparams
from keras.callbacks import ModelCheckpoint
filepath = 'best_model_weights.h5'
checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, 
                             save_best_only=True, mode='min')
epochs_=10
steps_train=len(desc_train)  #steps=N/batch_size
steps_valid=len(desc_valid)  #steps=N/batch_size

#Fitting model to generated data (along side validation loss checks)
generated_data_train=generate_data(tokenizer, max_length, 
                                   desc_train, feature_train)
generated_data_valid=generate_data(tokenizer, max_length, 
                                   desc_valid, feature_valid)
model.fit_generator(generated_data_train, epochs=epochs_,
                    steps_per_epoch=steps_train,
                    validation_data=generated_data_valid,
                    validation_steps=steps_valid,
                    callbacks=[checkpoint], verbose=1)

In [0]:
#Downloading Best model (weights)
from google.colab import files
files.download('best_model_weights.h5')

In [0]:
####################################################################
#Training manually (if above code hangs or get stuck)
epochs=20
steps=len(desc_train)  #steps=N/batch_size
for epoch in range(epochs):
    generated_data_train=generate_data(tokenizer, max_length, 
                                       desc_train, feature_train)
    generated_data_valid=generate_data(tokenizer, max_length, 
                                       desc_valid, feature_valid)
    model.fit_generator(generated_data_train,steps_per_epoch=steps, 
                        validation_data= generated_data_valid,
                        epochs=1, verbose=1)
    model.save('model_' + str(epoch) + '.h5')
####################################################################

## Evaluating the Model (on validaiton data using BLEU score)

**Generating description for a photo using the trained model (for a given tokenizer on train data)**

Note: passing in the start description token ‘start_seq‘, generating one word, then calling the model recursively with generated words as input until the end of sequence token is reached ‘end_seq‘ or the maximum description length is reached

In [0]:
#Mapping an integer prediction back to a word
#Note: Using the same tokeniser used for train data
def intID_to_word(integer, tokenizer):
    for word, index in tokenizer.word_index.items():
        if index==integer:
            return word
    return None

#Generating a desc for an image using trained model
from numpy import argmax
from keras.preprocessing.sequence import pad_sequences
def generate_desc(model, tokenizer, image, max_length):
    in_text='startseq'   #seeding the generation process
    for i in range(max_length):
        seq=tokenizer.texts_to_sequences([in_text])[0] #encoding txt2int
        seq=pad_sequences([seq], maxlen=max_length) #padding seq
        pred=model.predict([image,seq], verbose=0)  #predict using model
        pred=argmax(pred)   #prob to integer ID conversion
        word=intID_to_word(pred, tokenizer) #intID to word mapping
        if word is None:
            break   #stop if cant map word
        in_text += ' ' + word  #append as input to generate next word
        if word=='endseq':
            break   #stop if end of seq
    return in_text

**Evaluating model**

Note: for a given set of photo desc and photo features (on the validation data, could use test data later)

In [43]:
#Importing the nltk librarie's BLEU score evaluator
from nltk.translate.bleu_score import corpus_bleu

#Function for Evaluating model
def evaluate_model(model, desc_dict, image, tokenizer, max_length):
    actual, predicted = list(), list()
    for key, desc_list in desc_dict.items():
        pred_=generate_desc(model, tokenizer, image[key], max_length)
        act_=[desc.split() for desc in desc_list]
        #print(pred_,'\n',act_,'\n') #checking what's getting generated
        predicted.append(pred_.split())
        actual.append(act_)
    #calculating BLEU score
    print('BLEU-1:%f' % corpus_bleu(actual,predicted,weights=(1.0,0,0,0)))
    print('BLEU-2:%f' % corpus_bleu(actual,predicted,weights=(0.5,0.5,0,0)))
    print('BLEU-3:%f' % corpus_bleu(actual,predicted,weights=(0.3,0.3,0.3,0)))
    print('BLEU-4:%f' % corpus_bleu(actual,predicted,
                                    weights=(0.25,0.25,0.25,0.25)))
    
#Uploading, Loading and using best saved model
from google.colab import files
from keras.models import load_model
  #files.upload()
best_model = load_model('best_model_weights.h5')

#Checking Performance on both valid and test data
print('For valid data:', evaluate_model(best_model,desc_valid,feature_valid,tokenizer,max_length))
print('\n')
print('For test data:', evaluate_model(best_model,desc_test,feature_test,tokenizer,max_length))

BLEU-1:0.552174
BLEU-2:0.298463
BLEU-3:0.206645
BLEU-4:0.099745
For valid data: None


BLEU-1:0.558602
BLEU-2:0.306019
BLEU-3:0.213852
BLEU-4:0.104576
For test data: None
