In [0]:
# Mount Google Drive at '/content/drive/' so that later cells can read the
# dataset files stored under 'My Drive/Flickr_Data'.
from google.colab import drive
drive.mount('/content/drive/')


In [0]:
import numpy as np 
import pandas as pd 

import os
# Walk the whole dataset tree on the mounted Drive and print the full path of
# every file found (one output line per file).
for dirname, _, filenames in os.walk('/content/drive/My Drive/Flickr_Data'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
        

In [0]:
#!pip install matplotlib
import matplotlib.pyplot as plt
#!pip install tensorflow==2.0
%matplotlib inline
from pickle import dump
import tensorflow.keras as keras
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
# for using pretrained models and deploying the new model acc. to the needs
from keras.models import Model
import h5py


In [0]:
# Print the installed versions of the libraries this notebook relies on.
# These lines are IPython shell escapes, so the cell only runs in a notebook.
!pip show tensorflow 
!pip list | grep keras

import os

!pip show keras
!pip show tensorflow
# NOTE(review): the PyPI distribution behind `import sklearn` is named
# `scikit-learn` - confirm that `pip show sklearn` reports what is expected.
!pip show sklearn
!pip show pillow


In [0]:
# Inspect the dataset layout: list the top-level dataset folder, then collect
# the image file names and the file names of the text-data folder.
# NOTE(review): these paths start with '../content', i.e. they are relative to
# the current working directory (an earlier cell uses the absolute
# '/content/drive/...' form) - confirm the notebook always runs from '/content'.

print(os.listdir('../content/drive/My Drive/Flickr_Data'))

# next(os.walk(...)) takes only the first (root, dirnames, filenames) triple,
# so `imgs` holds the file names directly inside the Images folder.
root, dirs, imgs = next(os.walk('../content/drive/My Drive/Flickr_Data/Images', topdown=True))
print(root)

# `dir` shadows the Python builtin of the same name for the rest of the
# notebook; `directory` receives the *file* names inside Flickr_TextData.
path, dir, directory = next(os.walk('../content/drive/My Drive/Flickr_Data/Flickr_TextData'))
print(imgs,"\n")
print(directory)

imgCount = len(imgs)
# Despite its name this is the number of files found in Flickr_TextData.
dirCount = len(directory)

print("Image Counted : %d" % imgCount)
print("Directory Iterated : %d" %dirCount)


In [0]:
from sklearn.preprocessing import LabelEncoder
import random
import PIL
import tensorflow.keras as keras
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array

# Pick one image at random from the listing built earlier (`imgs`) and show it.
img_path = '../content/drive/My Drive/Flickr_Data/Images/'
max_img = len(imgs)
# random.randint(a, b) includes b, so randint(0, max_img) could return max_img
# and make imgs[overRand] raise IndexError. randrange(max_img) draws from
# 0 .. max_img - 1, which are exactly the valid indices.
overRand = random.randrange(max_img)
print("Input-Image ID picked: ",overRand)
print("Initial Input : %s" %imgs[overRand])
image = load_img(img_path + imgs[overRand])
# Last expression of the cell: the notebook renders the loaded image.
image


In [0]:
# Convert the sample image picked above into an array and look at it.
# This rebinds `image` from the loaded image object to its array form, so the
# cell depends on the previous cell having just been run.
image = img_to_array(image)
# Not the last expression of the cell, so this shape is evaluated but not shown.
image.shape
print(image)

# Add a leading batch axis of size 1 and run the network-specific input
# preprocessing on the sample.
# NOTE: this import rebinds the name `preprocess_input`, which an earlier cell
# imported from keras.applications.resnet50, to the VGG16 variant; every later
# use of `preprocess_input` (including inside featuresExt) gets this binding.

from keras.applications.vgg16 import preprocess_input
image = image.reshape(1, image.shape[0], image.shape[1], image.shape[2])
# The result is named `resnet_image` although it was produced by the VGG16
# preprocess_input imported just above.
resnet_image = preprocess_input(image)
print(resnet_image)
resnet_image.shape

# Reload the same file resized to 255x255 and display it.
# NOTE(review): featuresExt loads images at 224x224 - confirm 255 is intended.
image_res = load_img(img_path + imgs[overRand], target_size=(255,255))
image_res


In [0]:
# using the predefined model structure to extract features corresponding to each image

def featuresExt(fileSrc):
    """Extract one ResNet50 feature vector per image found in a directory.

    Parameters
    ----------
    fileSrc : str
        Directory whose entries are all loaded as images.

    Returns
    -------
    dict
        Maps the image identifier (file name up to the first '.') to the
        array returned by ``predict`` for that image. Each array keeps the
        leading batch axis of size 1 that is added before prediction.
    """
    # Use the output of the layer just before the final classifier as the
    # feature vector. The previous `modelMap.layers.pop()` only mutated a
    # Python list: depending on the Keras version `layers` is a copy, in which
    # case `layers[-1]` was still the classifier and the extracted "features"
    # were class scores rather than the 2048-wide vector caption_model expects.
    # Indexing `layers[-2]` directly gives the same layer in every version.
    base_model = ResNet50()
    modelMap = Model(inputs=base_model.inputs, outputs=base_model.layers[-2].output)

    # summary() prints by itself; wrapping it in print() only added "None".
    modelMap.summary()

    # image identifier -> extracted feature array
    traceExt = dict()

    for file in os.listdir(fileSrc):
        # os.path.join copes with fileSrc given with or without a trailing '/'.
        filename = os.path.join(fileSrc, file)
        image = load_img(filename, target_size=(224,224))
        image = img_to_array(image)

        # Add the batch axis: (height, width, channels) -> (1, height, width, channels).
        image = image.reshape(1, image.shape[0], image.shape[1], image.shape[2])

        # Apply whichever `preprocess_input` is currently bound in the notebook.
        image = preprocess_input(image)
        # Same identifier rule as loadData: text before the first '.'.
        imageId = file.split('.')[0]

        trace = modelMap.predict(image, verbose = 1)
        traceExt[imageId] = trace

        print("Image read > %s" %file)

    return traceExt
    

In [0]:
# Extract features for every image of the dataset and persist them.
fileSrc = "../content/drive/My Drive/Flickr_Data/Images/"
traces = featuresExt(fileSrc)

print("In-total features extracted for the supplied Image dataset : %d" %len(traces))

# Save the feature dictionary as a pickle. The `with` block closes (and
# therefore flushes) the file as soon as the dump is done; the previous
# `dump(traces, open(...))` never closed the handle.
# NOTE(review): this writes 'traceExt.pkl' into the current working directory,
# while the training cell loads '../content/drive/My Drive/Flickr_Data/traceExt.pkl'
# - confirm the file is copied there, or align the two paths.
with open('traceExt.pkl', 'wb') as pkl_file:
    dump(traces, pkl_file)


# Shell escape: show the first lines of the file written above. The file is a
# binary pickle, so the output is raw bytes rather than readable text.
!head -n 100 traceExt.pkl


In [0]:
# Shell escapes: print the installed versions of the packages used by the
# training part of the notebook.
!pip show tensorflow 
!pip show keras
!pip show numpy
# NOTE(review): `pickle` is part of the Python standard library rather than a
# pip-installed distribution - confirm this line reports anything useful.
!pip show pickle
!pip show matplotlib


In [0]:
from numpy import array
from pickle import load
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# initial all necessary workflow library needed according to Tf 1.x again

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Dense
from keras.layers import Input
from keras.layers import LSTM
from keras.layers import Embedding 
from keras.layers import Dropout
from keras.layers.merge import add
from keras.callbacks import ModelCheckpoint # for plotting the loss-access function tabular graph
import tensorflow.keras as keras


In [0]:
# making the instance of data in the memory

def loadDoc(file):
    """Read a text file and return its entire contents as a single string.

    Parameters
    ----------
    file : str
        Path of the file to read (opened in text mode, 'r').

    Returns
    -------
    str
        The complete file contents.
    """
    # The context manager closes the handle even when read() raises; the
    # previous open()/read()/close() sequence leaked it in that case.
    with open(file, 'r') as fl:
        return fl.read()

# load the set of image identifiers listed in a dataset split file

def loadData(file):
    """Return the set of image identifiers listed in a split file.

    The file is read through loadDoc and processed line by line: empty lines
    are ignored and, for every other line, the text before the first '.' is
    taken as the identifier.

    Parameters
    ----------
    file : str
        Path of the text file, one image file name per line.

    Returns
    -------
    set
        The distinct identifiers found in the file.
    """
    rows = loadDoc(file).split('\n')
    # A set comprehension drops duplicates, like set(list) did before.
    return {row.split('.')[0] for row in rows if len(row) >= 1}

# load the descriptions (captions) of the images whose identifier is in a given set

def loadDesc(file, dataset):
    """Load the descriptions of the requested images, wrapped in sequence markers.

    Each non-blank line of the file is split on whitespace: the first token is
    the image identifier, the remaining tokens are the description words.

    Parameters
    ----------
    file : str
        Path of the descriptions file (read through loadDoc).
    dataset : container
        Identifiers to keep; lines for any other identifier are skipped.

    Returns
    -------
    dict
        Maps each kept identifier to the list of its descriptions, every one
        of the form 'startseq <words...> endseq'.
    """
    description = dict()
    for line in loadDoc(file).split('\n'):
        token = line.split()
        # Blank lines (for example the empty string left after a trailing
        # newline) have no tokens; token[0] used to raise IndexError on them.
        if not token:
            continue
        image_id, image_desc = token[0], token[1:]
        if image_id not in dataset:
            continue
        # Join the markers with spaces. Previously 'startseq' and 'endseq'
        # were glued to the first and last word ('startseqa ... dogendseq'),
        # so the markers never appeared as tokens of their own.
        desc = ' '.join(['startseq'] + image_desc + ['endseq'])
        description.setdefault(image_id, list()).append(desc)

    return description

# converting the clean dictionary description to list format

def desc_lines(description):
    """Flatten a {image_id: [descriptions]} mapping into one list.

    Descriptions are returned in the mapping's iteration order, with each
    image's descriptions kept in their original order.
    """
    return [caption for captions in description.values() for caption in captions]

# computing the length, in words, of the longest description

def desc_length(description):
    """Return the word count of the longest description in the mapping.

    Words are counted by splitting each description on whitespace. The
    descriptions are gathered with desc_lines.
    """
    word_counts = [len(caption.split()) for caption in desc_lines(description)]
    return max(word_counts)

# loading image features for model training

def load_features(file, dataset):
    """Load pickled image features and keep only the requested images.

    Parameters
    ----------
    file : str
        Path of a pickle file holding a mapping image_id -> features.
    dataset : iterable
        Identifiers to select.

    Returns
    -------
    dict
        Maps every identifier of `dataset` to its stored features.

    Raises
    ------
    KeyError
        If an identifier of `dataset` is missing from the pickled mapping.
    """
    # NOTE: unpickling can execute arbitrary code - only load files that this
    # notebook (or another trusted source) produced.
    # The `with` block closes the handle; `load(open(...))` never did.
    with open(file, 'rb') as handle:
        features_all = load(handle)
    return {image: features_all[image] for image in dataset}



In [0]:
# modelling sequence acc. to the paper algorithm and train-model derivation using Encoder-Decoder Architecture

# creating the image sequence model with input sequences and output words

def initiate_sequences(tokenizer, desc_length, desc_list, photo, vocabulary_size=None):
    """Expand one image's descriptions into (photo, prefix, next-word) samples.

    Every description is encoded with the tokenizer; for each position i >= 1
    the first i tokens become a padded input sequence and token i becomes the
    one-hot encoded target.

    Parameters
    ----------
    tokenizer : object
        Fitted tokenizer providing `texts_to_sequences` and `word_index`.
    desc_length : int
        Length every input sequence is padded to (`maxlen` of pad_sequences).
    desc_list : list of str
        Descriptions belonging to the image.
    photo : array-like
        Feature vector of the image, repeated once per generated sample.
    vocabulary_size : int, optional
        Number of classes of the one-hot target. Defaults to
        ``len(tokenizer.word_index) + 1``, the same formula the training cell
        uses for its `vocabulary_size` variable.

    Returns
    -------
    tuple of numpy arrays
        (photo features, padded input sequences, one-hot targets), each with
        one row per generated sample.
    """
    # The function used to read a notebook-level `vocabulary_size` variable
    # that is only defined in a later cell, so it raised NameError whenever it
    # ran before that cell. Deriving the size from the tokenizer removes that
    # hidden dependency.
    if vocabulary_size is None:
        vocabulary_size = len(tokenizer.word_index) + 1

    a, b, y = list(), list(), list()

    for desc in desc_list:
        # Encode the description as a list of word indices.
        seq = tokenizer.texts_to_sequences([desc])[0]
        # One training sample per prefix of the sequence.
        for i in range(1, len(seq)):
            input_seq, output_seq = seq[:i], seq[i]
            input_seq = pad_sequences([input_seq], maxlen=desc_length)[0]
            output_seq = to_categorical([output_seq], num_classes=vocabulary_size)[0]
            a.append(photo)
            b.append(input_seq)
            y.append(output_seq)

    return array(a), array(b), array(y)

# defining the captioning model (image-feature branch merged with a text-sequence branch)

def caption_model(vocabulary_size, desc_length):
    """Build and compile the two-input captioning network.

    Parameters
    ----------
    vocabulary_size : int
        Size of the embedding vocabulary and of the softmax output layer.
    desc_length : int
        Length of the (padded) word-index sequence fed to the text branch.

    Returns
    -------
    Model
        Compiled model taking [image features, word sequence] and producing a
        softmax over the vocabulary. Its summary is printed and its graph is
        written to 'model_shape.png' as a side effect.
    """
    # Image branch: 2048-wide feature vector -> dropout -> 256-unit projection.
    photo_in = Input(shape=(2048,))
    photo_dropped = Dropout(0.5)(photo_in)
    photo_vec = Dense(256, activation='relu')(photo_dropped)

    # Text branch: word indices -> masked embedding -> dropout -> LSTM state.
    words_in = Input(shape=(desc_length,))
    words_embedded = Embedding(vocabulary_size, 256, mask_zero=True)(words_in)
    words_dropped = Dropout(0.5)(words_embedded)
    words_vec = LSTM(256)(words_dropped)

    # Decoder: element-wise sum of both 256-wide vectors, one hidden layer,
    # dropout, then a softmax over the vocabulary.
    merged = add([photo_vec, words_vec])
    hidden = Dense(256, activation='relu')(merged)
    hidden_dropped = Dropout(0.5)(hidden)
    next_word = Dense(vocabulary_size, activation='softmax')(hidden_dropped)

    # Tie the two inputs and the output together and compile.
    model = Model(inputs=[photo_in, words_in], outputs=next_word)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Report the architecture: text summary plus a diagram with tensor shapes.
    model.summary()
    plot_model(model, to_file='model_shape.png', show_shapes=True)

    return model

    

In [0]:
# visualization block activity tokens
# token generation and datagram block for clean captioning

# now fitting the tokenizer to the image captioning

from keras.callbacks import ModelCheckpoint
import tensorflow as tf 

def fit_tokenizer(description):
    """Create a Tokenizer and fit it on every description of the mapping.

    Parameters
    ----------
    description : dict
        Mapping image_id -> list of descriptions (flattened with desc_lines).

    Returns
    -------
    Tokenizer
        The tokenizer after `fit_on_texts` has been called on all descriptions.
    """
    captions = desc_lines(description)
    caption_tokenizer = Tokenizer()
    caption_tokenizer.fit_on_texts(captions)
    return caption_tokenizer

# data generator- the predefined method used in call to model.fit_generator()

def data_generator(description, photos, tokenizer, desc_length):
    """Endlessly yield one training batch per image.

    Parameters
    ----------
    description : dict
        Mapping image_id -> list of descriptions.
    photos : dict
        Mapping image_id -> feature array; element [0] of the stored array is
        used as the image's feature vector.
    tokenizer : object
        Fitted tokenizer, forwarded to initiate_sequences.
    desc_length : int
        Padding length of the input sequences, forwarded to initiate_sequences.

    Yields
    ------
    tuple
        ``([image_features, input_sequences], targets)`` for one image.

    Raises
    ------
    ValueError
        On the first ``next()`` if `description` is empty.
    """
    # With no images the loop below would never reach a `yield` and spin forever.
    if not description:
        raise ValueError("data_generator needs at least one image description")

    # Loop over the images forever; the consumer decides how many steps to take.
    while True:
        for key, desc_list in description.items():
            # Drop the leading batch axis of the stored feature array.
            photo = photos[key][0]
            in_img, in_seq, out_word = initiate_sequences(tokenizer, desc_length, desc_list, photo)
            # Yield an (inputs, targets) tuple. Keras documents generator
            # batches as tuples; a list can be mistaken for a plain list of
            # inputs by newer releases.
            yield ([in_img, in_seq], out_word)
                 

# visualization - defining the class value method to trace the training and epochs activity

class valueLosses(tf.keras.callbacks.Callback):
    """Keras callback that records and plots loss / val_loss after every epoch.

    The hooks must carry the names Keras calls (`on_train_begin`,
    `on_epoch_end`). They used to be named `begin_train` and `end_epochs`,
    which Keras never invokes, so the callback silently did nothing. The old
    names are kept as aliases for any code that calls them directly.

    NOTE(review): the base class comes from `tf.keras`, while the model layers
    are imported from `keras` - confirm both refer to the same implementation
    in the installed environment.
    """

    def on_train_begin(self, logs=None):
        """Reset the recorded history at the start of training."""
        self.i = 0              # number of epochs seen so far
        self.x = []             # epoch indices (x axis of the plot)
        self.loss = []          # training loss per epoch
        self.loss_values = []   # validation loss per epoch
        self.figure = plt.figure()
        self.logs = []          # raw `logs` dict of every epoch

    def on_epoch_end(self, epoch, logs=None):
        """Store this epoch's losses and redraw the loss curves."""
        # `logs=None` replaces the shared mutable default `logs={}`.
        logs = logs if logs is not None else {}
        self.logs.append(logs)
        self.loss.append(logs.get('loss'))
        self.x.append(self.i)
        self.loss_values.append(logs.get('val_loss'))
        self.i += 1

        # Plot the training and validation loss recorded so far.
        plt.plot(self.x, self.loss, label='loss')
        plt.plot(self.x, self.loss_values, label='val_loss')
        plt.legend()
        plt.show()

    # Backward-compatible aliases for the original (never auto-called) names.
    begin_train = on_train_begin
    end_epochs = on_epoch_end

# Single shared instance, passed as a callback when the model is trained.
losses = valueLosses()
        

In [0]:
# Load everything the training step needs: the train / validation image
# identifiers, their descriptions, their extracted features, the tokenizer,
# the vocabulary size and the maximum description length.
# NOTE: the print() calls below dump the complete sets / dictionaries, so this
# cell produces a very large output.

# Identifiers of the training split.
file = '../content/drive/My Drive/Flickr_Data/Flickr_TextData/Flickr_8k.trainImages.txt'
train = loadData(file)
print("Training Data:")
print(train)
print("Dataset loaded : %d" % len(train))
# Descriptions of the training images (keyed by image identifier).
desc_train = loadDesc('../content/drive/My Drive/Flickr_Data/descriptions.txt', train)
print("Description corresponding to training data:")
print(desc_train)
print("Description on-Train : %d" %len(desc_train))
# Identifiers and descriptions of the validation (dev) split; `file` is reused.
file = '../content/drive/My Drive/Flickr_Data/Flickr_TextData/Flickr_8k.devImages.txt'
validate = loadData(file)
print("Validating Data:")
print(validate)
print('Validated data loaded : %d' %len(validate))
desc_validation = loadDesc('../content/drive/My Drive/Flickr_Data/descriptions.txt', validate)
print("Description corresponding to Validation Data:")
print(desc_validation)
print('Description on-Validation : %d' %len(desc_validation))

# Image features of both splits, read from the pickle file of extracted
# features. The same file is read twice, once per load_features call.

ftr_train = load_features('../content/drive/My Drive/Flickr_Data/traceExt.pkl', train)
print("Traced Features Extracted")
print(ftr_train)

print("Model trained over features(Training Photos) : %d" %len(ftr_train))

ftr_validate = load_features('../content/drive/My Drive/Flickr_Data/traceExt.pkl', validate)
print(ftr_validate)

print("Model validation over features(Testing photos) : %d" %len(ftr_validate))


# Fit the tokenizer on the training descriptions only.

tokenizer = fit_tokenizer(desc_train)
print("Tokenizer for the Clean Descriptions:")
# Prints the tokenizer object itself, not its vocabulary.
print(tokenizer)

# Vocabulary size: number of distinct words known to the tokenizer plus one.
vocabulary_size = len(tokenizer.word_index) + 1
print("Vocabulary size for the Image Dataset :%d " % vocabulary_size)

# Length, in words, of the longest training description.
max_length = desc_length(desc_train)
print("Sequence Length of the Data : %d" %(max_length))



In [0]:
# Build the captioning model (caption_model) and train it on the features and
# descriptions prepared in the previous cell.

model = caption_model(vocabulary_size, max_length)

# data_generator yields one batch per image, so one epoch is one step per
# image of the corresponding split.
steps = len(desc_train)
val_steps = len(desc_validation)

generator = data_generator(desc_train, ftr_train, tokenizer, max_length)
val_generator = data_generator(desc_validation, ftr_validate, tokenizer, max_length)

# Train for 31 epochs with the `losses` callback attached; the return value of
# the call is kept in `history`.
# NOTE(review): `fit_generator` is deprecated in newer Keras / tf.keras
# releases in favour of `fit` - confirm against the installed version.
history = model.fit_generator(generator, epochs=31, steps_per_epoch=steps, verbose=1, callbacks=[losses], validation_data=val_generator, validation_steps=val_steps)

# Save only the trained weights (not the architecture) for later evaluation
# and prediction.

model.save_weights("featureModel_ResNet.h5")
