## Imports

In [None]:
#matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
# Configure TensorFlow to grow its GPU memory allocation on demand and
# create an interactive session with that configuration.
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

import numpy as np
import os
import nltk
from PIL import Image
from cache import cache
from tensorflow.python import keras

In [None]:
from keras import backend as K
from keras import layers, models, optimizers, callbacks
from keras.models import Model
from keras.layers import Input, Dense, GRU, Embedding, Conv2D, Layer, concatenate
from keras.applications import VGG16
from keras.optimizers import RMSprop
from keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.applications import ResNet50
from keras.applications import InceptionV3

In [None]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# NOTE(review): these three statements repeat the session setup from the
# first cell of the notebook; `config` and `session` are rebound and a second
# InteractiveSession is created. Presumably a leftover -- confirm before removing.
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

In [None]:
# Restrict CUDA to device 0, with devices ordered by PCI bus id.
# NOTE(review): sessions were already created in earlier cells; presumably
# these variables only matter if set before TensorFlow first touches the GPU
# -- confirm.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Use the tf.compat.v1 entry points, as the other cells of this notebook do,
# instead of the bare tf.ConfigProto / tf.Session names.
config = tf.compat.v1.ConfigProto()
sess = tf.compat.v1.Session(config=config)
# NOTE(review): the object returned by tf.device() is discarded here (it is
# not used in a `with` statement), so this line presumably has no effect.
tf.device(tf.DeviceSpec(device_type="GPU", device_index="0"))

In [None]:
from tensorflow.python.client import device_lib
# Open a session that logs device placement and print the devices that
# device_lib reports.
sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))
print(device_lib.list_local_devices())

In [None]:
# NOTE(review): the result of tf.device(...) is discarded (no `with` block),
# so it presumably does not pin anything -- confirm intent.
tf.device("/device:GPU:0")
sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))

In [None]:
# Display the installed TensorFlow version.
tf.__version__

In [None]:
import coco

In [None]:
# NOTE(review): this only references the attribute `coco.download`; nothing is
# called, so this cell does not trigger a download -- confirm intent.
coco.download

In [None]:
# Load the training records; the first returned element is discarded.
_, filenames_train, captions_train =coco.load_records(train=True)

In [None]:
np.asanyarray(captions_train).shape

In [None]:
# _, filenames_test, captions_test = coco.load_records(train=False)
# The records loaded with train=False are used as the validation set.
_, filenames_val, captions_val = coco.load_records(train=False)

In [None]:
# Report the sizes of the filename and caption lists of both splits.
num_images_train = len(filenames_train)
print("number of train images : ",num_images_train)
num_caption_train = len(captions_train)
print("number of train captions : ",num_caption_train)
num_images_val = len(filenames_val)
print("number of validation images : ",num_images_val)
num_caption_val = len(captions_val)
print("number of validation captions : ",num_caption_val)

In [None]:
# The "test" names are aliases of the validation lists (same objects).
filenames_test=filenames_val
captions_test=captions_val

In [None]:
num_images_val = len(filenames_val)
num_images_val

In [None]:
def load_image(path, size=None):
    """
    Read the image file at `path` and return it as a float array.

    If `size` is given, the image is first resized to it with LANCZOS
    resampling. Pixel values are divided by 255. An image that loads as
    a two-dimensional array is repeated three times along a new last
    axis, so such images come back with three channels.
    """
    picture = Image.open(path)
    if size is not None:
        picture = picture.resize(size=size, resample=Image.LANCZOS)

    # Convert to an array and scale by 1/255.
    pixels = np.array(picture) / 255.0

    if pixels.ndim != 2:
        return pixels

    # Two-dimensional input: replicate the single plane into 3 channels.
    return np.repeat(pixels[:, :, np.newaxis], 3, axis=2)

In [None]:
def show_image(idx, train):
    """
    Print the captions of one COCO image and plot the image.

    `idx` indexes the training lists when `train` is truthy and the
    validation lists otherwise.
    """
    if train:
        source = (coco.train_dir, filenames_train, captions_train)
    else:
        source = (coco.val_dir, filenames_val, captions_val)
    image_dir, all_filenames, all_captions = source

    image_file = all_filenames[idx]
    image_captions = all_captions[idx]
    image_path = os.path.join(image_dir, image_file)

    # Print every caption that belongs to this image.
    for text in image_captions:
        print(text)

    # Load the image at its original size and display it.
    plt.imshow(load_image(image_path))
    plt.show()

In [None]:
show_image(idx=100, train=True)

In [None]:
image_model = InceptionV3()
print (image_model.summary())
transfer_layer=image_model.get_layer('avg_pool')





In [None]:
import h5py
import pickle
import argparse
# from keras import  optimizers, callbacks
from tensorflow.keras.utils import to_categorical
from utils3 import combine_images,plot_log
from capsule_layers import CapsuleLayer, PrimaryCap, Length, Mask

# Tell the Keras backend to use the 'channels_last' image data format.
K.set_image_data_format('channels_last')


def CapsNet(input_shape, n_class, routings):
    """
    Build the capsule network and return three Keras models that share its layers.

    :param input_shape: shape of a single input image, passed to `Input(shape=...)`
    :param n_class: number of category capsules; also the width of the label input
    :param routings: number of routing iterations, passed to `CapsuleLayer`
    :return: tuple `(train_model, eval_model, manipulate_model)`
        - train_model: inputs `[x, y]`, outputs `[capsule lengths, decoder(masked_by_y)]`
        - eval_model: input `x`, outputs `[capsule lengths, decoder(masked)]`
        - manipulate_model: inputs `[x, y, noise]`, output `decoder(masked_noised_y)`

    The decoder defined here ends in a 1024-unit ReLU Dense layer; it does not
    reshape its output back to `input_shape`.
    """
    x = Input(shape=input_shape)

    # Three plain convolution layers in front of the capsule layers.
    conv1 = Conv2D(filters=96, kernel_size=13, strides=4, padding='valid', activation='relu', name='conv1')(x)
    conv2 = Conv2D(filters=96, kernel_size=5, strides=2, padding='valid', activation='relu', name='conv2')(conv1)
    conv3 = Conv2D(filters=256, kernel_size=9, strides=1, padding='valid', activation='relu', name='conv3')(conv2)

    # Primary capsules built on top of the last convolution.
    primary_caps = PrimaryCap(conv3, dim_capsule=8, n_channels=32, kernel_size=9, strides=2, padding='valid')

    # One 16-dimensional capsule per category.
    category_caps = CapsuleLayer(num_capsule=n_class, dim_capsule=16, routings=routings,
                                 name='category_caps')(primary_caps)

    # First model output, produced by the Length layer from the category capsules.
    out_caps = Length(name='capsnet')(category_caps)

    # Decoder inputs. NOTE(review): presumably Mask() with a label input keeps
    # the capsule selected by `y`, and without one keeps the longest capsule
    # -- confirm in capsule_layers.
    y = Input(shape=(n_class,))
    # The original line ended with a bare word `training` (a comment that lost
    # its '#'), which made the whole cell a syntax error.
    masked_by_y = Mask()([category_caps, y])  # used by the training model
    masked = Mask()(category_caps)  # used by the evaluation model

    # Decoder shared by all three models.
    decoder = models.Sequential(name='decoder')
    decoder.add(Dense(512, activation='relu', input_dim=16*n_class))
    decoder.add(Dense(1024, activation='relu'))

    train_model = Model([x, y], [out_caps, decoder(masked_by_y)])
    eval_model = Model(x, [out_caps, decoder(masked)])

    # Variant that adds a noise tensor to the category capsules before decoding.
    noise = Input(shape=(n_class, 16))
    noised_category_caps = layers.Add()([category_caps, noise])
    masked_noised_y = Mask()([noised_category_caps, y])
    manipulate_model = Model([x, y, noise], decoder(masked_noised_y))

    return train_model, eval_model, manipulate_model

def margin_loss(y_true, y_pred):
    """
    Margin loss between label tensor `y_true` and prediction tensor `y_pred`.

    Where `y_true` is 1 the penalty is max(0, 0.9 - y_pred)^2; elsewhere
    it is 0.5 * max(0, y_pred - 0.1)^2. The terms are summed over
    axis 1 and the mean of those sums is returned.
    """
    present_term = y_true * K.square(K.maximum(0., 0.9 - y_pred))
    absent_term = 0.5 * (1 - y_true) * K.square(K.maximum(0., y_pred - 0.1))
    per_sample = K.sum(present_term + absent_term, 1)
    return K.mean(per_sample)


def train(model, data, args):
    """
    Compile and fit the training model.

    :param model: Keras model taking `[x, y]` and producing two outputs
        (the first model returned by `CapsNet`)
    :param data: `((x_train, y_train), (x_test, y_test))`
    :param args: settings, either a mapping or an `argparse.Namespace`,
        providing `save_dir`, `batch_size`, `debug`, `lr`, `lr_decay`,
        `lam_recon` and `epochs`
    :return: None
    """
    # Resolve the Keras callbacks module locally: later in this file the
    # module-level name `callbacks` is rebound to a plain list, which would
    # break `callbacks.CSVLogger` here.
    from keras import callbacks as keras_callbacks

    # The __main__ block produces an argparse.Namespace, while this function
    # indexes its settings like a dict; accept both forms.
    cfg = vars(args) if isinstance(args, argparse.Namespace) else args

    (x_train, y_train), (x_test, y_test) = data

    # Callbacks: CSV log, TensorBoard, best-weights checkpoint, exponential
    # learning-rate schedule and early stopping on the validation loss.
    log = keras_callbacks.CSVLogger(cfg['save_dir'] + '/log.csv')
    tb = keras_callbacks.TensorBoard(log_dir=cfg['save_dir'] + '/tensorboard-logs',
                                     batch_size=cfg['batch_size'], histogram_freq=int(cfg['debug']))
    checkpoint = keras_callbacks.ModelCheckpoint(cfg['save_dir'] + '/weights-{epoch:02d}.h5',
                                                 monitor='val_capsnet_acc',
                                                 save_best_only=True, save_weights_only=True, verbose=1)
    lr_decay = keras_callbacks.LearningRateScheduler(
        schedule=lambda epoch: cfg['lr'] * (cfg['lr_decay'] ** epoch))
    early_stop = keras_callbacks.EarlyStopping(monitor='val_loss', patience=3, verbose=1)

    # Two losses: margin loss on the first output, MSE (weighted by
    # lam_recon) on the decoder output.
    model.compile(optimizer=optimizers.Adam(lr=cfg['lr']),
                  loss=[margin_loss, 'mse'],
                  loss_weights=[1., cfg['lam_recon']],
                  metrics={'capsnet': 'accuracy'})

    # The labels are both an input and the first target; the images are both
    # an input and the second target.
    model.fit(
        [x_train, y_train],
        [y_train, x_train],
        batch_size=cfg['batch_size'],
        epochs=cfg['epochs'],
        validation_data=[[x_test, y_test], [y_test, x_test]],
        callbacks=[log, tb, checkpoint, lr_decay, early_stop]
    )

def test(model, data, args):
    """
    Evaluate `model` on `data = (x_test, y_test)` and save a comparison image.

    Prints the fraction of samples whose arg-max prediction matches the
    arg-max label, writes the first 50 inputs and the first 50 decoder
    outputs as one image to `<args.save_dir>/real_and_recon.png`, and
    displays that file.
    """
    x_test, y_test = data
    y_pred, x_recon = model.predict(x_test, batch_size=100)

    banner = '-' * 30
    print(banner + 'Begin: test' + banner)
    hits = np.sum(np.argmax(y_pred, 1) == np.argmax(y_test, 1))
    print('Test acc:', hits / y_test.shape[0])

    # Tile inputs and decoder outputs into one picture and scale to 0..255.
    mosaic = combine_images(np.concatenate([x_test[:50], x_recon[:50]])) * 255
    Image.fromarray(mosaic.astype(np.uint8)).save(args.save_dir + "/real_and_recon.png")
    print()
    print('Reconstructed images are saved to %s/real_and_recon.png' % args.save_dir)
    print(banner + 'End: test' + banner)

    plt.imshow(plt.imread(args.save_dir + "/real_and_recon.png"))
    plt.show()


def manipulate_latent(model, data, args):
    """
    Perturb each capsule dimension of one random sample and save the decoder outputs.

    :param model: model taking `[x, y, noise]` (the third model returned by `CapsNet`)
    :param data: `(x_test, y_test)`; `y_test` is indexed by arg-max along axis 1
    :param args: object with attributes `digit` (class index to pick a sample
        from) and `save_dir` (output directory)
    :raises ValueError: if no sample in `y_test` belongs to class `args.digit`
    :return: None; writes `<save_dir>/manipulate-<digit>.png`
    """
    print('-' * 30 + 'Begin: manipulate' + '-' * 30)
    x_test, y_test = data
    index = np.argmax(y_test, 1) == args.digit

    n_candidates = int(np.count_nonzero(index))
    if n_candidates == 0:
        raise ValueError('no test sample belongs to class %d' % args.digit)
    # randint's upper bound is exclusive, so pass the count itself; the former
    # `sum(index) - 1` could never select the last candidate and failed when
    # exactly one candidate existed.
    number = np.random.randint(low=0, high=n_candidates)

    x, y = x_test[index][number], y_test[index][number]
    x, y = np.expand_dims(x, 0), np.expand_dims(y, 0)

    # One noise row per class; take the class count from the label vector
    # instead of hard-coding 80.
    noise = np.zeros([1, y.shape[1], 16])
    x_recons = []
    for dim in range(16):
        for r in [-0.25, -0.2, -0.15, -0.1, -0.05, 0, 0.05, 0.1, 0.15, 0.2, 0.25]:
            # Set capsule dimension `dim` to `r` for every class, leave the rest 0.
            tmp = np.copy(noise)
            tmp[:, :, dim] = r
            x_recon = model.predict([x, y, tmp])
            x_recons.append(x_recon)

    x_recons = np.concatenate(x_recons)

    img = combine_images(x_recons, height=16)
    image = img * 255
    Image.fromarray(image.astype(np.uint8)).save(args.save_dir + '/manipulate-%d.png' % args.digit)
    print('manipulated result saved to %s/manipulate-%d.png' % (args.save_dir, args.digit))
    print('-' * 30 + 'End: manipulate' + '-' * 30)



def load_coco(dataset_file, map_file):
    """
    Load preprocessed MSCOCO 2017 dataset

    :param dataset_file: path of an HDF5 file containing datasets 'x' and 'y'
    :param map_file: path of a pickle file holding a dict with key 'id_category'
    :return: `(x_train, y_train), (x_test, y_test), id_category`, where the
        first 80% of the rows form the training split and the rest the test split
    """
    print('\nLoading dataset...')

    # The context manager closes the HDF5 file even if reading a dataset fails.
    with h5py.File(dataset_file, 'r') as h5f:
        x = h5f['x'][:]
        y = h5f['y'][:]

    split = int(x.shape[0] * 0.8)  # 80% of data is assigned to the training set
    x_train, y_train = x[:split], y[:split]
    x_test, y_test = x[split:], y[split:]

    # SECURITY: pickle.load can execute arbitrary code contained in the file;
    # only use map files from a trusted source.
    with open(map_file, 'rb') as mapping:
        category_id_map = pickle.load(mapping)
    id_category = category_id_map['id_category']
    print('Done.')

    return (x_train, y_train), (x_test, y_test), id_category


# Script/notebook entry point: parse settings, load the dataset and build the
# three capsule-network models (their names are used by later cells).
if __name__ == "__main__":


    parser = argparse.ArgumentParser(description="Capsule Network on MSCOCO 2017.")
    parser.add_argument('--epochs', default=1, type=int)
    parser.add_argument('--batch_size', default=100, type=int)
    parser.add_argument('--lr', default=0.001, type=float, help="Initial learning rate")
    parser.add_argument('--lr_decay', default=1, type=float,
                        help="The value multiplied by lr at each epoch. Set a larger value for larger epochs")
    parser.add_argument('--lam_recon', default=0.392, type=float, help="The coefficient for the loss of decoder")
    parser.add_argument('-r', '--routings', default=3, type=int,
                        help="Number of iterations used in routing algorithm. should > 0")  # num_routing should > 0
    parser.add_argument('--debug', action='store_true', help="Save weights by TensorBoard")
    parser.add_argument('--save_dir', default='./result')
    # NOTE(review): '__file__' is a quoted string literal here, so the default
    # resolves relative to the current working directory. Presumably deliberate
    # for notebook use -- confirm.
    parser.add_argument(
        '--dataset_file', default=os.path.join(os.path.dirname(os.path.abspath('__file__')), 'dataset/capsnet_train_data.h5'),
        help='File having the preprocessed dataset')
    parser.add_argument('-t', '--testing', action='store_true',
                        help='Test the trained model on testing dataset')
    # NOTE(review): the help text says "Digit"; manipulate_latent compares this
    # value with the arg-max class index of the labels.
    parser.add_argument('--digit', default=5, type=int,
                        help="Digit to manipulate")
    parser.add_argument('-w', '--weights', default=None,
                        help="The path of the saved weights. Should be specified when testing")
    
    parser.add_argument(
        '--map_file', default=os.path.join(os.path.dirname(os.path.realpath('__file__')), 'dataset/coco_raw.pickle'),
        help='File having the id to category map'
    )
    # parse_known_args() returns (namespace, leftover argv); only the namespace
    # is kept, so unrecognised command-line arguments are ignored.
    args = parser.parse_known_args()[0]
    print(args)

    # Create the output directory if it does not exist yet.
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    (x_train, y_train), (x_test, y_test), id_category = load_coco(args.dataset_file, args.map_file)


    # The number of classes is taken from the width of the label array.
    model, eval_model, manipulate_model = CapsNet(
        input_shape=(299, 299, 3),
        n_class=y_train.shape[1],
        routings=args.routings
    )


    model.summary()

  

In [None]:
# Handle on the layer named 'predictions' of image_model; the cell displays
# its output tensor.
label_layer = image_model.get_layer('predictions')
label_layer.output

In [None]:
# Inspect the input/output tensors of the two models.
print(eval_model.input)
print(image_model.input)
print(eval_model.output[1])
print(transfer_layer.output)

In [None]:
# NOTE(review): exploratory -- concatenates tensors that belong to two
# separate models; the result is only displayed, not stored.
tf.concat([eval_model.output[1],transfer_layer.output],1)

In [None]:
transfer_layer.output

In [None]:
# Feature extractor from the capsule network: image -> second output of
# eval_model (the decoder output).
# NOTE(review): no weights are loaded into eval_model in the code visible
# here and train() is not called before this point -- confirm that these
# features do not come from an untrained network.
capsule_model=models.Model(inputs=eval_model.input,outputs=eval_model.output[1])
capsule_model.output

In [None]:

# Feature extractor from image_model: image -> output of its 'avg_pool' layer.
image_model_transfer = models.Model(inputs=image_model.input, outputs=image_model.get_layer('avg_pool').output)
# image_model_transfer= image_model
image_model_transfer.summary()


In [None]:
# Elements 1 and 2 of the model's input shape; used as the resize target
# when loading images.
img_size = K.int_shape(image_model.input)[1:3]
img_size

In [None]:
len(filenames_val)

In [None]:

# Length of the concatenated feature vector stored per image by process_images.
# NOTE(review): presumably 2048 (avg_pool output) + 1024 (last Dense layer of
# the capsule decoder) -- confirm against the two model summaries.
transfer_values_size=3072

In [None]:
def print_progress(count, max_count):
    """
    Write a one-line progress indicator to stdout, overwriting the previous one.

    :param count: number of items processed so far
    :param max_count: total number of items; 0 is reported as 100%
    :return: None
    """
    # Imported here because the notebook only imports `sys` in a cell that
    # comes after the first cells that call this function.
    import sys

    # Guard against an empty work list instead of dividing by zero.
    pct_complete = count / max_count if max_count else 1.0

    # The leading carriage return moves the cursor back to the line start so
    # successive calls overwrite each other.
    msg = "\r- Progress: {0:.1%}".format(pct_complete)

    sys.stdout.write(msg)
    sys.stdout.flush()

In [None]:
def process_images(data_dir, filenames, batch_size=32):
    """
    Compute the combined transfer-values for every file in `filenames`.

    Images are read from `data_dir`, resized to `img_size` and pushed
    through both `image_model_transfer` and `capsule_model` in batches of
    at most `batch_size`. The two outputs are concatenated along axis 1.

    Returns a float16 array of shape (len(filenames), transfer_values_size).
    """
    total = len(filenames)

    # Reusable input buffer holding one batch of images.
    batch_buffer = np.zeros(shape=(batch_size,) + img_size + (3,), dtype=np.float16)

    # One row of transfer-values per image.
    features = np.zeros(shape=(total, transfer_values_size), dtype=np.float16)

    first = 0
    while first < total:
        print_progress(count=first, max_count=total)

        # The last batch may be shorter than batch_size.
        last = min(first + batch_size, total)
        n_current = last - first

        # Fill the buffer with the images of this batch.
        for slot, name in enumerate(filenames[first:last]):
            batch_buffer[slot] = load_image(os.path.join(data_dir, name), size=img_size)

        # Only the filled part of the buffer is fed to the two models.
        inputs = batch_buffer[0:n_current]
        inception_part = image_model_transfer.predict(inputs)
        capsule_part = capsule_model.predict(inputs)
        combined = np.concatenate([inception_part, capsule_part], 1)

        features[first:last] = combined[0:n_current]
        first = last

    # Finish the progress line.
    print()

    return features

In [None]:
def process_images_label(data_dir, filenames, batch_size=32):
    """
    Run `image_model` on every file in `filenames` and collect its outputs.

    :param data_dir: directory that contains the image files
    :param filenames: list of image file names relative to `data_dir`
    :param batch_size: maximum number of images per `predict` call
    :return: float16 array of shape (len(filenames), 1000), one row of model
        outputs per image
    """
    num_images = len(filenames)

    # Reusable input buffer for one batch of images. (A first assignment to
    # `shape2` that was immediately overwritten has been dropped.)
    shape = (batch_size,) + img_size + (3,)
    image_batch = np.zeros(shape=shape, dtype=np.float16)

    # Output array: one row of 1000 values per image.
    shape2 = (num_images, 1000)
    label_values = np.zeros(shape=shape2, dtype=np.float16)

    start_index = 0

    while start_index < num_images:

        print_progress(count=start_index, max_count=num_images)

        # The last batch may be shorter than batch_size.
        end_index = min(start_index + batch_size, num_images)
        current_batch_size = end_index - start_index

        # Load and resize the images of this batch into the buffer.
        for i, filename in enumerate(filenames[start_index:end_index]):
            path = os.path.join(data_dir, filename)
            img = load_image(path, size=img_size)
            image_batch[i] = img

        # Only the filled part of the buffer is fed to the model.
        label_value_batch = image_model.predict(image_batch[0:current_batch_size])

        label_values[start_index:end_index] = label_value_batch[0:current_batch_size]

        start_index = end_index

    # Finish the progress line.
    print()

    return label_values


In [None]:
def process_images_train():
    """
    Return (transfer_values, label_values) for the training images.

    Both results go through `cache`, using the files
    "capsule_inception_train.pkl" and "label_inception_train.pkl" in
    `coco.data_dir`.
    """
    print("Processing {0} images in training-set ...".format(len(filenames_train)))

    features_cache = os.path.join(coco.data_dir,
                                  "capsule_inception_train.pkl")
    labels_cache = os.path.join(coco.data_dir, "label_inception_train.pkl")

    # Arguments shared by both extraction functions.
    shared = dict(data_dir=coco.train_dir, filenames=filenames_train)

    features = cache(cache_path=features_cache, fn=process_images, **shared)
    labels = cache(cache_path=labels_cache, fn=process_images_label, **shared)

    return features, labels

In [None]:
# Compute (or fetch through the cache helper) the training-set features and
# label outputs, then print their shapes.
[transfer_values,label_values]=process_images_train()
print(np.asanyarray(transfer_values).shape)
print(np.asanyarray(label_values).shape)

In [None]:
def process_images_val():
    """
    Return (transfer_values, label_values) for the validation images.

    Both results go through `cache`, using the files
    "capsule_inception_val.pkl" and "label_inception_val.pkl" in
    `coco.data_dir`.
    """
    print("Processing {0} images in validation-set ...".format(len(filenames_val)))

    features_cache = os.path.join(coco.data_dir, "capsule_inception_val.pkl")
    labels_cache = os.path.join(coco.data_dir, "label_inception_val.pkl")

    # Arguments shared by both extraction functions.
    shared = dict(data_dir=coco.val_dir, filenames=filenames_val)

    features = cache(cache_path=features_cache, fn=process_images, **shared)
    labels = cache(cache_path=labels_cache, fn=process_images_label, **shared)

    return features, labels

In [None]:
# Same for the validation set. Note that `transfer_values` and `label_values`
# are rebound here to the validation results.
[transfer_values,label_values]=process_images_val()
print(np.asanyarray(transfer_values).shape)
print(np.asanyarray(label_values).shape)

In [None]:

import sys

# Training-set features and label outputs under their final names.
[capsule_inception_train,label_inception_train] = process_images_train()
print("dtype:", capsule_inception_train.dtype)
print("shape:", capsule_inception_train.shape)
print("dtype:", label_inception_train.dtype)
print("shape:", label_inception_train.shape)

In [None]:

# Validation-set features and label outputs.
[capsule_inception_val,label_inception_val] = process_images_val()
print("dtype:", capsule_inception_val.dtype)
print("shape:", capsule_inception_val.shape)
print("dtype:", label_inception_val.dtype)
print("shape:", label_inception_val.shape)


In [None]:
# Aliases: the validation results double as "test" data; temp1..temp4 keep
# references to the current training arrays and lists.
transfer_values_test=capsule_inception_val
label_test=label_inception_val
temp1=capsule_inception_train
temp2=label_inception_train
temp3=filenames_train
temp4=captions_train

In [None]:
# Keep a reference to the originally loaded training captions and print the
# shapes of everything computed so far.
captions_train_train=captions_train
print(capsule_inception_train.shape)
print(label_inception_train.shape)
print(capsule_inception_val.shape)
print(label_inception_val.shape)
print(np.asanyarray(captions_train_train).shape)

In [None]:
np.asanyarray(filenames_val).shape

In [None]:
from keras_applications.inception_v3 import decode_predictions 
# Decode the label outputs into the top-3 (training) and top-5 (validation)
# predictions per image.
yhat=decode_predictions(label_inception_train, top=3, utils=tf.keras.utils)
yhat2=decode_predictions(label_inception_val, top=5, utils=tf.keras.utils)

In [None]:
print((np.asanyarray(yhat))[1,:])

In [None]:
# Keep field 1 of every decoded prediction.
# NOTE(review): presumably the human-readable class name -- confirm against
# decode_predictions' return format.
extra_labels=(np.asanyarray(yhat)[:,:,1])
print(extra_labels[0:5,:])

extra_labels.shape

In [None]:

captions_train2=captions_train_train

In [None]:
# NOTE(review): `captions_train2_list` is created empty and only displayed in
# the cells visible here.
captions_train2_list=[]
captions_train2_list
capsule_inception_train.shape[0]

In [None]:
import nltk
# Fetch the 'punkt' resource through nltk's downloader.
nltk.download('punkt')

from nltk import word_tokenize,sent_tokenize

In [None]:
import wikipediaapi
wiki_wiki = wikipediaapi.Wikipedia(language='en', extract_format=wikipediaapi.ExtractFormat.WIKI)
path='/home/javanmardis/shima/CapsNet-COCO-master/Caption_file.npy'
captions_train2 = np.load("Caption_file.npy",allow_pickle="True")

    

In [None]:
captions_train2.size

In [None]:
import string
for cc in range (len(captions_train2)):
    captions_train2[cc] = [''.join(c for c in s if c not in string.punctuation) for s in captions_train2[cc]]


In [None]:
captions_train2[100]

In [None]:
mark_start = 'ssss '
mark_end = ' eeee'

In [None]:
captions_train=captions_train2
len(captions_train[1])

In [None]:
def mark_captions(captions_listlist):
    """
    Wrap every caption in the start and end markers.

    Takes a list of caption lists and returns a new list of lists of the
    same structure, where each caption is `mark_start + caption + mark_end`.
    """
    marked_groups = []
    for group in captions_listlist:
        marked_groups.append([mark_start + text + mark_end for text in group])
    return marked_groups

In [None]:
# Add the start/end markers to all training captions and show the captions
# of the first image.
captions_train_marked = mark_captions(captions_train)
captions_train_marked[0]

In [None]:
def flatten(captions_listlist):
    """
    Concatenate a list of caption lists into one flat list.

    The order of the groups and of the captions inside each group is kept.
    """
    flat = []
    for group in captions_listlist:
        flat.extend(group)
    return flat

In [None]:
# Flat list of all marked training captions; used to fit the tokenizer.
captions_train_flat = flatten(captions_train_marked)

In [None]:
# Vocabulary size passed to the tokenizer and used as the width of the
# decoder's embedding input and output layer.
num_words = 10000

In [None]:
class TokenizerWrap(Tokenizer):
    """Keras Tokenizer that is fitted on construction and can map tokens back to words."""

    def __init__(self, texts, num_words=None):
        """
        :param texts: list of strings the vocabulary is fitted on
        :param num_words: forwarded to the Tokenizer constructor
        """
        super().__init__(num_words=num_words)

        # Build the vocabulary from the given texts.
        self.fit_on_texts(texts)

        # Reverse lookup table: integer token -> word.
        self.index_to_word = {index: word
                              for word, index in self.word_index.items()}

    def token_to_word(self, token):
        """Return the word for an integer token; token 0 maps to a single space."""
        if token == 0:
            return " "
        return self.index_to_word[token]

    def tokens_to_string(self, tokens):
        """Join the words of all non-zero tokens with single spaces."""
        return " ".join(self.index_to_word[token]
                        for token in tokens
                        if token != 0)

    def captions_to_tokens(self, captions_listlist):
        """Convert a list of caption lists into a list of lists of token sequences."""
        token_groups = []
        for group in captions_listlist:
            token_groups.append(self.texts_to_sequences(group))
        return token_groups

In [None]:
captions_train_flat

In [None]:
%%time
# Fit the tokenizer on all marked training captions.
tokenizer = TokenizerWrap(texts=captions_train_flat,
                          num_words=num_words)

In [None]:
tokenizer.word_index["one"]

In [None]:
# Integer token of the start marker (the marker without its padding space).
token_start = tokenizer.word_index[mark_start.strip()]
token_start

In [None]:
# Integer token of the end marker.
token_end = tokenizer.word_index[mark_end.strip()]
token_end

In [None]:
%%time
# Token sequences for all marked training captions, grouped per image.
tokens_train = tokenizer.captions_to_tokens(captions_train_marked)

In [None]:
tokens_train[0]

In [None]:
captions_train_marked[0]

In [None]:
def get_random_caption_tokens(idx):
    """
    For each image index in `idx`, return the token sequence of one of
    that image's captions, chosen at random.

    The result is a list with one token sequence per entry of `idx`,
    in the same order.
    """
    def pick_one(image_index):
        # All token sequences of this image; draw one position at random.
        options = tokens_train[image_index]
        return options[np.random.choice(len(options))]

    return [pick_one(i) for i in idx]

In [None]:

# Sanity checks: sizes of the training lists and of the feature array.
print(num_images_train)

print(num_caption_train)

In [None]:
np.asanyarray(filenames_train).shape

In [None]:

num_images_train

In [None]:
capsule_inception_train.shape

In [None]:
def batch_generator(batch_size):
    """
    Endless generator of random training batches.

    Every batch draws `batch_size` image indices independently at random,
    so an image may occur several times within an epoch (or within a
    batch) and some images may not occur at all. For each drawn image one
    of its captions is picked at random.

    Yields `(x_data, y_data)` where `x_data` holds 'decoder_input' and
    'transfer_values_input' and `y_data` holds 'decoder_output'.
    """
    while True:
        # Random image indices for this batch.
        picked = np.random.randint(num_images_train,
                                   size=batch_size)

        # Pre-computed feature vectors of the picked images.
        image_features = capsule_inception_train[picked]

        # One random caption (as tokens) per picked image.
        sequences = get_random_caption_tokens(picked)

        # Pad all sequences with zeros at the end to the longest one.
        longest = np.max([len(seq) for seq in sequences])
        padded = pad_sequences(sequences,
                               maxlen=longest,
                               padding='post',
                               truncating='post')

        # The target is the input sequence shifted one step ahead.
        inputs = {
            'decoder_input': padded[:, 0:-1],
            'transfer_values_input': image_features,
        }
        targets = {
            'decoder_output': padded[:, 1:],
        }

        yield (inputs, targets)

In [None]:
# Number of samples per training batch.
batch_size = 32

In [None]:
generator = batch_generator(batch_size=batch_size)

In [None]:
# Draw one batch to inspect its contents.
batch = next(generator)
batch_x = batch[0]
batch_y = batch[1]

In [None]:
generator

In [None]:
len(batch_x['transfer_values_input'][0])

In [None]:
len(batch_x['decoder_input'][0])

In [None]:
batch_y['decoder_output'][0].size

In [None]:
tf.convert_to_tensor(batch_x['decoder_input'])

In [None]:
tf.convert_to_tensor(batch_y['decoder_output'])

In [None]:
tf.convert_to_tensor((batch_x['decoder_input']))

In [None]:
# Number of captions per training image.
num_captions_train = [len(captions) for captions in captions_train]
len(captions_train)

In [None]:
total_num_captions_train = np.sum(num_captions_train)
total_num_captions_train

In [None]:
# One "epoch" is defined as as many batches as are needed to cover the total
# number of training captions once (rounded down).
steps_per_epoch = int(total_num_captions_train / batch_size)
steps_per_epoch

In [None]:
# Number of units of each GRU layer and of the feature-mapping Dense layer.
state_size = 512

In [None]:
# Output dimension of the word embedding.
embedding_size = 128

In [None]:
# Input for the pre-computed image feature vectors.
transfer_values_input = Input(shape=(transfer_values_size,),
                              name='transfer_values_input')

In [None]:
transfer_values_size

In [None]:
# Maps the image feature vector to a vector of length state_size, which
# connect_decoder uses as the initial state of the GRU layers.
decoder_transfer_map = Dense(state_size,
                             activation='tanh',
                             name='decoder_transfer_map')

In [None]:
# Input for token sequences of arbitrary length.
decoder_input = Input(shape=(None, ), name='decoder_input')
decoder_input.get_shape()

In [None]:
# Embedding from integer tokens to vectors of length embedding_size.
decoder_embedding = Embedding(input_dim=num_words,
                              output_dim=embedding_size,
                              name='decoder_embedding')

In [None]:
decoder_embedding

In [None]:
# Three GRU layers that each return the full output sequence.
decoder_gru1 = GRU(state_size, name='decoder_gru1',
                   return_sequences=True)
decoder_gru2 = GRU(state_size, name='decoder_gru2',
                   return_sequences=True)
decoder_gru3 = GRU(state_size, name='decoder_gru3',
                   return_sequences=True)

In [None]:
# Linear output layer with one value per vocabulary entry; the loss function
# treats these values as logits.
decoder_dense = Dense(num_words,
                      activation='linear',
                      name='decoder_output')

In [None]:
def connect_decoder(transfer_values):
    """
    Wire the decoder layers together and return the output tensor.

    `transfer_values` is mapped by `decoder_transfer_map` to the initial
    state shared by all three GRU layers. The token input is embedded,
    passed through the GRU layers in order and finally through
    `decoder_dense`.
    """
    # Initial GRU state derived from the image features.
    initial_state = decoder_transfer_map(transfer_values)

    # Embedded token sequence.
    net = decoder_embedding(decoder_input)

    # Stack of GRU layers, each started from the same initial state.
    for gru_layer in (decoder_gru1, decoder_gru2, decoder_gru3):
        net = gru_layer(net, initial_state=initial_state)

    return decoder_dense(net)

In [None]:
transfer_values_input.shape

In [None]:
# Connect the decoder layers to the feature input and wrap everything in a
# model with two inputs (image features, token sequence) and one output.
decoder_output = connect_decoder(transfer_values=transfer_values_input)

decoder_model = Model(inputs=[transfer_values_input, decoder_input],
                      outputs=[decoder_output])

In [None]:
decoder_output


In [None]:
batch_y['decoder_output'].shape

In [None]:
batch_x['decoder_input'].shape

In [None]:
def sparse_cross_entropy(y_true, y_pred):
    """
    Mean sparse softmax cross-entropy between integer labels and logits.

    `y_true` is passed as `labels` and `y_pred` as `logits` to
    `tf.nn.sparse_softmax_cross_entropy_with_logits`; the resulting loss
    values are averaged into a single scalar.
    """
    per_token_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y_true, logits=y_pred)
    return tf.reduce_mean(per_token_loss)

In [None]:
optimizer = RMSprop(lr=1e-3)


In [None]:

# Placeholder for the integer target tokens; both dimensions are left open.
decoder_target = tf.placeholder(dtype='int32', shape=(None, None))

In [None]:
# Compile with the custom loss and feed the targets through the placeholder
# defined above.
decoder_model.compile(optimizer=optimizer,
                      loss=sparse_cross_entropy,

                      target_tensors=[decoder_target])


In [None]:
decoder_target

In [None]:
# Write the model weights to this file during training.
path_checkpoint = '22_checkpoint.keras'
callback_checkpoint = ModelCheckpoint(filepath=path_checkpoint,
                                      verbose=1,
                                      save_weights_only=True)

In [None]:
# NOTE(review): histogram_freq=1 is set while the fit call below passes no
# validation data; some Keras versions reject that combination -- confirm.
callback_tensorboard = TensorBoard(log_dir='./22_logs/',
                                   histogram_freq=1,
                                   write_graph=True)

In [None]:
# NOTE(review): this rebinds the module-level name `callbacks`, which was
# imported from keras earlier in the file and is used as a module by train().
callbacks = [callback_checkpoint, callback_tensorboard]

In [None]:
# Try to resume from an existing checkpoint; on any error the message is
# printed and execution continues with the current weights.
try:
    decoder_model.load_weights(path_checkpoint)
except Exception as error:
    print("Error trying to load checkpoint.")
    print(error)

In [None]:
generator

In [None]:
decoder_model.summary()

In [None]:
%%time
# Train for one epoch of steps_per_epoch batches drawn from the generator.
history=decoder_model.fit_generator(generator=generator,
                            steps_per_epoch=steps_per_epoch,
                            epochs=1,
                            callbacks=callbacks)


In [None]:


# Plot the training loss recorded per epoch.
plt.plot(history.history['loss'])

plt.ylabel('Loss')
plt.xlabel('Epoch')

plt.show()

In [None]:
def generate_caption(image_path, max_tokens=30):
    """
    Generate a caption for the image at `image_path` by greedy decoding.

    The image is resized to `img_size`, its features are computed with
    `image_model_transfer` and `capsule_model` (concatenated along axis 1),
    and the decoder is then run repeatedly, each time appending the most
    likely next token to its input.

    :param image_path: path of the image file
    :param max_tokens: maximum number of tokens to generate
    :return: the generated words, each preceded by a single space; the word of
        the end marker is included when the decoder produces it
    """
    # Load the image and add a batch dimension of size 1.
    image = load_image(image_path, size=img_size)
    image_batch = np.expand_dims(image, axis=0)

    # Image features from both extractors, concatenated into one vector.
    transfer_values1 = image_model_transfer.predict(image_batch)
    transfer_values2 = capsule_model.predict(image_batch)
    transfer_values = np.concatenate([transfer_values1, transfer_values2], 1)

    # Token buffer fed to the decoder, filled from the left as tokens are
    # generated. The builtin `int` replaces `np.int`, which was a plain alias
    # of it and no longer exists in current NumPy releases.
    shape = (1, max_tokens)
    decoder_input_data = np.zeros(shape=shape, dtype=int)

    # Decoding starts from the start-marker token.
    token_int = token_start

    output_text = ''
    count_tokens = 0

    while token_int != token_end and count_tokens < max_tokens:
        # Append the most recent token to the decoder input.
        decoder_input_data[0, count_tokens] = token_int

        x_data = \
        {
            'transfer_values_input': transfer_values,
            'decoder_input': decoder_input_data
        }

        # The decoder is re-run on the whole buffer; only the output at the
        # current position is used.
        decoder_output = decoder_model.predict(x_data)
        token_onehot = decoder_output[0, count_tokens, :]

        # Greedy choice: the token with the highest output value.
        token_int = np.argmax(token_onehot)

        sampled_word = tokenizer.token_to_word(token_int)
        output_text += " " + sampled_word

        count_tokens += 1

    return output_text

In [None]:
# Shape check: load one image at the model's input size and add a batch axis.
image = load_image("/scratch/shima/data/test2017/000000581763.jpg", size=img_size)
image.shape
image_batch = np.expand_dims(image, axis=0)
image_batch.shape


In [None]:
# Show one image at its original size and generate a caption for it.
img2 = load_image("/scratch/shima/data/val2017/000000432898.jpg")
plt.imshow(img2)
plt.show()

generate_caption("/scratch/shima/data/val2017/000000432898.jpg")

In [None]:
def generate_caption_coco(idx, train=True):
    """
    Generate a caption for one COCO image and return it with the stored captions.

    `idx` indexes the training lists when `train` is truthy, otherwise
    the test lists (which live in `coco.val_dir`).

    Returns `(output_text, captions)`.
    """
    if train:
        data_dir, filename, captions = (coco.train_dir,
                                        filenames_train[idx],
                                        captions_train[idx])
    else:
        data_dir, filename, captions = (coco.val_dir,
                                        filenames_test[idx],
                                        captions_test[idx])

    # Caption predicted by the decoder for this image file.
    predicted = generate_caption(image_path=os.path.join(data_dir, filename))

    return predicted, captions

In [None]:
# Qualitative check on the validation set: for each selected image, show it
# with its stored captions and print the (generated caption, stored captions)
# pair. This replaces a series of copy-pasted cells that differed only in the
# index; the indices and their order are unchanged.
sample_indices = [
    102, 5, 896, 53, 200, 22, 436, 83, 573, 48,
    27, 1368, 1630, 4520, 414, 794, 3045, 630, 2445, 3394,
    1178, 145, 4462, 3292, 2921, 190, 703, 8, 1778, 606,
    983, 744, 515, 362,
]
for sample_idx in sample_indices:
    show_image(idx=sample_idx, train=False)
    print(generate_caption_coco(idx=sample_idx, train=False))

In [None]:
[Predicted_caption,True_captions]=generate_caption_coco(idx=103, train=False)
print(True_captions)

In [None]:
print(Predicted_caption)

In [None]:
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu

# Smoothing helper for sentence-level BLEU; created here but not used by the
# loop in this cell.
chencherry = SmoothingFunction()

# Caption every image in filenames_val and tokenise the prediction and its
# reference captions on whitespace.
# NOTE(review): nothing is scored or stored per image in this cell, so after
# the loop only the values from the last image remain in the globals.
for idxx in range(len(filenames_val)):
    Predicted_caption, True_captions = generate_caption_coco(idx=idxx, train=False)
    candidate = Predicted_caption.split()
    reference = [caption.split() for caption in True_captions]



In [None]:
def my_lcs(string, sub):
    """Return the length of the longest common subsequence of two sequences.

    Both arguments may be any indexable sequences with a length (strings,
    lists of tokens, ...); elements are compared with ``==``. The result is
    symmetric in its arguments.

    :param string: first sequence
    :param sub: second sequence
    :returns: number of elements in the longest common subsequence
    """
    # Treat the longer sequence as `string`, so the table has one row per
    # element of the longer input and one column per element of the shorter.
    if len(sub) > len(string):
        string, sub = sub, string

    n_rows = len(string) + 1
    n_cols = len(sub) + 1

    # table[r][c] = LCS length of string[:r] and sub[:c]; row 0 and column 0
    # stay at zero and represent the empty prefix.
    table = [[0] * n_cols for _ in range(n_rows)]

    for col in range(1, n_cols):
        sub_item = sub[col - 1]
        for row in range(1, n_rows):
            if string[row - 1] == sub_item:
                # Matching elements extend the best result of both prefixes.
                table[row][col] = table[row - 1][col - 1] + 1
            else:
                # Otherwise carry over the better result of dropping one
                # element from either sequence.
                table[row][col] = max(table[row - 1][col], table[row][col - 1])

    return table[-1][-1]

In [None]:
    def calc_score(candidate, refs, beta=1.2):
        """Compute an LCS-based (ROUGE-L style) F-score for one candidate caption.

        The candidate and every reference are tokenised on whitespace. For
        each reference, precision is ``LCS / len(candidate tokens)`` and
        recall is ``LCS / len(reference tokens)``. The best precision and the
        best recall over all references (taken independently) are combined
        into a weighted F-measure.

        :param candidate: candidate caption as a single string
        :param refs: iterable of reference caption strings
        :param beta: recall weight in the F-measure (default 1.2)
        :returns: the F-score as a float, 0.0 when there is no overlap
        :raises ValueError: if ``refs`` is empty
        """
        # split() without an argument collapses runs of whitespace and drops
        # leading/trailing blanks. split(" ") would emit empty-string tokens
        # for those, inflating the token counts and letting '' match '' in
        # the LCS. This also matches how the BLEU inputs are tokenised.
        token_c = candidate.split()

        prec = []
        rec = []
        for reference in refs:
            token_r = reference.split()

            if not token_c or not token_r:
                # No tokens on one side: no overlap is possible, and the
                # ratios below would divide by zero.
                prec.append(0.0)
                rec.append(0.0)
                continue

            lcs = my_lcs(token_r, token_c)
            prec.append(lcs / float(len(token_c)))
            rec.append(lcs / float(len(token_r)))

        if not prec:
            raise ValueError("calc_score needs at least one reference caption")

        prec_max = max(prec)
        rec_max = max(rec)

        if prec_max != 0 and rec_max != 0:
            beta_sq = beta ** 2
            score = ((1 + beta_sq) * prec_max * rec_max) / float(rec_max + beta_sq * prec_max)
        else:
            score = 0.0
        return score


In [None]:

from prettytable import PrettyTable
from nltk.translate.bleu_score import SmoothingFunction
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.meteor_score import meteor_score

# WordNet data is fetched up front; presumably required by meteor_score.
nltk.download('wordnet')
chencherry = SmoothingFunction()

# One record per image in filenames_val: its index plus six metric values.
dat_dtype = {
        'names' : ('idx', 'BLEU_1','BLEU_2','BLEU_3','BLEU_4','ROUGE','METEOR'),
        'formats' : ('i', 'f','f','f','f','f','f')}
dat = np.zeros(len(filenames_val), dat_dtype)

# Cumulative n-gram weights for BLEU-1..4. BLEU-3 uses exact thirds so the
# weights sum to 1 (0.33 * 3 = 0.99 slightly distorts the geometric mean).
bleu_weights = {
    'BLEU_1': (1, 0, 0, 0),
    'BLEU_2': (0.5, 0.5, 0, 0),
    'BLEU_3': (1 / 3, 1 / 3, 1 / 3, 0),
    'BLEU_4': (0.25, 0.25, 0.25, 0.25),
}

# The table is only created here; its rows are added in a later cell.
x = PrettyTable(dat.dtype.names)
for idxx in range(len(filenames_val)):
    Predicted_caption, True_captions = generate_caption_coco(idx=idxx, train=False)

    # BLEU works on token lists: one for the prediction, one per reference.
    candidate = Predicted_caption.split()
    reference = [caption.split() for caption in True_captions]

    dat['idx'][idxx] = idxx
    for metric_name, ngram_weights in bleu_weights.items():
        dat[metric_name][idxx] = sentence_bleu(
            reference, candidate,
            weights=ngram_weights,
            smoothing_function=chencherry.method4)

    # calc_score takes the raw caption strings and tokenises them itself.
    dat['ROUGE'][idxx] = calc_score(Predicted_caption, True_captions)
    # NOTE(review): raw strings are passed to meteor_score here; newer NLTK
    # releases appear to expect pre-tokenised inputs — verify against the
    # installed NLTK version before changing this call.
    dat['METEOR'][idxx] = round(meteor_score(True_captions,Predicted_caption),4)

# Persist the per-image results (np.save appends the .npy extension).
np.save('results',dat)




In [None]:
# Add one table row per per-image record in `dat`, then print the table.
# NOTE(review): `x` is created in the evaluation cell, so re-running only this
# cell presumably appends the same rows a second time — confirm.
for row in dat:
    x.add_row(row)

print(x)


In [None]:
from prettytable import PrettyTable

# Single-record structured array holding one averaged value per metric.
dat2_dtype = {
    'names': ('BLEU_1', 'BLEU_2', 'BLEU_3', 'BLEU_4', 'ROUGE', 'METEOR'),
    'formats': ('f',) * 6,
}
dat2 = np.zeros(1, dat2_dtype)
x2 = PrettyTable(dat2.dtype.names)

# Only the four BLEU columns are averaged over the per-image results here;
# ROUGE and METEOR keep their initial value of 0 (see the disabled lines).
for bleu_name in ('BLEU_1', 'BLEU_2', 'BLEU_3', 'BLEU_4'):
    dat2[bleu_name] = np.mean(dat[bleu_name])
# dat2['ROUGE']=np.mean(dat['ROUGE'])
# dat2['METEOR']=np.mean(dat['METEOR'])

# for row in dat2:
#     x2.add_row(row)
# print(x2)
# # print(dat2)
# np.save('dat_VGG',dat2)
# data = np.load('dat_VGG.npy')
# # print the array
# print(data)


In [None]:
# Spot check: mean BLEU-3 over records 350..399 of the per-image results.
np.mean(dat['BLEU_3'][350:400])

In [None]:
# Replace the in-memory results with an array loaded from disk.
# NOTE(review): the evaluation cell saves to 'results' (i.e. results.npy),
# while this loads 'results1.npy' — confirm this is the intended file.
dat=np.load('results1.npy')

In [None]:
from prettytable import PrettyTable

# Three-row summary table, one column per metric:
#   row 0 - best (maximum) per-image score
#   row 1 - mean per-image score
#   row 2 - average of rows 0 and 1
dat2_dtype = {
        'names' : ('BLEU_1','BLEU_2','BLEU_3','BLEU_4','ROUGE','METEOR'),
        'formats' : ('f','f','f','f','f','f')}
# Start from zeros: every cell is overwritten below, and no placeholder value
# may take part in any of the averages.
dat2 = np.zeros(3, dat2_dtype)
x2 = PrettyTable(dat2.dtype.names)

for metric_name in dat2.dtype.names:
    dat2[metric_name][0] = np.max(dat[metric_name])
    dat2[metric_name][1] = np.mean(dat[metric_name])
    # Average only the two populated rows. Taking the mean of the whole
    # column would also include row 2's own initial value (previously the
    # 1.0 left by np.ones), which skewed the result.
    dat2[metric_name][2] = np.mean(dat2[metric_name][:2])

for row in dat2:
    x2.add_row(row)
print(x2)

# Persist the summary (np.save appends the .npy extension).
np.save('dat_VGG',dat2)


In [None]:
# Mean of the BLEU_1 column of the summary table; this averages across the
# table's rows, not across images.
np.mean(dat2['BLEU_1'])

In [None]:
# Read the saved summary back from disk to check what was written.
np.load('dat_VGG.npy')

In [None]:
# Hand-written example for trying out the metrics: one tokenised candidate
# caption and five tokenised reference captions (whitespace tokens, with
# punctuation left attached to the words).
candidate = 'a plane flying in the sky with a lot of smoke eeee'.split()
reference = [
    sentence.split()
    for sentence in (
        'A big airplane flying in the big blue sky',
        'Large, two decked, four engined airliner in flight.',
        'An AirFrance jet airplane flying in the sky',
        'A big plane with AirFrance on the side of it.',
        'An Air France air plane in mid flight.',
    )
]


In [None]:
# Tokenise every reference caption of the current image on whitespace and
# display the resulting list of token lists.
reference = [caption.split() for caption in True_captions]
reference
    

# Evaluate the BLEU-1, BLEU-2, BLEU-3 and BLEU-4 Metrics

In [None]:
from nltk.translate.bleu_score import SmoothingFunction
from nltk.translate.bleu_score import sentence_bleu

# Print cumulative BLEU-1..4 for the current `candidate` token list against
# the current `reference` token lists, using smoothing method 4.
chencherry = SmoothingFunction()

# Cumulative n-gram weights, indexed by n-gram order. BLEU-3 uses exact
# thirds so the weights sum to 1 (0.33 * 3 = 0.99).
cumulative_weights = [
    (1, 0, 0, 0),
    (0.5, 0.5, 0, 0),
    (1 / 3, 1 / 3, 1 / 3, 0),
    (0.25, 0.25, 0.25, 0.25),
]
for ngram_order, ngram_weights in enumerate(cumulative_weights, start=1):
    bleu_value = sentence_bleu(
        reference, candidate,
        weights=ngram_weights,
        smoothing_function=chencherry.method4)
    # Same label format for every order (the BLEU-3 line used to read 'bleu=3').
    print('Cumulative bleu-%d: %f' % (ngram_order, bleu_value))

In [None]:
# Look at the first reference caption of the current image.
True_captions[0]


In [None]:
# Fetch the WordNet corpus; presumably required by meteor_score below.
import nltk
nltk.download('wordnet')

In [None]:
# Show all reference captions of the current image.
True_captions

In [None]:
# METEOR for the current prediction against its references, rounded to four
# decimals.
# NOTE(review): raw caption strings are passed here; newer NLTK releases
# appear to expect pre-tokenised inputs — verify against the installed version.
from nltk.translate.meteor_score import meteor_score
round(meteor_score(True_captions,Predicted_caption),4)