In [1]:
import tensorflow as tf
from keras import backend as K
import numpy as np
from keras.layers import Embedding
# print(tf.__version__)

Using TensorFlow backend.


In [2]:
def conv2d(x, w):
    return tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='SAME')

def maxpool2d(x):
    return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')

In [3]:
hidden_size=256
vb_size=18
batchsize=2
weights = {
    'W_conv1': tf.get_variable('W0', shape=(3,3,3,32), initializer=tf.contrib.layers.xavier_initializer()), 
    'W_conv2': tf.get_variable('W1', shape=(3,3,32,32), initializer=tf.contrib.layers.xavier_initializer()), 
    'W_conv3': tf.get_variable('W2', shape=(3,3,32,64), initializer=tf.contrib.layers.xavier_initializer()), 
    'W_conv4': tf.get_variable('W3', shape=(3,3,64,64), initializer=tf.contrib.layers.xavier_initializer()), 
    'W_conv5': tf.get_variable('W4', shape=(3,3,64,128), initializer=tf.contrib.layers.xavier_initializer()), 
    'W_conv6': tf.get_variable('W5', shape=(3,3,128,128), initializer=tf.contrib.layers.xavier_initializer()), 
    'W_fc1': tf.get_variable('W6', shape=(28*28*128,1024), initializer=tf.contrib.layers.xavier_initializer()), 
    'W_fc2': tf.get_variable('W7', shape=(1024,50), initializer=tf.contrib.layers.xavier_initializer()), 
    'out': tf.get_variable('W8', dtype = tf.float64,shape=(hidden_size,vb_size), initializer=tf.contrib.layers.xavier_initializer()), 
    }
biases = {
    'bc1': tf.get_variable('B0', shape=(32), initializer=tf.contrib.layers.xavier_initializer()),
    'bc2': tf.get_variable('B1', shape=(32), initializer=tf.contrib.layers.xavier_initializer()),
    'bc3': tf.get_variable('B2', shape=(64), initializer=tf.contrib.layers.xavier_initializer()),
    'bc4': tf.get_variable('B3', shape=(64), initializer=tf.contrib.layers.xavier_initializer()),
    'bc5': tf.get_variable('B4', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
    'bc6': tf.get_variable('B5', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
    'b_fc1': tf.get_variable('B6', shape=(1024), initializer=tf.contrib.layers.xavier_initializer()),
    'b_fc2': tf.get_variable('B7', shape=(50), initializer=tf.contrib.layers.xavier_initializer()),
    'out': tf.get_variable('B8', dtype = tf.float64,shape=(vb_size), initializer=tf.contrib.layers.xavier_initializer()),
    }

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [4]:
class GRU:
    """Implementation of a Gated Recurrent Unit (GRU) as described in [1].
    
    [1] Chung, J., Gulcehre, C., Cho, K., & Bengio, Y. (2014). Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555.
    
    Arguments
    ---------
    input_dimensions: int
        The size of the input vectors (x_t).
    hidden_size: int
        The size of the hidden layer vectors (h_t).
    dtype: obj
        The datatype used for the variables and constants (optional).
    """
    
    def __init__(self, input_dimensions, hidden_size, dtype=tf.float64):
        self.input_dimensions = input_dimensions
        self.hidden_size = hidden_size
        self.input_layer=[]
        
        # Weights for input vectors of shape (input_dimensions, hidden_size)
        self.Wr = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.input_dimensions, self.hidden_size), mean=0, stddev=0.01), name='Wr')
        self.Wz = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.input_dimensions, self.hidden_size), mean=0, stddev=0.01), name='Wz')
        self.Wh = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.input_dimensions, self.hidden_size), mean=0, stddev=0.01), name='Wh')
        
        # Weights for hidden vectors of shape (hidden_size, hidden_size)
        self.Ur = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size, self.hidden_size), mean=0, stddev=0.01), name='Ur')
        self.Uz = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size, self.hidden_size), mean=0, stddev=0.01), name='Uz')
        self.Uh = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size, self.hidden_size), mean=0, stddev=0.01), name='Uh')
        
        # Biases for hidden vectors of shape (hidden_size,)
        self.br = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size,), mean=0, stddev=0.01), name='br')
        self.bz = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size,), mean=0, stddev=0.01), name='bz')
        self.bh = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size,), mean=0, stddev=0.01), name='bh')
        
        # Define the input layer placeholder
        self.input_layer = tf.placeholder(dtype=tf.float64, shape=(None, None, input_dimensions), name='input')
        
        # Put the time-dimension upfront for the scan operator
        self.x_t = tf.transpose(self.input_layer, [1, 0, 2], name='x_t')
        
        # A little hack (to obtain the same shape as the input matrix) to define the initial hidden state h_0
        self.h_0 = tf.matmul(self.x_t[0, :, :], tf.zeros(dtype=tf.float64, shape=(input_dimensions, hidden_size)), name='h_0')
        
        # Perform the scan operator
        self.h_t_transposed = tf.scan(self.forward_pass, self.x_t, initializer=self.h_0, name='h_t_transposed')
        
        # Transpose the result back
        self.h_t = tf.transpose(self.h_t_transposed, [1, 0, 2], name='h_t')

    def forward_pass(self, h_tm1, x_t):
        """Perform a forward pass.
        
        Arguments
        ---------
        h_tm1: np.matrix
            The hidden state at the previous timestep (h_{t-1}).
        x_t: np.matrix
            The input vector.
        """
        # Definitions of z_t and r_t
        z_t = tf.sigmoid(tf.matmul(x_t, self.Wz) + tf.matmul(h_tm1, self.Uz) + self.bz)
        r_t = tf.sigmoid(tf.matmul(x_t, self.Wr) + tf.matmul(h_tm1, self.Ur) + self.br)
        
        # Definition of h~_t
        h_proposal = tf.tanh(tf.matmul(x_t, self.Wh) + tf.matmul(tf.multiply(r_t, h_tm1), self.Uh) + self.bh)
        
        # Compute the next hidden state
        h_t = tf.multiply(1 - z_t, h_tm1) + tf.multiply(z_t, h_proposal)
#         print(self.x_t.shape)
        
        print("h_t:",h_t.shape)
#         print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
        
        return h_t
    


In [11]:
epsilon = 1e-3

In [15]:
def batch_norm_wrapper(inputs, is_training, decay = 0.999):

    scale = tf.Variable(tf.ones([inputs.get_shape()[-1]]))
    beta = tf.Variable(tf.zeros([inputs.get_shape()[-1]]))
    pop_mean = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), trainable=False)
    pop_var = tf.Variable(tf.ones([inputs.get_shape()[-1]]), trainable=False)

    if is_training:
        batch_mean, batch_var = tf.nn.moments(inputs,[0])
        train_mean = tf.assign(pop_mean,
                               pop_mean * decay + batch_mean * (1 - decay))
        train_var = tf.assign(pop_var,
                              pop_var * decay + batch_var * (1 - decay))
        with tf.control_dependencies([train_mean, train_var]):
            return tf.nn.batch_normalization(inputs,batch_mean, batch_var, beta, scale, epsilon)
    else:
        return tf.nn.batch_normalization(inputs,pop_mean, pop_var, beta, scale, epsilon)

In [42]:
# x = tf.placeholder("float", [None,28,28,1])
# y = tf.placeholder("float", [None, n_classes])
def cnn(x,weights,biases):
    print("in cnn")
    
    '''
    weights = {'W_conv1':tf.Variable(tf.random_normal([3,3,1,32])),#56
               'W_conv2':tf.Variable(tf.random_normal([3,3,32,32])),#56
               'W_conv3':tf.Variable(tf.random_normal([3,3,32,64])),#28
               'W_conv4':tf.Variable(tf.random_normal([3,3,64,64])),#28
               'W_conv5':tf.Variable(tf.random_normal([3,3,64,128])),#14
               'W_conv6':tf.Variable(tf.random_normal([3,3,128,128])),#14
               'W_fc1':tf.Variable(tf.random_normal([7*7*128,1024])),  # since 3 times maxpooling.. inputsize/2^3
               'W_fc2':tf.Variable(tf.random_normal([1024,1024]))
              }
                  # depending on what that repeat vector does

    biases = {'b_conv1':tf.Variable(tf.random_normal([32])),
               'b_conv2':tf.Variable(tf.random_normal([32])),
               'b_conv3':tf.Variable(tf.random_normal([64])),
               'b_conv4':tf.Variable(tf.random_normal([64])),
               'b_conv5':tf.Variable(tf.random_normal([128])),
               'b_conv6':tf.Variable(tf.random_normal([128])),
               'b_fc1':tf.Variable(tf.random_normal([1024])),
               'b_fc2':tf.Variable(tf.random_normal([1024]))
             }
    '''
    
    print("-1")
#     x = tf.convert_to_tensor(x)
    print("00")
    print("bef",x.shape)
    x = tf.reshape(x, shape=[-1, 224, 224, 3])
    print("aft",x.shape)
    print("0")
    conv1 = tf.nn.relu(conv2d(x, weights['W_conv1'])+  biases['bc1'])
    print("********",weights['W_conv1'])
    print("1")
    print("conv1:",conv1.shape)
    conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['bc2'])
    print("2")
    print("conv2:",conv2.shape)
    conv2 = maxpool2d(conv2)
    print("3")
    print("maxpool:",conv2.shape)
    conv2 = tf.nn.dropout(conv2, 0.25)
#     print("dropout:",conv2.shape)
    print("okay")
    
    conv3 = tf.nn.relu(conv2d(conv2, weights['W_conv3']) + biases['bc3'])
    print("conv3:",conv3.shape)
    conv4 = tf.nn.relu(conv2d(conv3, weights['W_conv4']) + biases['bc4'])
    print("conv3:",conv3.shape)
    #conv4 = conv3
    conv4 = maxpool2d(conv4)
    print("maxpool:",conv4.shape)
    conv4 = tf.nn.dropout(conv4, 0.25)
    
    conv5 = tf.nn.relu(conv2d(conv4, weights['W_conv5']) + biases['bc5'])
    print("conv5:",conv5.shape)
    conv6 = tf.nn.relu(conv2d(conv5, weights['W_conv6']) + biases['bc6'])
    print("conv6:",conv6.shape)
    #conv6 = conv5
    conv6 = maxpool2d(conv6)
    print("conv6:",conv6.shape)
    conv6 = tf.nn.dropout(conv6, 0.25)

    fc1 = tf.reshape(conv6,[-1, weights['W_fc1'].get_shape().as_list()[0]])
    fc1 = tf.nn.relu(tf.matmul(fc1, weights['W_fc1'])+biases['b_fc1'])
    print("fc1:",fc1.shape)
    fc1 = tf.nn.dropout(fc1, 0.3)
    
    fc2 = tf.nn.relu(tf.matmul(fc1, weights['W_fc2'])+biases['b_fc2'])
    fc2 = tf.nn.dropout(fc2, 0.3)  
    #fc2 = fc1
    
#     out = tf.add(tf.matmul(fc2, weights['out']), biases['out'])
    print("fc2:",fc2.shape)
    print(fc2)
    
#     x_norm = batch_norm_wrapper(fc2,is_training)
    
    
#     inputs=fc2
#     scale = tf.Variable(tf.ones([inputs.get_shape()[-1]]))
#     beta = tf.Variable(tf.zeros([inputs.get_shape()[-1]]))
#     pop_mean = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), trainable=False)
#     pop_var = tf.Variable(tf.ones([inputs.get_shape()[-1]]), trainable=False)
#     decay=0.9999

#     if is_training:
#         batch_mean, batch_var = tf.nn.moments(inputs,[0])
#         train_mean = tf.assign(pop_mean,
#                                pop_mean * decay + batch_mean * (1 - decay))
#         train_var = tf.assign(pop_var,
#                               pop_var * decay + batch_var * (1 - decay))
#         with tf.control_dependencies([train_mean, train_var]):
#             return tf.nn.batch_normalization(inputs,batch_mean, batch_var, beta, scale, epsilon)
#     else:
#         return tf.nn.batch_normalization(inputs,pop_mean, pop_var, beta, scale, epsilon)
    
    
#     x_norm = tf.layers.batch_normalization(fc2, training=True)
#     input_gru = tf.repeat(fc2,)
    
#     print(x_norm.shape)
    return fc2

# def Gru(hidden_size):  
#     gru = GRU(1024,hidden_size)

#     W_output = tf.Variable(tf.truncated_normal(dtype=tf.float64, shape=(hidden_size, 1), mean=0, stddev=0.01))
#     b_output = tf.Variable(tf.truncated_normal(dtype=tf.float64, shape=(1,), mean=0, stddev=0.01))
#     output = tf.map_fn(lambda h_t: tf.matmul(h_t, W_output) + b_output, gru.h_t)

#     return output

In [7]:
# import torch.utils.data as data
import cv2
import sys
from os import listdir
from os.path import join
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical

def resize_img(png_file_path):
        img_rgb = cv2.imread(png_file_path)
        img_grey = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
        img_adapted = cv2.adaptiveThreshold(img_grey, 255, cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY, 101, 9)
        img_stacked = np.repeat(img_adapted[...,None],3,axis=2)
        resized = cv2.resize(img_stacked, (224,224), interpolation=cv2.INTER_AREA)
        bg_img = 255 * np.ones(shape=(224,224,3))
#         print(bg_img.shape,resized.shape)
        bg_img[0:224, 0:224,:] = resized
        bg_img /= 255
        bg_img = np.rollaxis(bg_img, 2, 0)  
#         print(bg_img.shape)
        return bg_img
    
def load_doc(filename):
    file = open(filename, 'r',encoding='UTF-8')
    text = file.read()
    file.close()
    return text

class Dataset():
    def __init__(self, data_dir, input_transform=None, target_transform=None):
        self.data_dir = data_dir
        self.image_filenames = []
        self.texts = []
        all_filenames = listdir(data_dir)
        all_filenames.sort()
        for filename in (all_filenames):
            if filename[-3:] == "png":
                self.image_filenames.append(filename)
            else:
                text = '<START> ' + load_doc(self.data_dir+filename) + ' <END>'
                text = ' '.join(text.split())
                text = text.replace(',', ' ,')
                self.texts.append(text)
        self.input_transform = input_transform
        self.target_transform = target_transform
        
        # Initialize the function to create the vocabulary 
        tokenizer = Tokenizer(filters='', split=" ", lower=False)
        # Create the vocabulary 
        tokenizer.fit_on_texts([load_doc('vocabulary.vocab')])
        self.tokenizer = tokenizer
        # Add one spot for the empty word in the vocabulary 
        self.vocab_size = len(tokenizer.word_index) + 1
        # Map the input sentences into the vocabulary indexes
        self.train_sequences = tokenizer.texts_to_sequences(self.texts)
        # The longest set of boostrap tokens
        self.max_sequence = max(len(s) for s in self.train_sequences)
        # Specify how many tokens to have in each input sentence
        self.max_length = 48
        
        X, y, image_data_filenames = list(), list(), list()
        for img_no, seq in enumerate(self.train_sequences):
            in_seq, out_seq = seq[:-1], seq[1:]
            out_seq = to_categorical(out_seq, num_classes=self.vocab_size)
            image_data_filenames.append(self.image_filenames[img_no])
            X.append(in_seq)
            y.append(out_seq)
                
        self.X = X
        self.y = y
        self.image_data_filenames = image_data_filenames
        self.images = list()
        for image_name in self.image_data_filenames:
            image = resize_img(self.data_dir+image_name)
            self.images.append(image)

In [8]:
dir_name = 'all_data5/'
batch_size = 32
my_dateset = Dataset(dir_name)

In [9]:
x_train = np.array(my_dateset.images,dtype=np.float32)
for i in range(len(x_train)):
    x_train[i]=np.array(x_train[i],dtype=np.float32)
print(x_train.shape)
# batch_size = 128
# dataset = tf.data.Dataset.from_tensor_slices((x))
# iterator = dataset.repeat().batch(batch_size).make_initializable_iterator()
# data_batch = iterator.get_next()

(50, 3, 224, 224)


In [43]:
im = tf.placeholder(dtype=tf.float32, shape=(None,3,224,224), name='im')
# is_training = tf.placeholder(dtype=tf.bool, name="is_training")
model1 = cnn(im,weights,biases)
output_train = batch_norm_wrapper(model1,True)
output_test = batch_norm_wrapper(model1,False)

in cnn
-1
00
bef (?, 3, 224, 224)
aft (?, 224, 224, 3)
0
******** <tf.Variable 'W0:0' shape=(3, 3, 3, 32) dtype=float32_ref>
1
conv1: (?, 224, 224, 32)
2
conv2: (?, 224, 224, 32)
3
maxpool: (?, 112, 112, 32)
okay
conv3: (?, 112, 112, 64)
conv3: (?, 112, 112, 64)
maxpool: (?, 56, 56, 64)
conv5: (?, 56, 56, 128)
conv6: (?, 56, 56, 128)
conv6: (?, 28, 28, 128)
fc1: (?, 1024)
fc2: (?, 50)
Tensor("dropout_54/mul_1:0", shape=(?, 50), dtype=float32)


In [30]:
gru = GRU(100,256)
hidden_size=256

# W_output = tf.Variable(tf.truncated_normal(dtype=tf.float64, shape=(hidden_size, 18), mean=0, stddev=0.01),trainable=True)
# b_output = tf.Variable(tf.truncated_normal(dtype=tf.float64, shape=(18,), mean=0, stddev=0.01),trainable=True)

W_output = weights['out']
b_output = biases['out']

# output = tf.map_fn(lambda h_t: tf.matmul(h_t, W_output) + b_output, gru.h_t)

output1 = tf.nn.relu(tf.matmul(gru.h_t,W_output)+b_output)
# out2 = tf.matmul(gru.h_t[0], W_output)+b_output

# tf.get_variable('W7', shape=(1024,50), initializer=tf.contrib.layers.xavier_initializer())

out3 = gru.h_t
print(out3.shape)

h_t: (?, 256)
(?, ?, 256)


In [31]:
y_train = my_dateset.X
y_train=np.array(y_train)
        
for i in range(len(y_train)):
    y_train[i]=np.array(y_train[i])
#     for j in range(len(x1[i])):
#         x1[i][j]=np.array(x1[i][j])
print(y_train.shape)
print(y_train[0].shape)
    

# x1=tf.constant(x1[0])
print("hi")
            
VOCAB_LEN=19
EMBED_SIZE=50
embeddings = tf.Variable(tf.random_uniform([VOCAB_LEN, EMBED_SIZE]))
caption_p = tf.placeholder(dtype=tf.int32, shape=(None,None), name='caption_p')
embed = tf.nn.embedding_lookup(embeddings, caption_p)

(50,)
(59,)
hi


In [32]:
expected = my_dateset.y
expected=np.array(expected)
for e in range(len(expected)):
    expected[e]=np.array(expected[e])
print(expected.shape)

(50,)


In [33]:
expected_output = tf.placeholder(dtype=tf.float64, shape=(None,None,None), name='expected_output')
loss = tf.reduce_sum(tf.squared_difference(output1 ,expected_output)) #/ float(1)
# e_loss = tf.placeholder(dtype=tf.float64,name='e_loss')
# epoch_loss = tf.reduce_sum(e_loss)
# train2 = tf.train.AdamOptimizer(0.0001).minimize(tf.reduce_sum(e_loss))
# train_step = tf.train.AdamOptimizer().minimize()
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(loss)

In [34]:
import functools 
def pad(batch_y):
    print(batch_y.shape)
    x=0
    for y in batch_y:
        if(len(y)>x):
            x=len(y)
#     x = functools.reduce(lambda x,y: len(x) if(len(x)>len(y)) else len(y),batch_y)
    
    ret = []
    for y in range(len(batch_y)):
        res=np.zeros(x)
        s = batch_y[y]
        res[0:len(s)]=batch_y[y]
#         batch_y[y]=res
        ret.append(res)
    return np.array(ret)
        
        
# a=[[1,2],[1,2,3]]
# pad(a)

def pad2(batch_ex):
#     r = functools.reduce(lambda x,y: len(x) if(len(x)>len(y)) else len(y),batch_ex)
#     print(":::::",r)
    r=0
    c=0
    for ex in batch_ex:
        shape = ex.shape
#         print(shape)
        if(shape[0]>r):
            r=shape[0]
        if(shape[1]>c):
            c=shape[1]
#     c = functools.reduce(lambda x,y: len(x[0]) if(len(x[0])>len(y[0])) else len(y[0]),batch_ex)
#     print(":::::",c)
#     print(r,c)
    ret=[]
    for ex in batch_ex:
        res=np.zeros((r,c))
#         print(res.shape)
#         print(ex.shape)
        res[0:ex.shape[0],0:ex.shape[1]]=ex
        ret.append(res)
#     print(ret)
        
    return(np.array(ret))

In [44]:
epoch = 5
vocab_size = 19
batch_size=10

x_train = my_dateset.images
caption = my_dateset.X
expected = my_dateset.y

saver = tf.train.Saver()

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    loss_ar=[]
    for e in range(epoch):
        loss_no=[]
        print(e)
        for batch in range(len(x_train)//batch_size):
            print("batch ",batch)
            batch_x = x_train[batch*batch_size:min((batch+1)*batch_size,len(x_train))]
            batch_y = caption[batch*batch_size:min((batch+1)*batch_size,len(caption))] 
            batch_ex = expected[batch*batch_size:min((batch+1)*batch_size,len(expected))]
            print("x:",len(batch_x))
            print("y:",len(batch_y))
            print("ex:",len(batch_ex))
            
#             print(batch_y)
            
            batch_x = np.array(batch_x)
            for b in range(len(batch_x)):
                batch_x[b]=np.array(batch_x[b])
            batch_y = np.array(batch_y)
            for b in range(len(batch_y)):
                batch_y[b]=np.array(batch_y[b])
            batch_ex = np.array(batch_ex)
            for b in range(len(batch_ex)):
                batch_ex[b]=np.array(batch_ex[b])
                
            print("bex:",batch_ex.shape)
#             print(batch_ex[0].shape)
                
            batch_y = pad(batch_y)
            batch_ex = pad2(batch_ex)
            
#             batch_y = batch_y.reshape((-1,1))
#             print(batch_ex.shape)
            
            sess.run(init)
            # image = K.expand_dims(x[i], 0)
            # print("im:",image.shape)
            c,m = sess.run([output_train,model1],feed_dict={im:batch_x})
            print("c:",c)
            
#             x_norm = batch_norm_wrapper(c,True,)
            
#             x_norm = tf.layers.batch_normalization(c, training=True)
#             x = sess.run(x_norm)
#             print(x.shape)
#             # em = K.expand_dims(x1[i], 0)

            inp2 = sess.run(embed,feed_dict={caption_p:batch_y})
            print("inp2:",inp2.shape)

            features_try = K.tile(K.expand_dims(c, 1), [1, K.shape(inp2)[1], 1])
            print("ktile:",features_try.shape)
            embeddings = tf.concat([features_try,inp2],2)
            print("emb:",embeddings.shape)
            inp = sess.run(embeddings)

            a = sess.run(output1,feed_dict={gru.input_layer:inp})
            print("a:",a.shape,"\n")

            # print("Sssss:",ex.shape)
            ls,tr = sess.run([loss,train_step],feed_dict ={expected_output:batch_ex,gru.input_layer:inp})
            print(ls/batch_size)
            loss_no.append(ls/batch_size)
            print("\n\n")
#         el = sess.run(epoch_loss,feed_dict={e_loss:loss_no})
        loss_ar.append(loss_no)

        print("-----------------------------------------------------------------") 
    save_path = saver.save(sess, "model.ckpt")

0
batch  0
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[ 0.35692024  2.9965706  -0.58318686  2.999444   -0.51972705 -0.604216
   2.3877065   0.         -0.34385395 -0.73439574 -0.29710886 -0.33287397
   0.          0.         -0.6740249   0.         -0.60835344  0.
  -0.5272045  -0.33308235 -0.32096398 -0.33320534 -0.53074896  0.
   0.          0.         -0.47635096 -0.34680608 -0.4976686  -0.3331494
  -0.46046832 -0.6237836  -0.33330068  0.         -0.54804003 -0.33140495
  -0.33331788 -0.61182225  0.         -0.33304328  1.0604054  -0.49038157
  -0.33313498 -0.33324188  0.          2.9707754  -0.25224137  0.
  -0.33323824 -0.33324707]
 [-0.59517366 -0.3329523  -0.58318686 -0.33327156 -0.51972705 -0.604216
  -0.49089724  0.         -0.34385395 -0.73439574 -0.29710886 -0.33287397
   0.          0.         -0.6740249   0.          1.5421917   0.
  -0.5272045   2.9977407  -0.32096398 -0.33320534 -0.26807463  0.
   0.          0.         -0.47635096 -0.34680608 -0.4976686  -0.3331494
  -0.46

a: (10, 78, 18) 

104.41571504742888



batch  2
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[-0.3333203  -0.5527879  -0.33281082  0.06270626  0.          0.
   0.6481334  -0.33327007 -0.44467747 -0.33312872 -0.33217272 -0.33329645
  -0.46843654 -0.4914525   0.          0.         -0.6235261   2.6955755
  -0.5120298   0.         -0.6129851  -0.49912417  1.2291927  -0.57982814
   0.          2.9835708  -0.33185306  0.          2.9308052  -0.33240056
  -0.4479201   1.7802154   0.         -0.652768   -0.33313295 -0.33312988
   0.         -0.4981858  -0.33310857 -0.36787394  0.         -0.5191187
  -0.32657775 -0.3331509   0.          0.          0.74528    -0.47442332
  -0.37382263  0.        ]
 [-0.3333203  -0.5527879  -0.33281082 -0.43590376  0.          0.
  -0.43530324 -0.33327007  0.7568493  -0.33312872 -0.33217272 -0.33329645
  -0.46843654 -0.4914525   0.          0.          1.4736409  -0.46305683
  -0.5120298   0.         -0.6129851   1.8667084  -0.49695468  0.32143348
   0.         -

a: (10, 78, 18) 

84.63102955182515



batch  4
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[-0.7867642   2.7195237   2.9995904  -0.33135453 -0.49944556 -0.33330634
   1.8903478  -0.556639    0.         -0.41629973 -0.5171656  -0.5469619
  -0.5967991  -0.6153665  -0.6875885   2.9999487  -0.6352275  -0.5751049
  -0.32910812 -0.34243953 -0.49558952 -0.3331189  -0.48024687 -0.49053884
  -0.3328697  -0.45697874  0.14031738 -0.36466664  1.1737716  -0.3330007
  -0.34964737  2.75566    -0.34628394  0.          0.          0.
  -0.3326609   0.          0.         -0.33331823  2.999873   -0.49088648
  -0.60036504  0.          2.999082   -0.47092155 -0.79360443  2.971259
  -0.4078607  -0.32417268]
 [-0.7867642  -0.45941523 -0.3332878  -0.33135453 -0.49944556 -0.33330634
  -0.7763299  -0.556639    0.         -0.41629973 -0.5171656   2.6745925
  -0.5967991  -0.6153665  -0.6875885  -0.33332762 -0.6352275  -0.5751049
  -0.32910812  2.9989052  -0.49558952 -0.3331189  -0.48024687 -0.49053884
  -0.3328697 

a: (10, 94, 18) 

106.98761269058848



batch  1
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[ 0.         -0.33328736 -0.39918348 -0.49920267 -0.57849675 -0.33332637
  -0.33331877  0.99188805 -0.33330232  0.64047253  0.          2.0012944
  -0.5062576   0.         -0.33331046 -0.33008483 -0.614838    0.97136533
   0.          0.         -0.31480858  0.          0.         -0.4961752
  -0.64678454  0.         -0.33331093 -0.33324522  0.          0.98369074
  -0.4119559  -0.33326742 -0.3330514   0.         -0.33263457  0.
  -0.33302057  0.         -0.3331726  -0.605193    0.         -0.4688484
  -0.33244807 -0.3333101  -0.368358   -0.49982592  2.2324798  -0.333327
  -0.53458244  0.        ]
 [ 0.         -0.33328736  0.25513902 -0.49920267 -0.57849675 -0.33332637
  -0.33331877 -0.46191612 -0.33330232 -0.49693617  0.         -0.499979
   2.1237516   0.         -0.33331046 -0.33008483 -0.614838    2.4396043
   0.          0.         -0.31480858  0.          0.          2.2604296
  -0.64678454 

a: (10, 76, 18) 

75.08910541566097



batch  3
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[-0.49614182  0.85270065 -0.33316255 -0.43349078 -0.49274713  2.1651614
  -0.56699145  0.         -0.44394574  0.         -0.35761252 -0.4770089
  -0.33324128 -0.33327594 -0.33252105  0.         -0.4980803  -0.3333197
   0.         -0.33322206 -0.33316785 -0.33328763 -0.33298555  0.
   0.         -0.5971174  -0.39929873  2.9993172   0.         -0.33328933
  -0.48765847  0.         -0.46601647  0.         -0.3332415  -0.3330036
  -0.59571767  0.         -0.3327512   0.         -0.32201332 -0.37191534
  -0.3331954   0.         -0.5536002  -0.4277535  -0.49172547 -0.48792374
  -0.13487852 -0.33331585]
 [-0.49614182 -0.590753   -0.33316255 -0.43349078 -0.49274713 -0.49838808
  -0.56699145  0.         -0.44394574  0.         -0.35761252 -0.4770089
   2.9991715  -0.33327594 -0.33252105  0.         -0.4980803   2.9998772
   0.         -0.33322206 -0.33316785 -0.33328763 -0.33298555  0.
   0.         -0.597

a: (10, 78, 18) 

95.95732985446764



-----------------------------------------------------------------
2
batch  0
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[ 0.18363667  1.9162171   0.          0.         -0.3333195   0.
   0.         -0.3327924  -0.76374114  0.          1.8231454  -0.5656245
  -0.27971244 -0.4430742   0.         -0.33316618 -0.36105192 -0.33189303
  -0.3331428  -0.4893546  -0.49403095 -0.30921438 -0.333301    0.
   2.6284235  -0.6168884   2.2435918  -0.3945706  -0.8880844  -0.33308232
   0.7154377   2.4028938   1.3592846  -0.42563915 -0.5989      0.
  -0.45846596 -0.468887   -0.33328757 -0.52262855 -0.4836644  -0.33326817
   0.          0.0816198   2.9143069  -0.36410728 -0.3333174  -0.33328983
   0.64946663 -0.3332338 ]
 [-0.5545679  -0.49965003  0.          0.         -0.3333195   0.
   0.          2.9951315   1.2449538   0.         -0.53227776 -0.5656245
  -0.27971244 -0.4430742   0.         -0.33316618 -0.36105192 -0.33189303
  -0.3331428  -0.4893546  -0.49403095 

a: (10, 78, 18) 

104.08742208629155



batch  2
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[-0.49758622 -0.333312   -0.3327892  -0.333134   -0.3386892   0.
  -0.49495885 -0.6160258  -0.3333226  -0.4957703   1.5178523   0.
  -0.49600527 -0.546489    0.         -0.5007718  -0.333156   -0.3333118
   0.7955318  -0.33305535 -0.49905294 -0.6339062  -0.33331573 -0.48966268
   2.98775    -0.44865474  0.         -0.3332804   0.         -0.6136186
  -0.3476328  -0.33330163 -0.88836706  0.         -0.47335646 -0.4596514
   0.          0.          0.          0.          0.          0.
  -0.585912   -0.4656702  -0.0958024  -0.493766   -0.4821524   0.92475456
   2.1142454  -0.3295278 ]
 [ 1.7789316  -0.333312   -0.3327892  -0.333134   -0.29011157  0.
  -0.49495885 -0.6160258  -0.3333226  -0.4957703  -0.48997778  0.
   0.00829664 -0.546489    0.          1.283778   -0.333156   -0.3333118
  -0.56300133 -0.33305535 -0.49905294 -0.6339062  -0.33331573  1.5076103
  -0.33197224 -0.44865474  0.         -0.3

a: (10, 78, 18) 

79.32716206439686



batch  4
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[-0.33692774  2.6581197  -0.49947003 -0.39126527 -0.33331352  0.
  -0.48417303 -0.38238266 -0.5267037   2.8749895   2.9940944  -0.585519
   0.         -0.5369148   0.          2.5171552  -0.33331153 -0.5256436
   0.21584827 -0.4999065  -0.33603615 -0.33331004 -0.3324658  -0.45867696
   2.9973376  -0.49907336 -0.3228144  -0.33330378 -0.3332058   2.8731618
  -0.33325368 -0.333329   -0.47979957 -0.6532649  -0.57658154  0.
   0.         -0.33332098 -0.33306056 -0.28200996 -0.50411093 -0.56012607
   2.9984243   0.         -0.32324377 -0.48771796 -0.33320692 -0.33318913
  -0.33330163  0.        ]
 [ 2.999492   -0.46810612  1.8958602  -0.39126527 -0.33331352  0.
   1.378818   -0.38238266 -0.5267037  -0.45919406 -0.33267716 -0.585519
   0.         -0.5369148   0.         -0.55882704 -0.33331153 -0.35232168
  -0.39554685  2.0107536  -0.33603615 -0.33331004 -0.3324658   2.7245736
  -0.33303753  2.1314795  -0.

a: (10, 94, 18) 

89.14869178235816



batch  1
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[-0.4433558   0.8915479  -0.33318457 -0.4998998  -0.46137452 -0.33327806
  -0.46320745 -0.35700423 -0.48347345 -0.42908502  2.9977243  -0.33310735
  -0.3761211   0.          2.986978   -0.56007427  2.9923768  -0.33331695
  -0.3329922  -0.48092443 -0.3330662  -0.3306661  -0.33242157  2.9998174
  -0.33325475 -0.3332772  -0.33332103 -0.33332396 -0.33329403 -0.3332882
  -0.11301953 -0.33331054 -0.28566095 -0.33329293  0.          2.9999716
   0.          0.         -0.33305836 -0.33320403  0.         -0.4169898
  -0.33331117 -0.41383204 -0.495162   -0.3788295   0.         -0.44601882
  -0.53571206 -0.33331665]
 [-0.4433558  -0.7681873  -0.33318457 -0.4998998  -0.46137452 -0.33327806
  -0.46320745 -0.35700423 -0.54729265  2.8640866  -0.34367463 -0.33310735
  -0.3761211   0.         -0.33188644  0.44599944 -0.35822326 -0.33331695
  -0.3329922  -0.48092443 -0.3330662  -0.3306661   2.991794   -0.33331305
  

a: (10, 76, 18) 

108.1069252964663



batch  3
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[ 0.         -0.25082213 -0.49924374 -0.33324462 -0.49816024 -0.47679606
  -0.33320963  1.1202762   0.         -0.4527533   2.156276    2.9987059
   2.998962   -0.33306393 -0.29734293  2.9374554   0.         -0.26052135
  -0.45475528 -0.47787318  0.         -0.33329588  0.         -0.33330002
   0.          1.3873334  -0.3332489   0.          2.271284   -0.33312026
  -0.23591751 -0.45183918 -0.47670567 -0.3312121  -0.42480388 -0.3331926
  -0.33329272  0.          0.          0.         -0.41648793  2.850212
   0.          1.7217062  -0.58192104 -0.33331767 -0.6149434  -0.33330747
  -0.3788052  -0.33327642]
 [ 0.         -0.4376361  -0.49924374 -0.33324462 -0.49816024 -0.47679606
   2.9988866  -0.61196536  0.          2.7589712  -0.63045996 -0.33318952
  -0.34106576 -0.33306393 -0.29734293  0.27624518  0.         -0.26052135
  -0.45475528  0.27810663  0.         -0.33329588  0.         -0.33330002
  

a: (10, 78, 18) 

91.09301321236016



-----------------------------------------------------------------
4
batch  0
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[-0.6150655  -0.4857558  -0.33305988  0.          0.         -0.3327693
  -0.33330047 -0.3333132  -0.3331576   0.         -0.70085055 -0.5149358
  -0.6528721  -0.5329373   0.          0.         -0.30150354 -0.33275342
  -0.62401086 -0.3191755   0.         -0.3332519   0.         -0.3332268
  -0.33322155  2.997508   -0.48088986 -0.27904922 -0.33323637 -0.32533967
  -0.33320522  2.9933176   2.9993243   2.9975073  -0.4999799  -0.33301842
  -0.33330014 -0.49850085  2.999837    0.         -0.3327782  -0.3705219
  -0.33327907  2.9406867  -0.6079655  -0.49275526 -0.38992056 -0.33331034
  -0.16626212 -0.33315653]
 [-0.6150655   0.6353109  -0.33305988  0.          0.         -0.3327693
  -0.33330047 -0.3333132  -0.3331576   0.          1.8619356  -0.5149358
   1.3357283  -0.5329373   0.          0.         -0.30150354 -0.33275342
  -0.62401

a: (10, 78, 18) 

112.5032840768987



batch  2
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[ 1.2286499   0.         -0.6398854   1.0322235  -0.5838146  -0.3323927
  -0.3332084  -0.39968327  0.         -0.4678724  -0.3326932  -0.33329877
   2.9997878   0.         -0.44963515 -0.48022923 -0.3333194  -0.32747927
   1.4948225  -0.49848995 -0.49821848  0.         -0.42266247 -0.33282253
   2.0139527  -0.333207   -0.15519968 -0.33321112 -0.3333236  -0.48894832
  -0.3321591   1.9033301  -0.3333103  -0.4447301   0.          0.
  -0.3331713   0.         -0.33329633 -0.3323045  -0.5763682   0.
  -0.38022467 -0.4661056   0.         -0.49294     0.         -0.33318248
  -0.49552643 -0.44528148]
 [-0.67184573  0.         -0.6398854  -0.46403557 -0.5838146  -0.3323927
  -0.3332084   2.940983    0.         -0.4678724  -0.3326932   2.999689
  -0.33330977  0.          2.875784   -0.48022923 -0.3333194  -0.32747927
  -0.6497342  -0.49848995  2.1766949   0.          0.496164    2.9954028
  -0.4999732  -0.33

a: (10, 78, 18) 

100.56676569354096



batch  4
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[ 0.          0.         -0.40255204 -0.49479797  0.         -0.33317533
   1.0858994   1.2041056   0.          0.         -0.8483163  -0.49858996
  -0.4937994  -0.33332008 -0.4666374   2.9999018  -0.33331314 -0.3326216
  -0.9117225  -0.7805271   0.         -0.32766166  0.         -0.3333059
  -0.45416102  0.         -0.33315617  2.8999236  -0.43033504 -0.3320683
   0.          0.450759    0.         -0.33330247 -0.33317402  0.
  -0.2760647  -0.3332951   2.6178052  -0.33332676 -0.3333105  -0.7711819
  -0.48576295  1.7440271   0.          2.8696408  -0.41954833 -0.4774748
  -0.36524126 -0.33323643]
 [ 0.          0.         -0.40255204 -0.49479797  0.         -0.33317533
  -0.59105307 -0.5702528   0.          0.          1.9392437  -0.49858996
  -0.4937994  -0.33332008  1.0637362  -0.3333224  -0.33331314 -0.3326216
   0.49545497 -0.7805271   0.         -0.32766166  0.         -0.3333059
  -0.4541610

In [45]:
print(loss_ar)

[[127.44862523339684, 104.41571504742888, 93.42080896118804, 84.63102955182515, 98.21128804298931], [106.98761269058848, 93.66092648296163, 75.08910541566097, 79.61150651826006, 95.95732985446764], [108.80419343463932, 104.08742208629155, 74.51018005371495, 79.32716206439686, 93.9205601926338], [89.14869178235816, 139.730591000098, 108.1069252964663, 97.3237773048415, 91.09301321236016], [94.26025404106366, 112.5032840768987, 91.90883326086202, 100.56676569354096, 105.40005576966482]]


In [46]:
def word_for_id(integer, tokenizer):
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None
def load_val_images(data_dir):
    image_filenames =[]
    images = []
    all_filenames = listdir(data_dir)
    all_filenames.sort()
    for filename in (all_filenames):
        if filename[-3:] == "png":
            image_filenames.append(filename)
    for name in image_filenames:
        image = resize_img(data_dir+name)
        images.append(image)
    return images

In [47]:
decoded_words = []
star_text = '<START> '
image = load_val_images('all_data6/')[0]
img_tensor=np.expand_dims(np.array(image),0)
img_tensor=np.array(img_tensor)
''''image = Variable(torch.FloatTensor([image]))'''
predicted = '<START> '
 
img_tensor.shape

(1, 3, 224, 224)

In [49]:
predicted='<START>'
with tf.Session() as sess:
#     init = tf.global_variables_initializer()
#     sess.run(init)
    saver.restore(sess, "model.ckpt")
#     print("####",weights['W_conv1'].eval())
    for di in range(50):
        sequence = my_dateset.tokenizer.texts_to_sequences([star_text])[0]
#         print(sequence)
        decoder_input = np.array(sequence).reshape(-1,1)
#         c = sess.run(model1,feed_dict={im:img_tensor})
#         print("--------------------------")
        c,m = sess.run([output_test,model1],feed_dict={im:img_tensor})
#         print(di, c)
#         print("--------------------------")
        
        inp2 = sess.run(embed,feed_dict={caption_p:decoder_input})
        features_try = K.tile(K.expand_dims(c, 1), [1, K.shape(inp2)[1], 1])
        embeddings = tf.concat([features_try,inp2],2)
        inp = sess.run(embeddings)
        a = sess.run(output1,feed_dict={gru.input_layer:inp})
        data=list(a[0][0])
        i=data.index(max(data))
        word = word_for_id(i,my_dateset.tokenizer)
        if word is None:
            continue
        predicted += word + ' '
        star_text = word
        print(predicted)
        if word == '<END>':
            break

INFO:tensorflow:Restoring parameters from model.ckpt
--------------------------
--------------------------
<START>double 
--------------------------
--------------------------
<START>double } 
--------------------------
--------------------------
<START>double } } 
--------------------------
--------------------------
<START>double } } } 
--------------------------
--------------------------
<START>double } } } } 
--------------------------
--------------------------
<START>double } } } } double 
--------------------------
--------------------------
<START>double } } } } double double 
--------------------------
--------------------------
<START>double } } } } double double } 
--------------------------
--------------------------
<START>double } } } } double double } double 
--------------------------
--------------------------
<START>double } } } } double double } double } 
--------------------------
--------------------------
<START>double } } } } double double } double } double 
---

--------------------------
<START>double } } } } double double } double } double } double double } } } double } } } } double } double double double double double } double } double } } double } double } } } double } double double } } double } double 
