In [2]:
import tensorflow as tf
from keras import backend as K
import numpy as np
from keras.layers import Embedding
# print(tf.__version__)

Using TensorFlow backend.


In [3]:
def conv2d(x, w):
    return tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='SAME')

def maxpool2d(x):
    return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')

In [4]:
hidden_size=256
vb_size=18
batchsize=2
weights = {
    'W_conv1': tf.get_variable('W0', shape=(3,3,3,32), initializer=tf.contrib.layers.xavier_initializer()), 
    'W_conv2': tf.get_variable('W1', shape=(3,3,32,32), initializer=tf.contrib.layers.xavier_initializer()), 
    'W_conv3': tf.get_variable('W2', shape=(3,3,32,64), initializer=tf.contrib.layers.xavier_initializer()), 
    'W_conv4': tf.get_variable('W3', shape=(3,3,64,64), initializer=tf.contrib.layers.xavier_initializer()), 
    'W_conv5': tf.get_variable('W4', shape=(3,3,64,128), initializer=tf.contrib.layers.xavier_initializer()), 
    'W_conv6': tf.get_variable('W5', shape=(3,3,128,128), initializer=tf.contrib.layers.xavier_initializer()), 
    'W_fc1': tf.get_variable('W6', shape=(28*28*128,1024), initializer=tf.contrib.layers.xavier_initializer()), 
    'W_fc2': tf.get_variable('W7', shape=(1024,50), initializer=tf.contrib.layers.xavier_initializer()), 
    'out': tf.get_variable('W8', dtype = tf.float64,shape=(hidden_size,vb_size), initializer=tf.contrib.layers.xavier_initializer()), 
    }
biases = {
    'bc1': tf.get_variable('B0', shape=(32), initializer=tf.contrib.layers.xavier_initializer()),
    'bc2': tf.get_variable('B1', shape=(32), initializer=tf.contrib.layers.xavier_initializer()),
    'bc3': tf.get_variable('B2', shape=(64), initializer=tf.contrib.layers.xavier_initializer()),
    'bc4': tf.get_variable('B3', shape=(64), initializer=tf.contrib.layers.xavier_initializer()),
    'bc5': tf.get_variable('B4', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
    'bc6': tf.get_variable('B5', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
    'b_fc1': tf.get_variable('B6', shape=(1024), initializer=tf.contrib.layers.xavier_initializer()),
    'b_fc2': tf.get_variable('B7', shape=(50), initializer=tf.contrib.layers.xavier_initializer()),
    'out': tf.get_variable('B8', dtype = tf.float64,shape=(vb_size), initializer=tf.contrib.layers.xavier_initializer()),
    }

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [5]:
class GRU:
    """Implementation of a Gated Recurrent Unit (GRU) as described in [1].
    
    [1] Chung, J., Gulcehre, C., Cho, K., & Bengio, Y. (2014). Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555.
    
    Arguments
    ---------
    input_dimensions: int
        The size of the input vectors (x_t).
    hidden_size: int
        The size of the hidden layer vectors (h_t).
    dtype: obj
        The datatype used for the variables and constants (optional).
    """
    
    def __init__(self, input_dimensions, hidden_size, dtype=tf.float64):
        self.input_dimensions = input_dimensions
        self.hidden_size = hidden_size
        self.input_layer=[]
        
        # Weights for input vectors of shape (input_dimensions, hidden_size)
        self.Wr = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.input_dimensions, self.hidden_size), mean=0, stddev=0.01), name='Wr')
        self.Wz = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.input_dimensions, self.hidden_size), mean=0, stddev=0.01), name='Wz')
        self.Wh = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.input_dimensions, self.hidden_size), mean=0, stddev=0.01), name='Wh')
        
        # Weights for hidden vectors of shape (hidden_size, hidden_size)
        self.Ur = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size, self.hidden_size), mean=0, stddev=0.01), name='Ur')
        self.Uz = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size, self.hidden_size), mean=0, stddev=0.01), name='Uz')
        self.Uh = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size, self.hidden_size), mean=0, stddev=0.01), name='Uh')
        
        # Biases for hidden vectors of shape (hidden_size,)
        self.br = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size,), mean=0, stddev=0.01), name='br')
        self.bz = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size,), mean=0, stddev=0.01), name='bz')
        self.bh = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size,), mean=0, stddev=0.01), name='bh')
        
        # Define the input layer placeholder
        self.input_layer = tf.placeholder(dtype=tf.float64, shape=(None, None, input_dimensions), name='input')
        
        # Put the time-dimension upfront for the scan operator
        self.x_t = tf.transpose(self.input_layer, [1, 0, 2], name='x_t')
        
        # A little hack (to obtain the same shape as the input matrix) to define the initial hidden state h_0
        self.h_0 = tf.matmul(self.x_t[0, :, :], tf.zeros(dtype=tf.float64, shape=(input_dimensions, hidden_size)), name='h_0')
        
        # Perform the scan operator
        self.h_t_transposed = tf.scan(self.forward_pass, self.x_t, initializer=self.h_0, name='h_t_transposed')
        
        # Transpose the result back
        self.h_t = tf.transpose(self.h_t_transposed, [1, 0, 2], name='h_t')

    def forward_pass(self, h_tm1, x_t):
        """Perform a forward pass.
        
        Arguments
        ---------
        h_tm1: np.matrix
            The hidden state at the previous timestep (h_{t-1}).
        x_t: np.matrix
            The input vector.
        """
        # Definitions of z_t and r_t
        z_t = tf.sigmoid(tf.matmul(x_t, self.Wz) + tf.matmul(h_tm1, self.Uz) + self.bz)
        r_t = tf.sigmoid(tf.matmul(x_t, self.Wr) + tf.matmul(h_tm1, self.Ur) + self.br)
        
        # Definition of h~_t
        h_proposal = tf.tanh(tf.matmul(x_t, self.Wh) + tf.matmul(tf.multiply(r_t, h_tm1), self.Uh) + self.bh)
        
        # Compute the next hidden state
        h_t = tf.multiply(1 - z_t, h_tm1) + tf.multiply(z_t, h_proposal)
#         print(self.x_t.shape)
        
        print("h_t:",h_t.shape)
#         print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
        
        return h_t

In [41]:
# x = tf.placeholder("float", [None,28,28,1])
# y = tf.placeholder("float", [None, n_classes])
def cnn(x,weights,biases):
    print("in cnn")
    '''
    weights = {'W_conv1':tf.Variable(tf.random_normal([3,3,1,32])),#56
               'W_conv2':tf.Variable(tf.random_normal([3,3,32,32])),#56
               'W_conv3':tf.Variable(tf.random_normal([3,3,32,64])),#28
               'W_conv4':tf.Variable(tf.random_normal([3,3,64,64])),#28
               'W_conv5':tf.Variable(tf.random_normal([3,3,64,128])),#14
               'W_conv6':tf.Variable(tf.random_normal([3,3,128,128])),#14
               'W_fc1':tf.Variable(tf.random_normal([7*7*128,1024])),  # since 3 times maxpooling.. inputsize/2^3
               'W_fc2':tf.Variable(tf.random_normal([1024,1024]))
              }
                  # depending on what that repeat vector does

    biases = {'b_conv1':tf.Variable(tf.random_normal([32])),
               'b_conv2':tf.Variable(tf.random_normal([32])),
               'b_conv3':tf.Variable(tf.random_normal([64])),
               'b_conv4':tf.Variable(tf.random_normal([64])),
               'b_conv5':tf.Variable(tf.random_normal([128])),
               'b_conv6':tf.Variable(tf.random_normal([128])),
               'b_fc1':tf.Variable(tf.random_normal([1024])),
               'b_fc2':tf.Variable(tf.random_normal([1024]))
             }
    '''
    
    print("-1")
#     x = tf.convert_to_tensor(x)
    print("00")
    print("bef",x.shape)
    x = tf.reshape(x, shape=[-1, 224, 224, 3])
    print("aft",x.shape)
    print("0")
    conv1 = tf.nn.relu(conv2d(x, weights['W_conv1'])+  biases['bc1'])
    print("********",weights['W_conv1'])
    print("1")
    print("conv1:",conv1.shape)
    conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['bc2'])
    print("2")
    print("conv2:",conv2.shape)
    conv2 = maxpool2d(conv2)
    print("3")
    print("maxpool:",conv2.shape)
    conv2 = tf.nn.dropout(conv2, 0.25)
#     print("dropout:",conv2.shape)
    print("okay")
    
    conv3 = tf.nn.relu(conv2d(conv2, weights['W_conv3']) + biases['bc3'])
    print("conv3:",conv3.shape)
    conv4 = tf.nn.relu(conv2d(conv3, weights['W_conv4']) + biases['bc4'])
    print("conv3:",conv3.shape)
    #conv4 = conv3
    conv4 = maxpool2d(conv4)
    print("maxpool:",conv4.shape)
    conv4 = tf.nn.dropout(conv4, 0.25)
    
    conv5 = tf.nn.relu(conv2d(conv4, weights['W_conv5']) + biases['bc5'])
    print("conv5:",conv5.shape)
    conv6 = tf.nn.relu(conv2d(conv5, weights['W_conv6']) + biases['bc6'])
    print("conv6:",conv6.shape)
    #conv6 = conv5
    conv6 = maxpool2d(conv6)
    print("conv6:",conv6.shape)
    conv6 = tf.nn.dropout(conv6, 0.25)

    fc1 = tf.reshape(conv6,[-1, weights['W_fc1'].get_shape().as_list()[0]])
    fc1 = tf.nn.relu(tf.matmul(fc1, weights['W_fc1'])+biases['b_fc1'])
    print("fc1:",fc1.shape)
    fc1 = tf.nn.dropout(fc1, 0.3)
    
    fc2 = tf.nn.relu(tf.matmul(fc1, weights['W_fc2'])+biases['b_fc2'])
    fc2 = tf.nn.dropout(fc2, 0.3)  
    #fc2 = fc1
    
#     out = tf.add(tf.matmul(fc2, weights['out']), biases['out'])
    print("fc2:",fc2.shape)
    print(fc2)
    
    x_norm = tf.layers.batch_normalization(fc2, training=True)
#     input_gru = tf.repeat(fc2,)
    
    print(x_norm.shape)
    return x_norm

# def Gru(hidden_size):  
#     gru = GRU(1024,hidden_size)

#     W_output = tf.Variable(tf.truncated_normal(dtype=tf.float64, shape=(hidden_size, 1), mean=0, stddev=0.01))
#     b_output = tf.Variable(tf.truncated_normal(dtype=tf.float64, shape=(1,), mean=0, stddev=0.01))
#     output = tf.map_fn(lambda h_t: tf.matmul(h_t, W_output) + b_output, gru.h_t)

#     return output

In [7]:
# import torch.utils.data as data
import cv2
import sys
from os import listdir
from os.path import join
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical

def resize_img(png_file_path):
        img_rgb = cv2.imread(png_file_path)
        img_grey = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
        img_adapted = cv2.adaptiveThreshold(img_grey, 255, cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY, 101, 9)
        img_stacked = np.repeat(img_adapted[...,None],3,axis=2)
        resized = cv2.resize(img_stacked, (224,224), interpolation=cv2.INTER_AREA)
        bg_img = 255 * np.ones(shape=(224,224,3))
#         print(bg_img.shape,resized.shape)
        bg_img[0:224, 0:224,:] = resized
        bg_img /= 255
        bg_img = np.rollaxis(bg_img, 2, 0)  
#         print(bg_img.shape)
        return bg_img
    
def load_doc(filename):
    file = open(filename, 'r',encoding='UTF-8')
    text = file.read()
    file.close()
    return text

class Dataset():
    def __init__(self, data_dir, input_transform=None, target_transform=None):
        self.data_dir = data_dir
        self.image_filenames = []
        self.texts = []
        all_filenames = listdir(data_dir)
        all_filenames.sort()
        for filename in (all_filenames):
            if filename[-3:] == "png":
                self.image_filenames.append(filename)
            else:
                text = '<START> ' + load_doc(self.data_dir+filename) + ' <END>'
                text = ' '.join(text.split())
                text = text.replace(',', ' ,')
                self.texts.append(text)
        self.input_transform = input_transform
        self.target_transform = target_transform
        
        # Initialize the function to create the vocabulary 
        tokenizer = Tokenizer(filters='', split=" ", lower=False)
        # Create the vocabulary 
        tokenizer.fit_on_texts([load_doc('vocabulary.vocab')])
        self.tokenizer = tokenizer
        # Add one spot for the empty word in the vocabulary 
        self.vocab_size = len(tokenizer.word_index) + 1
        # Map the input sentences into the vocabulary indexes
        self.train_sequences = tokenizer.texts_to_sequences(self.texts)
        # The longest set of boostrap tokens
        self.max_sequence = max(len(s) for s in self.train_sequences)
        # Specify how many tokens to have in each input sentence
        self.max_length = 48
        
        X, y, image_data_filenames = list(), list(), list()
        for img_no, seq in enumerate(self.train_sequences):
            in_seq, out_seq = seq[:-1], seq[1:]
            out_seq = to_categorical(out_seq, num_classes=self.vocab_size)
            image_data_filenames.append(self.image_filenames[img_no])
            X.append(in_seq)
            y.append(out_seq)
                
        self.X = X
        self.y = y
        self.image_data_filenames = image_data_filenames
        self.images = list()
        for image_name in self.image_data_filenames:
            image = resize_img(self.data_dir+image_name)
            self.images.append(image)

In [8]:
dir_name = 'all_data5/'
batch_size = 32
my_dateset = Dataset(dir_name)

In [9]:
x_train = np.array(my_dateset.images,dtype=np.float32)
for i in range(len(x_train)):
    x_train[i]=np.array(x_train[i],dtype=np.float32)
print(x_train.shape)
# batch_size = 128
# dataset = tf.data.Dataset.from_tensor_slices((x))
# iterator = dataset.repeat().batch(batch_size).make_initializable_iterator()
# data_batch = iterator.get_next()

(50, 3, 224, 224)


In [10]:
im = tf.placeholder(dtype=tf.float32, shape=(None,3,224,224), name='im')
model1 = cnn(im,weights,biases)

in cnn
-1
00
bef (?, 3, 224, 224)
aft (?, 224, 224, 3)
0
1
conv1: (?, 224, 224, 32)
2
conv2: (?, 224, 224, 32)
3
maxpool: (?, 112, 112, 32)
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
okay
conv3: (?, 112, 112, 64)
conv3: (?, 112, 112, 64)
maxpool: (?, 56, 56, 64)
conv5: (?, 56, 56, 128)
conv6: (?, 56, 56, 128)
conv6: (?, 28, 28, 128)
fc1: (?, 1024)
fc2: (?, 50)
Tensor("dropout_4/mul_1:0", shape=(?, 50), dtype=float32)
Instructions for updating:
Use keras.layers.BatchNormalization instead.  In particular, `tf.control_dependencies(tf.GraphKeys.UPDATE_OPS)` should not be used (consult the `tf.keras.layers.batch_normalization` documentation).
Instructions for updating:
Please use `layer.__call__` method instead.
(?, 50)


In [11]:
gru = GRU(100,256)
hidden_size=256

# W_output = tf.Variable(tf.truncated_normal(dtype=tf.float64, shape=(hidden_size, 18), mean=0, stddev=0.01),trainable=True)
# b_output = tf.Variable(tf.truncated_normal(dtype=tf.float64, shape=(18,), mean=0, stddev=0.01),trainable=True)

W_output = weights['out']
b_output = biases['out']

# output = tf.map_fn(lambda h_t: tf.matmul(h_t, W_output) + b_output, gru.h_t)

output1 = tf.nn.relu(tf.matmul(gru.h_t,W_output)+b_output)
# out2 = tf.matmul(gru.h_t[0], W_output)+b_output

# tf.get_variable('W7', shape=(1024,50), initializer=tf.contrib.layers.xavier_initializer())

out3 = gru.h_t
print(out3.shape)

h_t: (?, 256)
(?, ?, 256)


In [12]:
y_train = my_dateset.X
y_train=np.array(y_train)
        
for i in range(len(y_train)):
    y_train[i]=np.array(y_train[i])
#     for j in range(len(x1[i])):
#         x1[i][j]=np.array(x1[i][j])
print(y_train.shape)
print(y_train[0].shape)
    

# x1=tf.constant(x1[0])
print("hi")
            
VOCAB_LEN=19
EMBED_SIZE=50
embeddings = tf.Variable(tf.random_uniform([VOCAB_LEN, EMBED_SIZE]))
caption_p = tf.placeholder(dtype=tf.int32, shape=(None,None), name='caption_p')
embed = tf.nn.embedding_lookup(embeddings, caption_p)


(50,)
(59,)
hi


In [13]:
expected = my_dateset.y
expected=np.array(expected)
for e in range(len(expected)):
    expected[e]=np.array(expected[e])
print(expected.shape)

(50,)


In [14]:
expected_output = tf.placeholder(dtype=tf.float64, shape=(None,None,None), name='expected_output')
loss = tf.reduce_sum(tf.squared_difference(output1 ,expected_output)) #/ float(1)
# e_loss = tf.placeholder(dtype=tf.float64,name='e_loss')
# epoch_loss = tf.reduce_sum(e_loss)
# train2 = tf.train.AdamOptimizer(0.0001).minimize(tf.reduce_sum(e_loss))
# train_step = tf.train.AdamOptimizer().minimize()
train_step = tf.train.GradientDescentOptimizer(0.001).minimize(loss)

In [15]:
import functools 
def pad(batch_y):
    print(batch_y.shape)
    x=0
    for y in batch_y:
        if(len(y)>x):
            x=len(y)
#     x = functools.reduce(lambda x,y: len(x) if(len(x)>len(y)) else len(y),batch_y)
    
    ret = []
    for y in range(len(batch_y)):
        res=np.zeros(x)
        s = batch_y[y]
        res[0:len(s)]=batch_y[y]
#         batch_y[y]=res
        ret.append(res)
    return np.array(ret)
        
        
# a=[[1,2],[1,2,3]]
# pad(a)

def pad2(batch_ex):
#     r = functools.reduce(lambda x,y: len(x) if(len(x)>len(y)) else len(y),batch_ex)
#     print(":::::",r)
    r=0
    c=0
    for ex in batch_ex:
        shape = ex.shape
#         print(shape)
        if(shape[0]>r):
            r=shape[0]
        if(shape[1]>c):
            c=shape[1]
#     c = functools.reduce(lambda x,y: len(x[0]) if(len(x[0])>len(y[0])) else len(y[0]),batch_ex)
#     print(":::::",c)
#     print(r,c)
    ret=[]
    for ex in batch_ex:
        res=np.zeros((r,c))
#         print(res.shape)
#         print(ex.shape)
        res[0:ex.shape[0],0:ex.shape[1]]=ex
        ret.append(res)
#     print(ret)
        
    return(np.array(ret))
    

In [45]:
epoch = 5
vocab_size = 19
batch_size=10

x_train = my_dateset.images
caption = my_dateset.X
expected = my_dateset.y

saver = tf.train.Saver()

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    loss_ar=[]
    for e in range(epoch):
        loss_no=[]
        print(e)
        for batch in range(len(x_train)//batch_size):
            print("batch ",batch)
            batch_x = x_train[batch*batch_size:min((batch+1)*batch_size,len(x_train))]
            batch_y = caption[batch*batch_size:min((batch+1)*batch_size,len(caption))] 
            batch_ex = expected[batch*batch_size:min((batch+1)*batch_size,len(expected))]
            print("x:",len(batch_x))
            print("y:",len(batch_y))
            print("ex:",len(batch_ex))
            
#             print(batch_y)
            
            batch_x = np.array(batch_x)
            for b in range(len(batch_x)):
                batch_x[b]=np.array(batch_x[b])
            batch_y = np.array(batch_y)
            for b in range(len(batch_y)):
                batch_y[b]=np.array(batch_y[b])
            batch_ex = np.array(batch_ex)
            for b in range(len(batch_ex)):
                batch_ex[b]=np.array(batch_ex[b])
                
            print("bex:",batch_ex.shape)
#             print(batch_ex[0].shape)
                
            batch_y = pad(batch_y)
            batch_ex = pad2(batch_ex)
            
#             batch_y = batch_y.reshape((-1,1))
#             print(batch_ex.shape)
            
            sess.run(init)
            # image = K.expand_dims(x[i], 0)
            # print("im:",image.shape)
            c = sess.run(model1,feed_dict={im:batch_x})
            print("c:",c)

            # em = K.expand_dims(x1[i], 0)

            inp2 = sess.run(embed,feed_dict={caption_p:batch_y})
            print("inp2:",inp2.shape)

            features_try = K.tile(K.expand_dims(c, 1), [1, K.shape(inp2)[1], 1])
            print("ktile:",features_try.shape)
            embeddings = tf.concat([features_try,inp2],2)
            print("emb:",embeddings.shape)
            inp = sess.run(embeddings)

            a = sess.run(output1,feed_dict={gru.input_layer:inp})
            print("a:",a.shape,"\n")

            # print("Sssss:",ex.shape)
            ls,tr = sess.run([loss,train_step],feed_dict ={expected_output:batch_ex,gru.input_layer:inp})
            print(ls/batch_size)
            loss_no.append(ls/batch_size)
            print("\n\n")
#         el = sess.run(epoch_loss,feed_dict={e_loss:loss_no})
        loss_ar.append(loss_no)

        print("-----------------------------------------------------------------") 
    save_path = saver.save(sess, "model.ckpt")

0
batch  0
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[-0.4058447   0.          2.999239    0.         -0.32991424 -0.3275887
  -0.33317184 -0.3331824  -0.33306956 -0.29662657 -0.4928302  -0.33283177
  -0.6213964   2.9993382   2.885991   -0.48143667 -0.3330897  -0.33324265
  -0.7330547   0.         -0.49871385 -0.3333184  -0.4014366  -0.550711
   0.         -0.71380216 -0.18534857 -0.3332666  -0.49582785 -0.49701384
   0.         -0.33319393  0.         -0.3328213  -0.60792875 -0.44669247
  -0.8623663   0.         -0.31719363 -0.33324048 -0.49456447 -0.4798848
  -0.49468118  0.          0.         -0.33332312  0.         -0.36758327
  -0.3432122   0.        ]
 [-0.4058447   0.         -0.3332488   0.         -0.32991424 -0.3275887
  -0.33317184 -0.3331824  -0.33306956 -0.29662657 -0.4928302  -0.33283177
   2.0275764  -0.33365223 -0.4373671  -0.48143667 -0.3330897  -0.33324265
  -0.7330547   0.         -0.49871385 -0.3333184  -0.4014366   0.7576918
   0.         -0.71380216 -0.3513611  -0.

a: (10, 78, 18) 

102.18224080513444



batch  2
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[ 2.9993289  -0.35956955 -0.4400803  -0.46603298 -0.33318052 -0.43443552
  -0.49326006 -0.33321598 -0.50246936  0.85350686 -0.7693492  -0.5148555
   0.         -0.33315852 -0.33311495 -0.49525204  2.205582   -0.333273
  -0.3489832   0.9020503   0.          0.          0.          0.
  -0.44229487 -0.3317285  -0.40552816 -0.33308423 -0.32980013 -0.38221517
  -0.58516526  0.         -0.3384715   0.         -0.47255576 -0.58081365
   0.         -0.48239416 -0.33331114 -0.5451907  -0.4593435  -0.33244312
  -0.55651647 -0.4996935   2.7624903  -0.3331155  -0.450877   -0.511038
  -0.6278729  -0.33331153]
 [-0.33691484 -0.35956955 -0.4400803  -0.46603298 -0.33318052 -0.43443552
  -0.49326006 -0.33321598 -0.50246936 -0.45206732 -0.7693492   2.7874393
   0.         -0.33315852 -0.33311495 -0.49525204 -0.6220601  -0.333273
  -0.3489832  -0.45564634  0.          0.          0.          0.
  -0.44229487  2.9855

a: (10, 78, 18) 

89.27103055062283



batch  4
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[-0.3330706   0.         -0.4052892  -0.5666666  -0.6441199  -0.3778926
  -0.3332872   1.8059597   0.          2.9950488   0.          1.8173703
  -0.4815442  -0.3330752  -0.61397326  2.8321443  -0.33333126 -0.47261208
  -0.36092916  0.         -0.52852666 -0.4148099  -0.5142574   0.
  -0.49440423 -0.33074844 -0.42635232 -0.33290923  0.         -0.33331755
  -0.59121054 -0.6368483  -0.40669626 -0.40118048  1.8217037  -0.33324853
  -0.33332172  0.         -0.33332294  0.          2.9001594  -0.5060233
   0.         -0.44243166 -0.33331108  0.          0.         -0.61496526
  -0.47635192 -0.33320192]
 [-0.3330706   0.         -0.4052892  -0.5666666   0.79415756  0.04745796
  -0.3332872  -0.57580644  0.         -0.3327832   0.         -0.49834839
  -0.4815442  -0.3330752   2.3243275  -0.43756777  2.9999812  -0.47261208
  -0.36092916  0.          1.823677   -0.4148099  -0.5142574   0.
  -0.49440423  2.

a: (10, 94, 18) 

127.36560011257794



batch  1
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[-0.47764212 -0.45254448 -0.48936117  2.8193774  -0.33332488  2.1978233
  -0.43335235 -0.3333113  -0.65028685  0.         -0.31299654 -0.33243668
  -0.49064177  2.999382   -0.37544408 -0.32408798 -0.6607889   0.
  -0.33332467 -0.46600163  2.4002829  -0.33324036 -0.33332554 -0.33315364
   2.9997964   1.2542646  -0.33320317 -0.3332718  -0.33187082 -0.33329794
  -0.5929638  -0.3324584  -0.46266928 -0.2733562  -0.3333109   2.9999428
  -0.33328238 -0.33328682  0.          1.7926117  -0.43731827 -0.31168273
  -0.49336594 -0.44728032  1.7040236   0.         -0.5382026  -0.33325198
  -0.33331376 -0.6094179 ]
 [-0.47764212 -0.45254448  1.4992802  -0.4406184  -0.33332488 -0.61609954
  -0.43335235 -0.3333113   2.5604064   0.         -0.31299654 -0.33243668
  -0.49064177 -0.3332647  -0.37544408 -0.32408798  0.25285643  0.
  -0.33332467  2.6732445  -0.49022806 -0.33324036 -0.33332554  2.9983826
  -0.3333107  -0

a: (10, 76, 18) 

74.87175349698



batch  3
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[-0.33278963  0.         -0.33332813 -0.48016536 -0.33322304  0.
  -0.3332666   2.3537364   0.         -0.58455175 -0.48525795  2.988248
   0.         -0.6450517   2.9549665  -0.433156   -0.64113593 -0.33305597
  -0.39539084 -0.37415105 -0.33329025 -0.41820407  0.         -0.38469857
  -0.5025042   2.9981828   0.          0.64295113 -0.4524018  -0.49157718
  -0.49658066  0.          0.         -0.33322948 -0.33322042 -0.33322757
  -0.4992482  -0.34158957  0.         -0.37923238 -0.5301428   0.
  -0.41549245 -0.4871611  -0.6807286  -0.42925206  0.         -0.3982921
  -0.425511   -0.33331212]
 [-0.33278963  0.         -0.33332813 -0.48016536 -0.33322304  0.
  -0.3332666   1.5603106   0.          0.7515804  -0.48525795 -0.33202758
   0.         -0.6450517  -0.39249384 -0.433156   -0.64113593 -0.33305597
  -0.39539084 -0.37415105 -0.33329025 -0.41820407  0.         -0.38469857
  -0.5025042  -0.33313146  0

a: (10, 78, 18) 

93.81193785785936



-----------------------------------------------------------------
2
batch  0
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[-0.4589984  -0.32829314 -0.33315444 -0.5718134   0.          0.
  -0.49949366 -0.36049274 -0.3582023   0.         -0.47859097 -0.45477
  -0.33325428 -0.33332005  0.         -0.33349    -0.4195852   2.9995437
  -0.09401948  0.          0.          0.         -0.47899592 -0.3261889
   2.2425845  -0.33321145 -0.62112224 -0.4984321  -0.3890593  -0.33327913
  -0.4147992  -0.31830007 -0.33328205  1.3518959  -0.33239627  2.5157194
  -0.49022782 -0.3332684  -0.49965873 -0.3333231  -0.3333267  -0.4996665
  -0.3624129  -0.33328968 -0.3332928   0.         -0.6085848   0.
  -0.33312738 -0.333185  ]
 [-0.4589984  -0.32829314 -0.33315444  2.343383    0.          0.
  -0.49949366 -0.36049274 -0.12711093  0.         -0.47859097 -0.45477
  -0.33325428 -0.33332005  0.         -0.33201835 -0.4195852  -0.33328265
  -0.09401948  0.          0.         

a: (10, 78, 18) 

101.67084062939051



batch  2
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[ 0.          0.         -0.302153   -0.3955648  -0.40641057 -0.4974693
  -0.49997708  0.          2.983715    0.          0.         -0.33332533
  -0.33321646 -0.6319618   0.         -0.33332437  2.786366    0.
   0.         -0.47669163 -0.46087265 -0.32932135 -0.33325282  2.9745977
   2.9994998  -0.7152168  -0.35780346  0.          0.         -0.52201
  -0.42561227 -0.33273643  0.         -0.33330438 -0.5115782  -0.53408206
   2.995786    2.1189768  -0.49530697  2.3656173  -0.49592644 -0.57481176
  -0.47983497 -0.48260242  0.          0.         -0.33320016 -0.3842756
   0.         -0.33331376]
 [ 0.          0.         -0.302153   -0.3955648  -0.40641057 -0.4974693
  -0.49997708  0.         -0.3315239   0.          0.         -0.33332533
  -0.33321646  1.9411464   0.         -0.33332437 -0.44763803  0.
   0.         -0.47669163  2.7700732  -0.32932135  2.9992752  -0.37842554
  -0.33327776 -0.715

a: (10, 78, 18) 

91.4534107831195



batch  4
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[-0.33333072 -0.3320168   0.         -0.33330464  2.789112    0.
  -0.33330938  0.         -0.618586    0.          1.1020787   0.
   0.         -0.49994287  0.          0.         -0.7890229   2.9985113
   0.         -0.32350507 -0.6305279   0.         -0.4580825  -0.33332863
  -0.33327428 -0.41388926 -0.33332735 -0.47612157  0.         -0.43810204
   0.         -0.33331472 -0.33332917  2.7154107   0.          1.2358458
  -0.4992484  -0.33265573  0.         -0.37800932 -0.33084404 -0.4565577
  -0.33327022 -0.49864247  2.9997215   0.         -0.43594253 -0.3332971
   0.          0.6700214 ]
 [-0.33333072 -0.3320168   0.         -0.33330464  0.78787243  0.
  -0.33330938  0.         -0.618586    0.         -0.4690653   0.
   0.         -0.49994287  0.          0.         -0.7890229  -0.33316794
   0.         -0.32350507 -0.6305279   0.          2.7282696  -0.33332863
  -0.33327428 -0.41388926 -0.333327

a: (10, 94, 18) 

106.9233970352906



batch  1
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[-0.33330265 -0.33331543  0.08871287 -0.33924997 -0.36193234  2.600082
   0.         -0.4971713   0.         -0.33274636 -0.3332373  -0.3313429
  -0.31972978  0.          1.491097   -0.3358337  -0.38053346  2.9893837
  -0.3332884  -0.3546753  -0.33299857  2.9740012   0.         -0.33330926
  -0.37629837 -0.3313588   0.         -0.43598834 -0.3575281  -0.660072
  -0.33297136  0.         -0.25471437  1.6415601   2.555745    0.
  -0.5523634   0.         -0.34570366 -0.3333     -0.45050445 -0.43855536
   2.999881   -0.33331856  0.         -0.570834   -0.3331439  -0.33324805
  -0.33323073 -0.33328253]
 [-0.33330265 -0.33331543 -0.5597586  -0.33924997 -0.36193234 -0.47474405
   0.          1.7523865   0.          2.9947176  -0.3332373  -0.3313429
  -0.31972978  0.         -0.78581953 -0.3358337  -0.38053346 -0.36314473
  -0.3332884  -0.3546753  -0.33299857 -0.37932783  0.         -0.33330926
  -0.37629837

a: (10, 76, 18) 

118.12256838865304



batch  3
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[ 1.343924   -0.5473206   0.         -0.3332987  -0.33330458 -0.6771864
   0.         -0.33325234 -0.33331951  0.         -0.44382316  0.
   0.          0.0537135  -0.33332467 -0.33331686 -0.33328164  0.
  -0.6257376  -0.32760122  0.         -0.28417456  0.         -0.36365944
   1.3         1.2763054   0.         -0.3331789  -0.3333159  -0.51415825
   0.          0.         -0.33330005  0.         -0.33094773  0.
  -0.3328789  -0.3333261  -0.5292616  -0.33319053 -0.33332893 -0.3291758
  -0.48922038  0.         -0.33332828  0.          0.         -0.38560686
  -0.6213261  -0.33332354]
 [-0.48249638 -0.5473206   0.         -0.3332987  -0.33330458 -0.6771864
   0.         -0.33325234  2.9998755   0.         -0.44382316  0.
   0.         -0.51648015 -0.33332467 -0.33331686 -0.33328164  0.
   1.8953404  -0.32760122  0.         -0.28417456  0.         -0.36365944
  -0.65161675 -0.47907406  0.         -0

a: (10, 78, 18) 

73.88019047307672



-----------------------------------------------------------------
4
batch  0
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[-0.333315    1.0423753  -0.328078   -0.37513202  0.         -0.33329302
   0.         -0.33329722 -0.5684066  -0.49907097 -0.33308402  0.
  -0.3332786   0.         -0.33331913 -0.47605816 -0.3332856   0.
  -0.49336064 -0.47672814 -0.33331978  1.4865541  -0.3333213  -0.42002183
   0.          0.         -0.33317828 -0.36022228  0.285331    0.
   2.9294906  -0.33284858 -0.7238769  -0.5335648  -0.7809447   0.
   1.4579549  -0.33331826  0.         -0.22179142  0.          2.6412098
   2.2497373   0.         -0.4310195  -0.4971989  -0.4870686  -0.33263794
  -0.33324155  0.        ]
 [-0.333315   -0.7325272  -0.328078   -0.37513202  0.         -0.33329302
   0.         -0.33329722  1.0463657  -0.49907097 -0.33308402  0.
  -0.3332786   0.         -0.33331913  1.2209198  -0.3332856   0.
  -0.49336064  2.5807376  -0.33331978 -0.7770706  -0.

a: (10, 78, 18) 

125.6588276473502



batch  2
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[ 0.         -0.42607427 -0.44225258 -0.33322218  0.         -0.4462236
   2.9983323   2.8959284  -0.4989193  -0.47131026 -0.33331886  1.0994109
  -0.60365665 -0.40609553 -0.33079532  0.         -0.18716514 -0.3903234
  -0.33332267 -0.47084656 -0.33328724  2.9988804   0.         -0.5208704
   0.         -0.47453716 -0.47784793  0.         -0.3705049   0.
  -0.65868104 -0.33250776 -0.3332534  -0.33331195 -0.49563676 -0.33210188
  -0.41084406 -0.43796232  0.          0.         -0.3332645  -0.48493207
  -0.6406     -0.63028854 -0.3332676   0.         -0.3333227  -0.36862883
  -0.33511955 -0.3671256 ]
 [ 0.         -0.42607427 -0.44225258 -0.33322218  0.          0.64948094
  -0.333148   -0.419031   -0.4989193  -0.47131026 -0.33331886 -0.6122781
  -0.60365665 -0.40609553 -0.33079532  0.         -0.61813724 -0.3903234
  -0.33332267 -0.47084656 -0.33328724 -0.33320892  0.         -0.5208704
   0.        

a: (10, 78, 18) 

79.03260734343125



batch  4
x: 10
y: 10
ex: 10
bex: (10,)
(10,)
c: [[-0.49425593  0.         -0.4713257  -0.47955078  0.          0.
  -0.4828919  -0.45761514  1.4022491  -0.33330977  0.         -0.44196433
  -0.33327362 -0.33332896 -0.4380067   0.          2.9994621   1.8514516
  -0.333278   -0.22081873 -0.4574735  -0.3333199  -0.3333297  -0.49811858
  -0.3332057   0.          2.9181223  -0.46644458  0.         -0.41781074
  -0.42357078  0.          0.         -0.6299082  -0.33054566 -0.33326605
   0.         -0.37743208  1.956243   -0.33326453 -0.33332944  2.9996438
  -0.33321998  0.54860675 -0.3333109  -0.43259895  1.0830404  -0.36963624
  -0.4530887  -0.3556028 ]
 [-0.49425593  0.         -0.4713257  -0.372915    0.          0.
  -0.4828919  -0.45761514 -0.8019552   2.9997878   0.          0.72232527
  -0.33327362 -0.33332896 -0.4380067   0.         -0.3332736  -0.5407652
  -0.333278   -0.50175506 -0.4574735  -0.3333199   2.9999673  -0.49811858
  -0.3332057   0.

In [30]:
print(loss_ar)

[[106.53009034112901, 107.16418833295738, 106.77142667748276, 114.22640320019138, 91.76424937083229], [92.44399568563824, 86.00318860127234, 128.21579409125792, 80.52351852322012, 111.8744779487715], [98.79295305845828, 108.39864939072831, 97.07914626179101, 95.17170994886085, 90.78989509532396], [128.3234106889701, 102.74197337528126, 68.43968570710109, 95.83514246347738, 99.54980467387468], [109.30006146051605, 104.13430216133307, 110.76669137876607, 93.23209285029998, 70.9966157244445]]


In [31]:
def word_for_id(integer, tokenizer):
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None
def load_val_images(data_dir):
    image_filenames =[]
    images = []
    all_filenames = listdir(data_dir)
    all_filenames.sort()
    for filename in (all_filenames):
        if filename[-3:] == "png":
            image_filenames.append(filename)
    for name in image_filenames:
        image = resize_img(data_dir+name)
        images.append(image)
    return images

In [32]:
decoded_words = []
star_text = '<START> '
image = load_val_images('all_data/')[0]
img_tensor=np.expand_dims(np.array(image),0)
#img_tensor=np.array(image)
''''image = Variable(torch.FloatTensor([image]))'''
predicted = '<START> '
 
img_tensor.shape

(1, 3, 224, 224)

In [25]:
predicted='<START>'
with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    for di in range(50):
        sequence = my_dateset.tokenizer.texts_to_sequences([star_text])[0]
#         print(sequence)
        decoder_input = np.array(sequence).reshape(-1,1)
        c = sess.run(model1,feed_dict={im:img_tensor})
        print(c)
        inp2 = sess.run(embed,feed_dict={caption_p:decoder_input})
        features_try = K.tile(K.expand_dims(c, 1), [1, K.shape(inp2)[1], 1])
        embeddings = tf.concat([features_try,inp2],2)
        inp = sess.run(embeddings)
        a = sess.run(output1,feed_dict={gru.input_layer:inp})
        data=list(a[0][0])
        i=data.index(max(data))
        word = word_for_id(i,my_dateset.tokenizer)
        if word is None:
            continue
        predicted += word + ' '
        star_text = word
#         print(predicted)
        if word == '<END>':
            break

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]]


In [34]:
hidden_size=256
vb_size=18
batchsize=2
weights = {
    'W_conv1': tf.get_variable('W0', shape=(3,3,3,32)), 
    'W_conv2': tf.get_variable('W1', shape=(3,3,32,32)), 
    'W_conv3': tf.get_variable('W2', shape=(3,3,32,64)), 
    'W_conv4': tf.get_variable('W3', shape=(3,3,64,64)), 
    'W_conv5': tf.get_variable('W4', shape=(3,3,64,128)), 
    'W_conv6': tf.get_variable('W5', shape=(3,3,128,128)), 
    'W_fc1': tf.get_variable('W6', shape=(28*28*128,1024)), 
    'W_fc2': tf.get_variable('W7', shape=(1024,50)), 
    'out': tf.get_variable('W8', dtype = tf.float64,shape=(hidden_size,vb_size)), 
    }
biases = {
    'bc1': tf.get_variable('B0', shape=(32)),
    'bc2': tf.get_variable('B1', shape=(32)),
    'bc3': tf.get_variable('B2', shape=(64)),
    'bc4': tf.get_variable('B3', shape=(64)),
    'bc5': tf.get_variable('B4', shape=(128)),
    'bc6': tf.get_variable('B5', shape=(128)),
    'b_fc1': tf.get_variable('B6', shape=(1024)),
    'b_fc2': tf.get_variable('B7', shape=(50)),
    'out': tf.get_variable('B8', dtype = tf.float64,shape=(vb_size),),
    }

ValueError: Variable W0 already exists, disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:

  File "/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py", line 1748, in __init__
    self._traceback = tf_stack.extract_stack()
  File "/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py", line 3426, in _create_op_internal
    op_def=op_def)
  File "/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py", line 3357, in create_op
    attrs, op_def, compute_device)
  File "/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/opt/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/op_def_library.py", line 794, in _apply_op_helper
    op_def=op_def)


In [47]:
predicted='<START>'
with tf.Session() as sess:
#     init = tf.global_variables_initializer()
#     sess.run(init)
    saver.restore(sess, "model.ckpt")
    print("####",weights['W_conv1'].eval())
    for di in range(50):
        sequence = my_dateset.tokenizer.texts_to_sequences([star_text])[0]
#         print(sequence)
        decoder_input = np.array(sequence).reshape(-1,1)
#         c = sess.run(model1,feed_dict={im:img_tensor})
        print("--------------------------")
        print(model1.eval(feed_dict={im:img_tensor}))
        print("--------------------------")
        print(di, c)
        inp2 = sess.run(embed,feed_dict={caption_p:decoder_input})
        features_try = K.tile(K.expand_dims(c, 1), [1, K.shape(inp2)[1], 1])
        embeddings = tf.concat([features_try,inp2],2)
        inp = sess.run(embeddings)
        a = sess.run(output1,feed_dict={gru.input_layer:inp})
        data=list(a[0][0])
        i=data.index(max(data))
        word = word_for_id(i,my_dateset.tokenizer)
        if word is None:
            continue
        predicted += word + ' '
        star_text = word
#         print(predicted)
        if word == '<END>':
            break

INFO:tensorflow:Restoring parameters from model.ckpt
#### [[[[ 0.0552844  -0.09191938 -0.12938927  0.07782622 -0.12169811
     0.09090897 -0.02643431  0.06383094 -0.09371467 -0.07359008
    -0.03715567 -0.01405276  0.06506415  0.02663651 -0.0562261
    -0.02357069 -0.00449616 -0.02680689  0.13541932  0.03542498
     0.0868004   0.02253228  0.11070004 -0.0911836   0.08838652
    -0.02933244 -0.11010709  0.01399221 -0.03102589  0.03630301
    -0.05771044  0.05062312]
   [ 0.03109528 -0.05311945  0.12042432 -0.04578728 -0.12683907
     0.00772712  0.01820713  0.07549974 -0.07955774  0.09169678
    -0.06338755  0.05540203 -0.06958147 -0.08530389 -0.09344459
     0.06714588 -0.10425292  0.11352377  0.02534378 -0.11695945
    -0.0411216  -0.01120187 -0.01796146 -0.09182034 -0.06286044
    -0.0237344   0.13466601 -0.01374276 -0.12266739  0.05872904
     0.10924597 -0.03264557]
   [ 0.01236536  0.10680197 -0.01896941  0.02156503  0.10486871
     0.09723537 -0.10796832 -0.03612825 -0.01752386 -

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0.]]
--------------------------
0 [[-0.49425593  0.         -0.4713257  -0.47955078  0.          0.
  -0.4828919  -0.45761514  1.4022491  -0.33330977  0.         -0.44196433
  -0.33327362 -0.33332896 -0.4380067   0.          2.9994621   1.8514516
  -0.333278   -0.22081873 -0.4574735  -0.3333199  -0.3333297  -0.49811858
  -0.3332057   0.          2.9181223  -0.46644458  0.         -0.41781074
  -0.42357078  0.          0.         -0.6299082  -0.33054566 -0.33326605
   0.         -0.37743208  1.956243   -0.33326453 -0.33332944  2.9996438
  -0.33321998  0.54860675 -0.3333109  -0.43259895  1.0830404  -0.36963624
  -0.4530887  -0.3556028 ]
 [-0.49425593  0.         -0.4713257  -0.372915    0.          0.
  -0.4828919  -0.45761514 -0.8019552   2.9997878   0.          0.72232527
  -0.33327362 -0.33332896 -0.4380067   0.         -0.3332736  -0

ValueError: Dimension 0 in both shapes must be equal, but are 10 and 1. Shapes are [10,1] and [1,1]. for 'concat_626' (op: 'ConcatV2') with input shapes: [10,1,50], [1,1,50], [] and with computed input tensors: input[2] = <2>.