In [1]:
import tensorflow as tf
from keras import backend as K
import numpy as np
from keras.layers import Embedding
# print(tf.__version__)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [2]:
def conv2d(x, w):
    return tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='SAME')

def maxpool2d(x):
    return tf.nn.max_pool(x, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')

In [3]:
class GRU:
    """Implementation of a Gated Recurrent Unit (GRU) as described in [1].
    
    [1] Chung, J., Gulcehre, C., Cho, K., & Bengio, Y. (2014). Empirical evaluation of gated recurrent neural networks on sequence modeling. arXiv preprint arXiv:1412.3555.
    
    Arguments
    ---------
    input_dimensions: int
        The size of the input vectors (x_t).
    hidden_size: int
        The size of the hidden layer vectors (h_t).
    dtype: obj
        The datatype used for the variables and constants (optional).
    """
    
    def __init__(self, input_dimensions, hidden_size, inputs,dtype=tf.float32):
        self.input_dimensions = input_dimensions
        self.hidden_size = hidden_size
        self.input_layer=[]
        
        # Weights for input vectors of shape (input_dimensions, hidden_size)
        self.Wr = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.input_dimensions, self.hidden_size), mean=0, stddev=0.01), name='Wr')
        self.Wz = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.input_dimensions, self.hidden_size), mean=0, stddev=0.01), name='Wz')
        self.Wh = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.input_dimensions, self.hidden_size), mean=0, stddev=0.01), name='Wh')
        
        # Weights for hidden vectors of shape (hidden_size, hidden_size)
        self.Ur = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size, self.hidden_size), mean=0, stddev=0.01), name='Ur')
        self.Uz = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size, self.hidden_size), mean=0, stddev=0.01), name='Uz')
        self.Uh = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size, self.hidden_size), mean=0, stddev=0.01), name='Uh')
        
        # Biases for hidden vectors of shape (hidden_size,)
        self.br = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size,), mean=0, stddev=0.01), name='br')
        self.bz = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size,), mean=0, stddev=0.01), name='bz')
        self.bh = tf.Variable(tf.truncated_normal(dtype=dtype, shape=(self.hidden_size,), mean=0, stddev=0.01), name='bh')
        
        # Define the input layer placeholder
        self.input_layer = inputs
        
        # Put the time-dimension upfront for the scan operator
        self.x_t = tf.transpose(self.input_layer, [1, 0, 2], name='x_t')
        
        # A little hack (to obtain the same shape as the input matrix) to define the initial hidden state h_0
        self.h_0 = tf.matmul(self.x_t[0, :, :], tf.zeros(dtype=tf.float32, shape=(input_dimensions, hidden_size)), name='h_0')
        
        # Perform the scan operator
        self.h_t_transposed = tf.scan(self.forward_pass, self.x_t, initializer=self.h_0, name='h_t_transposed')
        
        # Transpose the result back
        self.h_t = tf.transpose(self.h_t_transposed, [1, 0, 2], name='h_t')

    def forward_pass(self, h_tm1, x_t):
        """Perform a forward pass.
        
        Arguments
        ---------
        h_tm1: np.matrix
            The hidden state at the previous timestep (h_{t-1}).
        x_t: np.matrix
            The input vector.
        """
        # Definitions of z_t and r_t
        z_t = tf.sigmoid(tf.matmul(x_t, self.Wz) + tf.matmul(h_tm1, self.Uz) + self.bz)
        r_t = tf.sigmoid(tf.matmul(x_t, self.Wr) + tf.matmul(h_tm1, self.Ur) + self.br)
        
        # Definition of h~_t
        h_proposal = tf.tanh(tf.matmul(x_t, self.Wh) + tf.matmul(tf.multiply(r_t, h_tm1), self.Uh) + self.bh)
        
        # Compute the next hidden state
        h_t = tf.multiply(1 - z_t, h_tm1) + tf.multiply(z_t, h_proposal)
#         print(self.x_t.shape)
        
        print("h_t:",h_t.shape)
#         print("+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
        
        return h_t

In [4]:
epsilon = 1e-3

In [5]:
def batch_norm_wrapper(inputs, is_training, decay = 0.999):

    scale = tf.Variable(tf.ones([inputs.get_shape()[-1]]))
    beta = tf.Variable(tf.zeros([inputs.get_shape()[-1]]))
    pop_mean = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), trainable=False)
    pop_var = tf.Variable(tf.ones([inputs.get_shape()[-1]]), trainable=False)

    if is_training:
        batch_mean, batch_var = tf.nn.moments(inputs,[0])
        train_mean = tf.assign(pop_mean,
                               pop_mean * decay + batch_mean * (1 - decay))
        train_var = tf.assign(pop_var,
                              pop_var * decay + batch_var * (1 - decay))
        with tf.control_dependencies([train_mean, train_var]):
            return tf.nn.batch_normalization(inputs,batch_mean, batch_var, beta, scale, epsilon)
    else:
        return tf.nn.batch_normalization(inputs,pop_mean, pop_var, beta, scale, epsilon)

In [6]:
vb_size=18
batchsize=2
weights = {
    'W_conv1': tf.get_variable('W0', shape=(3,3,3,32), initializer=tf.contrib.layers.variance_scaling_initializer()), 
    'W_conv2': tf.get_variable('W1', shape=(3,3,32,32), initializer=tf.contrib.layers.variance_scaling_initializer()), 
    'W_conv3': tf.get_variable('W2', shape=(3,3,32,64), initializer=tf.contrib.layers.variance_scaling_initializer()), 
    'W_conv4': tf.get_variable('W3', shape=(3,3,64,64), initializer=tf.contrib.layers.variance_scaling_initializer()), 
    'W_conv5': tf.get_variable('W4', shape=(3,3,64,128), initializer=tf.contrib.layers.variance_scaling_initializer()), 
    'W_conv6': tf.get_variable('W5', shape=(3,3,128,128), initializer=tf.contrib.layers.variance_scaling_initializer()), 
    'W_fc1': tf.get_variable('W6', shape=(28*28*128,1024), initializer=tf.contrib.layers.variance_scaling_initializer()), 
    'W_fc2': tf.get_variable('W7', shape=(1024,1024), initializer=tf.contrib.layers.variance_scaling_initializer()), 
    'Wout_gru1': tf.get_variable('W8', dtype = tf.float32,shape=(256,256), initializer=tf.contrib.layers.variance_scaling_initializer()),
    'Wout_gru_1': tf.get_variable('W10', dtype = tf.float32,shape=(256,256), initializer=tf.contrib.layers.variance_scaling_initializer()),
    'Wout_gru2': tf.get_variable('W9', dtype = tf.float32,shape=(512,vb_size), initializer=tf.contrib.layers.variance_scaling_initializer())
    }
biases = {
    'bc1': tf.get_variable('B0', shape=(32), initializer=tf.contrib.layers.variance_scaling_initializer()),
    'bc2': tf.get_variable('B1', shape=(32), initializer=tf.contrib.layers.variance_scaling_initializer()),
    'bc3': tf.get_variable('B2', shape=(64), initializer=tf.contrib.layers.variance_scaling_initializer()),
    'bc4': tf.get_variable('B3', shape=(64), initializer=tf.contrib.layers.variance_scaling_initializer()),
    'bc5': tf.get_variable('B4', shape=(128), initializer=tf.contrib.layers.variance_scaling_initializer()),
    'bc6': tf.get_variable('B5', shape=(128), initializer=tf.contrib.layers.variance_scaling_initializer()),
    'b_fc1': tf.get_variable('B6', shape=(1024), initializer=tf.contrib.layers.variance_scaling_initializer()),
    'b_fc2': tf.get_variable('B7', shape=(1024), initializer=tf.contrib.layers.variance_scaling_initializer()),
    'Bout_gru1':tf.get_variable('B8', dtype = tf.float32,shape=(256), initializer=tf.contrib.layers.variance_scaling_initializer()),
    'Bout_gru_1':tf.get_variable('B10', dtype = tf.float32,shape=(256), initializer=tf.contrib.layers.variance_scaling_initializer()),
    'Bout_gru2': tf.get_variable('B9', dtype = tf.float32,shape=(vb_size), initializer=tf.contrib.layers.variance_scaling_initializer())
    }

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [7]:
# x = tf.placeholder("float", [None,28,28,1])
# y = tf.placeholder("float", [None, n_classes])
def cnn_test(x,weights,biases):
    print("in cnn")
    
    '''
    weights = {'W_conv1':tf.Variable(tf.random_normal([3,3,1,32])),#56
               'W_conv2':tf.Variable(tf.random_normal([3,3,32,32])),#56
               'W_conv3':tf.Variable(tf.random_normal([3,3,32,64])),#28
               'W_conv4':tf.Variable(tf.random_normal([3,3,64,64])),#28
               'W_conv5':tf.Variable(tf.random_normal([3,3,64,128])),#14
               'W_conv6':tf.Variable(tf.random_normal([3,3,128,128])),#14
               'W_fc1':tf.Variable(tf.random_normal([7*7*128,1024])),  # since 3 times maxpooling.. inputsize/2^3
               'W_fc2':tf.Variable(tf.random_normal([1024,1024]))
              }
                  # depending on what that repeat vector does

    biases = {'b_conv1':tf.Variable(tf.random_normal([32])),
               'b_conv2':tf.Variable(tf.random_normal([32])),
               'b_conv3':tf.Variable(tf.random_normal([64])),
               'b_conv4':tf.Variable(tf.random_normal([64])),
               'b_conv5':tf.Variable(tf.random_normal([128])),
               'b_conv6':tf.Variable(tf.random_normal([128])),
               'b_fc1':tf.Variable(tf.random_normal([1024])),
               'b_fc2':tf.Variable(tf.random_normal([1024]))
             }
    '''
    
    print("-1")
#     x = tf.convert_to_tensor(x)
    print("00")
    print("bef",x.shape)
    x = tf.reshape(x, shape=[-1, 224, 224, 3])
    print("aft",x.shape)
    print("0")
    conv1 = tf.nn.relu(conv2d(x, weights['W_conv1'])+  biases['bc1'])
    print("********",weights['W_conv1'])
    print("1")
    print("conv1:",conv1.shape)
    conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['bc2'])
    print("2")
    print("conv2:",conv2.shape)
    conv2 = maxpool2d(conv2)
    print("3")
    print("maxpool:",conv2.shape)
#    conv2 = tf.nn.dropout(conv2, 0.25)
#     print("dropout:",conv2.shape)
    print("okay")
    
    conv3 = tf.nn.relu(conv2d(conv2, weights['W_conv3']) + biases['bc3'])
    print("conv3:",conv3.shape)
    conv4 = tf.nn.relu(conv2d(conv3, weights['W_conv4']) + biases['bc4'])
    print("conv3:",conv3.shape)
    #conv4 = conv3
    conv4 = maxpool2d(conv4)
    print("maxpool:",conv4.shape)
#    conv4 = tf.nn.dropout(conv4, 0.25)
    
    conv5 = tf.nn.relu(conv2d(conv4, weights['W_conv5']) + biases['bc5'])
    print("conv5:",conv5.shape)
    conv6 = tf.nn.relu(conv2d(conv5, weights['W_conv6']) + biases['bc6'])
    print("conv6:",conv6.shape)
    #conv6 = conv5
    conv6 = maxpool2d(conv6)
    print("conv6:",conv6.shape)
#    conv6 = tf.nn.dropout(conv6, 0.25)

    fc1 = tf.reshape(conv6,[-1, weights['W_fc1'].get_shape().as_list()[0]])
    fc1 = tf.nn.relu(tf.matmul(fc1, weights['W_fc1'])+biases['b_fc1'])
    print("fc1:",fc1.shape)
#    fc1 = tf.nn.dropout(fc1, 0.3)
    
    fc2 = tf.nn.relu(tf.matmul(fc1, weights['W_fc2'])+biases['b_fc2'])
#    fc2 = tf.nn.dropout(fc2, 0.3)  
    #fc2 = fc1
    
#     out = tf.add(tf.matmul(fc2, weights['out']), biases['out'])
    print("fc2:",fc2.shape)
    print(fc2)
    
#     x_norm = batch_norm_wrapper(fc2,is_training)
    
    
#     inputs=fc2
#     scale = tf.Variable(tf.ones([inputs.get_shape()[-1]]))
#     beta = tf.Variable(tf.zeros([inputs.get_shape()[-1]]))
#     pop_mean = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), trainable=False)
#     pop_var = tf.Variable(tf.ones([inputs.get_shape()[-1]]), trainable=False)
#     decay=0.9999

#     if is_training:
#         batch_mean, batch_var = tf.nn.moments(inputs,[0])
#         train_mean = tf.assign(pop_mean,
#                                pop_mean * decay + batch_mean * (1 - decay))
#         train_var = tf.assign(pop_var,
#                               pop_var * decay + batch_var * (1 - decay))
#         with tf.control_dependencies([train_mean, train_var]):
#             return tf.nn.batch_normalization(inputs,batch_mean, batch_var, beta, scale, epsilon)
#     else:
#         return tf.nn.batch_normalization(inputs,pop_mean, pop_var, beta, scale, epsilon)
    
    
#     x_norm = tf.layers.batch_normalization(fc2, training=True)
#     input_gru = tf.repeat(fc2,)
    
#     print(x_norm.shape)
    
    return fc2

# def Gru(hidden_size):  
#     gru = GRU(1024,hidden_size)

#     W_output = tf.Variable(tf.truncated_normal(dtype=tf.float64, shape=(hidden_size, 1), mean=0, stddev=0.01))
#     b_output = tf.Variable(tf.truncated_normal(dtype=tf.float64, shape=(1,), mean=0, stddev=0.01))
#     output = tf.map_fn(lambda h_t: tf.matmul(h_t, W_output) + b_output, gru.h_t)

#     return output

In [8]:
# x = tf.placeholder("float", [None,28,28,1])
# y = tf.placeholder("float", [None, n_classes])
def cnn_train(x,weights,biases):
    print("in cnn")
    
    '''
    weights = {'W_conv1':tf.Variable(tf.random_normal([3,3,1,32])),#56
               'W_conv2':tf.Variable(tf.random_normal([3,3,32,32])),#56
               'W_conv3':tf.Variable(tf.random_normal([3,3,32,64])),#28
               'W_conv4':tf.Variable(tf.random_normal([3,3,64,64])),#28
               'W_conv5':tf.Variable(tf.random_normal([3,3,64,128])),#14
               'W_conv6':tf.Variable(tf.random_normal([3,3,128,128])),#14
               'W_fc1':tf.Variable(tf.random_normal([7*7*128,1024])),  # since 3 times maxpooling.. inputsize/2^3
               'W_fc2':tf.Variable(tf.random_normal([1024,1024]))
              }
                  # depending on what that repeat vector does

    biases = {'b_conv1':tf.Variable(tf.random_normal([32])),
               'b_conv2':tf.Variable(tf.random_normal([32])),
               'b_conv3':tf.Variable(tf.random_normal([64])),
               'b_conv4':tf.Variable(tf.random_normal([64])),
               'b_conv5':tf.Variable(tf.random_normal([128])),
               'b_conv6':tf.Variable(tf.random_normal([128])),
               'b_fc1':tf.Variable(tf.random_normal([1024])),
               'b_fc2':tf.Variable(tf.random_normal([1024]))
             }
    '''
    
    print("-1")
#     x = tf.convert_to_tensor(x)
    print("00")
    print("bef",x.shape)
    x = tf.reshape(x, shape=[-1, 224, 224, 3])
    print("aft",x.shape)
    print("0")
    conv1 = tf.nn.relu(conv2d(x, weights['W_conv1'])+  biases['bc1'])
    print("********",weights['W_conv1'])
    print("1")
    print("conv1:",conv1.shape)
    conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['bc2'])
    print("2")
    print("conv2:",conv2.shape)
    conv2 = maxpool2d(conv2)
    print("3")
    print("maxpool:",conv2.shape)
#    conv2 = tf.nn.dropout(conv2, 0.2)
#     print("dropout:",conv2.shape)
    print("okay")
    
    conv3 = tf.nn.relu(conv2d(conv2, weights['W_conv3']) + biases['bc3'])
    print("conv3:",conv3.shape)
    conv4 = tf.nn.relu(conv2d(conv3, weights['W_conv4']) + biases['bc4'])
    print("conv3:",conv3.shape)
    #conv4 = conv3
    conv4 = maxpool2d(conv4)
    print("maxpool:",conv4.shape)
 #   conv4 = tf.nn.dropout(conv4, 0.2)
    
    conv5 = tf.nn.relu(conv2d(conv4, weights['W_conv5']) + biases['bc5'])
    print("conv5:",conv5.shape)
    conv6 = tf.nn.relu(conv2d(conv5, weights['W_conv6']) + biases['bc6'])
    print("conv6:",conv6.shape)
    #conv6 = conv5
    conv6 = maxpool2d(conv6)
    print("conv6:",conv6.shape)
 #   conv6 = tf.nn.dropout(conv6, 0.2)

    fc1 = tf.reshape(conv6,[-1, weights['W_fc1'].get_shape().as_list()[0]])
    fc1 = tf.nn.relu(tf.matmul(fc1, weights['W_fc1'])+biases['b_fc1'])
    print("fc1:",fc1.shape)
    fc1 = tf.nn.dropout(fc1, 0.3)
    
    fc2 = tf.nn.relu(tf.matmul(fc1, weights['W_fc2'])+biases['b_fc2'])
    fc2 = tf.nn.dropout(fc2, 0.3)  
    #fc2 = fc1
    
#     out = tf.add(tf.matmul(fc2, weights['out']), biases['out'])
    print("fc2:",fc2.shape)
    print(fc2)
    
#     x_norm = batch_norm_wrapper(fc2,is_training)
    
    
#     inputs=fc2
#     scale = tf.Variable(tf.ones([inputs.get_shape()[-1]]))
#     beta = tf.Variable(tf.zeros([inputs.get_shape()[-1]]))
#     pop_mean = tf.Variable(tf.zeros([inputs.get_shape()[-1]]), trainable=False)
#     pop_var = tf.Variable(tf.ones([inputs.get_shape()[-1]]), trainable=False)
#     decay=0.9999

#     if is_training:
#         batch_mean, batch_var = tf.nn.moments(inputs,[0])
#         train_mean = tf.assign(pop_mean,
#                                pop_mean * decay + batch_mean * (1 - decay))
#         train_var = tf.assign(pop_var,
#                               pop_var * decay + batch_var * (1 - decay))
#         with tf.control_dependencies([train_mean, train_var]):
#             return tf.nn.batch_normalization(inputs,batch_mean, batch_var, beta, scale, epsilon)
#     else:
#         return tf.nn.batch_normalization(inputs,pop_mean, pop_var, beta, scale, epsilon)
    
    
#     x_norm = tf.layers.batch_normalization(fc2, training=True)
#     input_gru = tf.repeat(fc2,)
    
#     print(x_norm.shape)
    
    return fc2

# def Gru(hidden_size):  
#     gru = GRU(1024,hidden_size)

#     W_output = tf.Variable(tf.truncated_normal(dtype=tf.float64, shape=(hidden_size, 1), mean=0, stddev=0.01))
#     b_output = tf.Variable(tf.truncated_normal(dtype=tf.float64, shape=(1,), mean=0, stddev=0.01))
#     output = tf.map_fn(lambda h_t: tf.matmul(h_t, W_output) + b_output, gru.h_t)

#     return output

In [9]:
# import torch.utils.data as data
import cv2
import sys
from os import listdir
from os.path import join
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical

def resize_img(png_file_path):
        img_rgb = cv2.imread(png_file_path)
        #img_grey = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
        #img_adapted = cv2.adaptiveThreshold(img_grey, 255, cv2.ADAPTIVE_THRESH_MEAN_C,cv2.THRESH_BINARY, 101, 9)
        #img_stacked = np.repeat(img_adapted[...,None],3,axis=2)
        resized = cv2.resize(img_rgb, (224,224), interpolation=cv2.INTER_AREA)
        bg_img = 255 * np.ones(shape=(224,224,3))
#         print(bg_img.shape,resized.shape)
        bg_img[0:224, 0:224,:] = resized
        bg_img /= 255
        bg_img = np.rollaxis(bg_img, 2, 0)  
#         print(bg_img.shape)
        return bg_img
    
def load_doc(filename):
    file = open(filename, 'r',encoding='UTF-8')
    text = file.read()
    file.close()
    return text

class Dataset():
    def __init__(self, data_dir, input_transform=None, target_transform=None):
        self.data_dir = data_dir
        self.image_filenames = []
        self.texts = []
        all_filenames = listdir(data_dir)
        all_filenames.sort()
        for filename in (all_filenames):
            if filename[-3:] == "png":
                self.image_filenames.append(filename)
            else:
                text = '<START> ' + load_doc(self.data_dir+filename) + ' <END>'
                text = ' '.join(text.split())
                text = text.replace(',', ' ,')
                self.texts.append(text)
        self.input_transform = input_transform
        self.target_transform = target_transform
        
        # Initialize the function to create the vocabulary 
        tokenizer = Tokenizer(filters='', split=" ", lower=False)
        # Create the vocabulary 
        tokenizer.fit_on_texts([load_doc('vocabulary.vocab')])
        self.tokenizer = tokenizer
        # Add one spot for the empty word in the vocabulary 
        self.vocab_size = len(tokenizer.word_index) + 1
        # Map the input sentences into the vocabulary indexes
        self.train_sequences = tokenizer.texts_to_sequences(self.texts)
        # The longest set of boostrap tokens
        self.max_sequence = max(len(s) for s in self.train_sequences)
        # Specify how many tokens to have in each input sentence
        self.max_length = 48
        
        X, y, image_data_filenames = list(), list(), list()
        for img_no, seq in enumerate(self.train_sequences):
            print(img_no)
            in_seq, out_seq = seq[:-1], seq[1:]
            out_seq = to_categorical(out_seq, num_classes=self.vocab_size)
            image_data_filenames.append(self.image_filenames[img_no])
            X.append(in_seq)
            y.append(out_seq)
            print("->",out_seq)
                
        self.X = X
        self.y = y
        self.image_data_filenames = image_data_filenames
        self.images = list()
        for image_name in self.image_data_filenames:
            image = resize_img(self.data_dir+image_name)
            self.images.append(image)

In [10]:
dir_name = 'all_data5/'
batch_size = 32
my_dateset = Dataset(dir_name)

0
-> [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]]
1
-> [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]]
2
-> [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]]
3
-> [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]

In [11]:
x_train = np.array(my_dateset.images,dtype=np.float32)
for i in range(len(x_train)):
    x_train[i]=np.array(x_train[i],dtype=np.float32)
print(x_train.shape)

(50, 3, 224, 224)


In [12]:
im = tf.placeholder(dtype=tf.float32, shape=(None,3,224,224), name='im')
# is_training = tf.placeholder(dtype=tf.bool, name="is_training")
model_train = cnn_train(im,weights,biases)
model_test = cnn_test(im,weights,biases)
output_train = batch_norm_wrapper(model_train,True)
output_test = batch_norm_wrapper(model_test,False)

in cnn
-1
00
bef (?, 3, 224, 224)
aft (?, 224, 224, 3)
0
******** <tf.Variable 'W0:0' shape=(3, 3, 3, 32) dtype=float32_ref>
1
conv1: (?, 224, 224, 32)
2
conv2: (?, 224, 224, 32)
3
maxpool: (?, 112, 112, 32)
okay
conv3: (?, 112, 112, 64)
conv3: (?, 112, 112, 64)
maxpool: (?, 56, 56, 64)
conv5: (?, 56, 56, 128)
conv6: (?, 56, 56, 128)
conv6: (?, 28, 28, 128)
fc1: (?, 1024)
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
fc2: (?, 1024)
Tensor("dropout_1/mul_1:0", shape=(?, 1024), dtype=float32)
in cnn
-1
00
bef (?, 3, 224, 224)
aft (?, 224, 224, 3)
0
******** <tf.Variable 'W0:0' shape=(3, 3, 3, 32) dtype=float32_ref>
1
conv1: (?, 224, 224, 32)
2
conv2: (?, 224, 224, 32)
3
maxpool: (?, 112, 112, 32)
okay
conv3: (?, 112, 112, 64)
conv3: (?, 112, 112, 64)
maxpool: (?, 56, 56, 64)
conv5: (?, 56, 56, 128)
conv6: (?, 56, 56, 128)
conv6: (?, 28, 28, 128)
fc1: (?, 1024)
fc2: (?, 1024)
Tensor("Relu_15:0", shape=(?, 1024), dtype=fl

In [13]:
expected = my_dateset.y
expected=np.array(expected)
for e in range(len(expected)):
    expected[e]=np.array(expected[e])
print(expected.shape)

(50,)


In [14]:
VOCAB_LEN=19
EMBED_SIZE=50
embeddings = tf.Variable(tf.random_uniform([VOCAB_LEN, EMBED_SIZE]))
caption_p = tf.placeholder(dtype=tf.int32, shape=(None,None), name='caption_p')
embed = tf.nn.embedding_lookup(embeddings, caption_p)

gru_before = GRU(50,256,embed)
gru_before_1 = GRU(256,256,gru_before.h_t)
gru_before_2 = GRU(256,256,gru_before_1.h_t)
# gru_after =  GRU(50,256,)
# hidden_size=256



# W_output = tf.Variable(tf.truncated_normal(dtype=tf.float64, shape=(hidden_size, 18), mean=0, stddev=0.01),trainable=True)
# b_output = tf.Variable(tf.truncated_normal(dtype=tf.float64, shape=(18,), mean=0, stddev=0.01),trainable=True)

Wout_gru1 = weights['Wout_gru1']
bout_gru1 = biases['Bout_gru1']

# output = tf.map_fn(lambda h_t: tf.matmul(h_t, W_output) + b_output, gru.h_t)
#output_gru1 = tf.nn.softmax(tf.matmul(gru_before_1.h_t,Wout_gru1)+bout_gru1)
output_gru1 = gru_before_2.h_t
#gru_before_1 = GRU(256,256,gru_before.h_t)
# out2 = tf.matmul(gru.h_t[0], W_output)+b_output

# tf.get_variable('W7', shape=(1024,50), initializer=tf.contrib.layers.xavier_initializer())

# out3 = gru.h_t
# out4 = gru_final.h_t
# print(out3.shape)
# print(out4.shape)

features_try = K.tile(K.expand_dims(output_train, 1), [1, K.shape(output_gru1)[1], 1])
embeddings = tf.concat([features_try,output_gru1],2)


gru_final = GRU(1280,512,embeddings)
gru_final1 = GRU(512,512,gru_final.h_t)
Wout_gru2 = weights['Wout_gru2']
bout_gru2 = biases['Bout_gru2']

output_gru2 = tf.nn.softmax(tf.matmul(gru_final1.h_t,Wout_gru2)+bout_gru2)

true_output = tf.placeholder(dtype=tf.float32, shape=(None,None,None), name='expected_output')
loss = tf.reduce_sum(tf.squared_difference(output_gru2 ,true_output)) #/ float(1)
train_step = tf.train.AdamOptimizer(0.0001).minimize(loss)

h_t: (?, 256)
h_t: (?, 256)
h_t: (?, 256)
h_t: (?, 512)
h_t: (?, 512)


In [15]:
import functools 
def pad(batch_y):
    print(batch_y.shape)
    x=0
    for y in batch_y:
        if(len(y)>x):
            x=len(y)
#     x = functools.reduce(lambda x,y: len(x) if(len(x)>len(y)) else len(y),batch_y)
    
    ret = []
    for y in range(len(batch_y)):
        res=np.zeros(x)
        s = batch_y[y]
        res[0:len(s)]=batch_y[y]
#         batch_y[y]=res
        ret.append(res)
    return np.array(ret)
        
        
# a=[[1,2],[1,2,3]]
# pad(a)

def pad2(batch_ex):
#     r = functools.reduce(lambda x,y: len(x) if(len(x)>len(y)) else len(y),batch_ex)
#     print(":::::",r)
    r=0
    c=0
    for ex in batch_ex:
        shape = ex.shape
#         print(shape)
        if(shape[0]>r):
            r=shape[0]
        if(shape[1]>c):
            c=shape[1]
#     c = functools.reduce(lambda x,y: len(x[0]) if(len(x[0])>len(y[0])) else len(y[0]),batch_ex)
#     print(":::::",c)
#     print(r,c)
    ret=[]
    for ex in batch_ex:
        res=np.zeros((r,c))
#         print(res.shape)
#         print(ex.shape)
        res[0:ex.shape[0],0:ex.shape[1]]=ex
        ret.append(res)
#     print(ret)
        
    return(np.array(ret))

In [33]:
epoch = 10
vocab_size = 19
batch_size=5

x_train = my_dateset.images
caption = my_dateset.X
expected = my_dateset.y

saver = tf.train.Saver()

with tf.Session() as sess:
    init = tf.global_variables_initializer()
    sess.run(init)
    loss_ar=[]
    for e in range(epoch):
        loss_no=[]
        print(e)
        for batch in range(len(x_train)//batch_size):
            print("batch ",batch)
            batch_x = x_train[batch*batch_size:min((batch+1)*batch_size,len(x_train))]
            batch_y = caption[batch*batch_size:min((batch+1)*batch_size,len(caption))] 
            batch_ex = expected[batch*batch_size:min((batch+1)*batch_size,len(expected))]
            print("x:",len(batch_x))
            print("y:",len(batch_y))
            print("ex:",len(batch_ex))
            
#             print(batch_y)
            
            batch_x = np.array(batch_x)
            for b in range(len(batch_x)):
                batch_x[b]=np.array(batch_x[b])
            batch_y = np.array(batch_y)
            for b in range(len(batch_y)):
                batch_y[b]=np.array(batch_y[b])
            batch_ex = np.array(batch_ex)
            for b in range(len(batch_ex)):
                batch_ex[b]=np.array(batch_ex[b])
                
            print("bex:",batch_ex.shape)
#             print(batch_ex[0].shape)
                
            batch_y = pad(batch_y)
            batch_ex = pad2(batch_ex)
            
#             batch_y = batch_y.reshape((-1,1))
#             print(batch_ex.shape)
            

            # print("Sssss:",ex.shape)
            ls,tr = sess.run([loss,train_step],feed_dict ={true_output:batch_ex,im:batch_x,caption_p:batch_y})
            print(ls/batch_size)
            loss_no.append(ls/batch_size)
            print("\n\n")
#         el = sess.run(epoch_loss,feed_dict={e_loss:loss_no})
        loss_ar.append(loss_no)

        print("-----------------------------------------------------------------") 
    save_path = saver.save(sess, "model10.ckpt")

0
batch  0
x: 5
y: 5
ex: 5
bex: (5,)
(5,)
65.1732421875



batch  1
x: 5
y: 5
ex: 5
bex: (5,)
(5,)
70.90379638671875



batch  2
x: 5
y: 5
ex: 5
bex: (5, 76, 18)
(5, 76)
72.77974853515624



batch  3
x: 5
y: 5
ex: 5
bex: (5,)
(5,)
69.05892944335938



batch  4
x: 5
y: 5
ex: 5
bex: (5,)
(5,)
68.02548828125



batch  5
x: 5
y: 5
ex: 5
bex: (5,)
(5,)
59.476513671875



batch  6
x: 5
y: 5
ex: 5
bex: (5,)
(5,)
60.08052978515625



batch  7
x: 5
y: 5
ex: 5
bex: (5,)
(5,)
59.82356567382813



batch  8
x: 5
y: 5
ex: 5
bex: (5,)
(5,)
66.48351440429687



batch  9
x: 5
y: 5
ex: 5
bex: (5,)
(5,)
58.8421875



-----------------------------------------------------------------
1
batch  0
x: 5
y: 5
ex: 5
bex: (5,)
(5,)
64.83395385742188



batch  1
x: 5
y: 5
ex: 5
bex: (5,)
(5,)
70.50953979492188



batch  2
x: 5
y: 5
ex: 5
bex: (5, 76, 18)
(5, 76)
72.40059204101563



batch  3
x: 5
y: 5
ex: 5
bex: (5,)
(5,)
68.65201416015626



batch  4
x: 5
y: 5
ex: 5
bex: (5,)
(5,)
67.68262939453125



batch  5
x:

In [17]:
for l in loss_ar:
    print(l)

[65.10164794921874, 70.7837890625, 72.65057983398438, 69.0483154296875, 68.02144775390624, 59.3773193359375, 60.08030395507812, 59.93126220703125, 66.38382568359376, 58.739404296875]
[64.68706665039062, 70.42816162109375, 72.26708984375, 68.67350463867187, 67.59902954101562, 59.01786499023437, 59.640618896484376, 59.4095947265625, 65.73562622070312, 58.142486572265625]
[63.845166015625, 69.33342895507812, 70.88839721679688, 67.02454833984375, 65.625341796875, 56.98030395507813, 57.1434814453125, 56.63511962890625, 62.52720336914062, 55.86893310546875]
[62.57744140625, 65.89756469726562, 67.52384033203126, 64.46629638671875, 63.6174560546875, 55.89561157226562, 56.618951416015626, 56.52298583984375, 62.33046264648438, 55.424078369140624]
[61.548492431640625, 65.78167114257812, 67.4919677734375, 64.28148803710937, 63.52486572265625, 55.77720336914062, 56.64241333007813, 55.99527587890625, 62.04964599609375, 54.925048828125]
[61.91465454101562, 65.87943115234376, 67.52506103515626, 64.395

In [22]:
def word_for_id(integer, tokenizer):
    print(tokenizer.word_index.items())
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None
def load_val_images(data_dir):
    image_filenames =[]
    images = []
    all_filenames = listdir(data_dir)
    all_filenames.sort()
    for filename in (all_filenames):
        if filename[-3:] == "png":
            image_filenames.append(filename)
    for name in image_filenames:
        image = resize_img(data_dir+name)
        images.append(image)
    return images

In [31]:
decoded_words = []
star_text = '<START> '
image = load_val_images('all_data5/')[35]
img_tensor=np.expand_dims(np.array(image),0)
img_tensor=np.array(img_tensor)
predicted = '<START> '
 
img_tensor.shape

(1, 3, 224, 224)

In [32]:
features_try = K.tile(K.expand_dims(output_test, 1), [1, K.shape(output_gru1)[1], 1])
embeddings = tf.concat([features_try,output_gru1],2)

predicted='<START>'
star_text = '<START>'
with tf.Session() as sess:
#     init = tf.global_variables_initializer()
#     sess.run(init)
    saver.restore(sess, "model10.ckpt")
#     print("####",weights['W_conv1'].eval())
    for di in range(50):
        #print(star_text)
        sequence = my_dateset.tokenizer.texts_to_sequences([star_text])
#         decoder_input = to_categorical(sequence, num_classes=18)
#         print(decoder_input)
#         print(sequence)
        decoder_input = np.array(sequence).reshape(-1,1)
       # print(decoder_input)
        temp =[]
        for x in sequence:
            temp.append(x)
        
        temp = np.array(temp)
        print(temp.shape)
    
        a = sess.run(output_gru2, feed_dict={im:img_tensor,caption_p:temp})
        #print(a)
        
        data=list(a[0][-1])
        print(data)
        i=data.index(max(data))
        print(i)
        word = word_for_id(i,my_dateset.tokenizer)
        #print(word)
        if word is None:
#             print(x)
            continue
        predicted += word + ' '
        star_text += ' ' +word
        print(predicted)
        if word == '<END>':
            pass
#             break

INFO:tensorflow:Restoring parameters from model10.ckpt
(1, 1)
[0.034906674, 0.049482975, 0.0544656, 0.060248356, 0.053772617, 0.043561935, 0.059138093, 0.08920273, 0.052030053, 0.049945958, 0.09046771, 0.060585875, 0.04154859, 0.08634229, 0.04624667, 0.038249373, 0.04322531, 0.04657915]
10
dict_items([(',', 1), ('{', 2), ('}', 3), ('small-title', 4), ('text', 5), ('quadruple', 6), ('row', 7), ('btn-inactive', 8), ('btn-orange', 9), ('btn-green', 10), ('btn-red', 11), ('double', 12), ('<START>', 13), ('header', 14), ('btn-active', 15), ('<END>', 16), ('single', 17)])
<START>btn-green 
(1, 2)
[0.03263107, 0.059608594, 0.062464274, 0.06666814, 0.05749256, 0.04773391, 0.0603321, 0.08682609, 0.053770997, 0.049617108, 0.08508026, 0.059800625, 0.042393193, 0.0702998, 0.04185858, 0.037389137, 0.042016998, 0.04401664]
7
dict_items([(',', 1), ('{', 2), ('}', 3), ('small-title', 4), ('text', 5), ('quadruple', 6), ('row', 7), ('btn-inactive', 8), ('btn-orange', 9), ('btn-green', 10), ('btn-red', 1

[0.0073694857, 0.19466347, 0.14214297, 0.14755307, 0.09749106, 0.098453134, 0.056973018, 0.039935946, 0.028450934, 0.032515626, 0.03294427, 0.037492804, 0.031105649, 0.0032136843, 0.006264204, 0.013573069, 0.015155595, 0.014701847]
1
dict_items([(',', 1), ('{', 2), ('}', 3), ('small-title', 4), ('text', 5), ('quadruple', 6), ('row', 7), ('btn-inactive', 8), ('btn-orange', 9), ('btn-green', 10), ('btn-red', 11), ('double', 12), ('<START>', 13), ('header', 14), ('btn-active', 15), ('<END>', 16), ('single', 17)])
<START>btn-green row row , , , , , , , , , , , , , , 
(1, 18)
[0.007325175, 0.19345048, 0.14094214, 0.14904535, 0.09814691, 0.09908399, 0.0572461, 0.039755564, 0.028176686, 0.032524724, 0.033000514, 0.037581477, 0.03107127, 0.0031726554, 0.0062221973, 0.013484178, 0.01508355, 0.014687153]
1
dict_items([(',', 1), ('{', 2), ('}', 3), ('small-title', 4), ('text', 5), ('quadruple', 6), ('row', 7), ('btn-inactive', 8), ('btn-orange', 9), ('btn-green', 10), ('btn-red', 11), ('double', 

[0.007167753, 0.18828166, 0.13495855, 0.15557781, 0.10080379, 0.10147119, 0.058597255, 0.03913738, 0.027133947, 0.032535873, 0.03336294, 0.038060356, 0.031068059, 0.003029792, 0.0060927775, 0.013165963, 0.014863697, 0.014691192]
1
dict_items([(',', 1), ('{', 2), ('}', 3), ('small-title', 4), ('text', 5), ('quadruple', 6), ('row', 7), ('btn-inactive', 8), ('btn-orange', 9), ('btn-green', 10), ('btn-red', 11), ('double', 12), ('<START>', 13), ('header', 14), ('btn-active', 15), ('<END>', 16), ('single', 17)])
<START>btn-green row row , , , , , , , , , , , , , , , , , , , , , , , , , , , , , 
(1, 33)
[0.007165548, 0.1882238, 0.13486497, 0.15566336, 0.10083514, 0.10149867, 0.058619585, 0.039131224, 0.027121264, 0.03253462, 0.033368506, 0.03806766, 0.031070841, 0.0030280317, 0.006091313, 0.013162256, 0.014861844, 0.014691545]
1
dict_items([(',', 1), ('{', 2), ('}', 3), ('small-title', 4), ('text', 5), ('quadruple', 6), ('row', 7), ('btn-inactive', 8), ('btn-orange', 9), ('btn-green', 10), (

[0.0071553034, 0.18794674, 0.13441278, 0.15606353, 0.10097643, 0.10161566, 0.05872568, 0.03910636, 0.027066436, 0.032526586, 0.033401057, 0.038105983, 0.031090831, 0.003020419, 0.006086089, 0.013147659, 0.014856982, 0.0146952495]
1
dict_items([(',', 1), ('{', 2), ('}', 3), ('small-title', 4), ('text', 5), ('quadruple', 6), ('row', 7), ('btn-inactive', 8), ('btn-orange', 9), ('btn-green', 10), ('btn-red', 11), ('double', 12), ('<START>', 13), ('header', 14), ('btn-active', 15), ('<END>', 16), ('single', 17)])
<START>btn-green row row , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , , 
(1, 47)
[0.007155185, 0.18794148, 0.13440377, 0.1560696, 0.10097846, 0.10161598, 0.058727834, 0.039106455, 0.027065312, 0.032526713, 0.03340217, 0.03810752, 0.031092182, 0.0030204512, 0.006086226, 0.013147472, 0.014857405, 0.014695715]
1
dict_items([(',', 1), ('{', 2), ('}', 3), ('small-title', 4), ('text', 5), ('quadruple', 6), ('row', 7), ('btn-inactive', 8), ('btn-oran