In [1]:
import data
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn

In [2]:
class BatchSampler(object):
    '''
    A (very) simple wrapper to randomly sample batches without replacement.

    Each call draws a fresh set of row indices; rows are unique *within* a
    batch, but the same row may appear again in a later batch.

    Parameters
    ----------
    data : array with a 2-D ``shape`` (rows, features)
        The feature matrix to sample rows from.
    targets : array indexable by an integer index array
        One target per row of ``data``.
    batch_size : int
        Default number of rows per batch.
    seed : int or None, optional
        When given, batches are drawn from a private ``np.random.RandomState(seed)``
        so sampling is reproducible. When None (the default) the global NumPy
        RNG is used, exactly as before.

    Raises
    ------
    ValueError
        If ``targets`` does not have one entry per row of ``data``.
    '''
    def __init__(self, data, targets, batch_size, seed=None):
        # Fail early: a length mismatch would otherwise surface only as an
        # occasional IndexError (or silently ignored targets) during training.
        if len(targets) != data.shape[0]:
            raise ValueError(
                "data has {} rows but targets has {} entries".format(data.shape[0], len(targets))
            )

        self.num_points = data.shape[0]
        self.features = data.shape[1]
        self.batch_size = batch_size

        self.data = data
        self.targets = targets

        self.indices = np.arange(self.num_points)

        # Both the np.random module and RandomState expose the same `choice` API.
        self._rng = np.random if seed is None else np.random.RandomState(seed)

    def random_batch_indices(self, m=None):
        '''
        Get random batch indices without replacement from the dataset.
        If m is given the batch will be of size m. Otherwise will default to the class initialized value.
        '''
        size = self.batch_size if m is None else m
        return self._rng.choice(self.indices, size, replace=False)

    def get_batch(self, m=None):
        '''
        Get a random batch without replacement from the dataset.
        If m is given the batch will be of size m. Otherwise will default to the class initialized value.

        Returns a tuple (X_batch, y_batch) selected with the same indices, so
        row i of X_batch corresponds to entry i of y_batch.
        '''
        indices = self.random_batch_indices(m)
        X_batch = np.take(self.data, indices, 0)
        y_batch = self.targets[indices]
        return X_batch, y_batch

In [3]:
# Hyperparameters and model dimensions.
lr = 0.001                 # learning rate passed to the Adam optimizer
batch_size = 70            # rows drawn per training step
n_inputs = 8               # features fed to the LSTM at each time step (one 8-value slice of the 64 features)
n_steps = 8                # time steps per sample; n_steps * n_inputs = 64 features per row
n_hidden_units = 64       # number of units in the LSTM cell
n_classes = 10             # classes (0-9 digits)

# Load the train/test splits from the project-local `data` module.
# NOTE(review): rows are presumably flattened 8x8 digit images (the test set's
# shape is shown as (4000, 64) in the next cell) -- confirm against data.load_all_data.
train_data, train_labels, test_data, test_labels = data.load_all_data('data')
# Convert the integer class labels into one-hot rows of width n_classes, the
# format the softmax cross-entropy loss below expects.
train_labels = tf.keras.utils.to_categorical(train_labels, n_classes)
test_labels = tf.keras.utils.to_categorical(test_labels, n_classes)
# Graph inputs: x holds a batch of sequences [batch, n_steps, n_inputs],
# y the matching one-hot labels [batch, n_classes]. The batch dimension is left open.
x = tf.placeholder("float", [None, n_steps, n_inputs])
y = tf.placeholder("float", [None, n_classes])

# Output projection from the final LSTM output (n_hidden_units) to class logits,
# initialized from a standard normal distribution.
out_weights=tf.Variable(tf.random_normal([n_hidden_units,n_classes]))
out_bias=tf.Variable(tf.random_normal([n_classes]))

In [4]:
# Inspect the test set's dimensions (rows, features per row) before it is reshaped for the RNN.
test_data.shape

(4000, 64)

In [5]:
# Split the input tensor [batch_size, n_steps, n_inputs] along the time axis into a
# list of n_steps tensors of shape [batch_size, n_inputs], the form static_rnn consumes.
# Named `step_inputs` rather than `input` so the Python builtin is not shadowed
# for the rest of the kernel session.
step_inputs = tf.unstack(x, n_steps, 1)

# Define the network. forget_bias=1 adds a constant 1 to the forget gate's bias,
# which reduces how much the cell forgets at the start of training.
lstm_layer = rnn.BasicLSTMCell(n_hidden_units, forget_bias=1)
outputs, _ = rnn.static_rnn(lstm_layer, step_inputs, dtype=tf.float32)

# Project the last time step's output [batch_size, n_hidden_units] to class
# logits [batch_size, n_classes] with the output weights and bias.
prediction = tf.matmul(outputs[-1], out_weights) + out_bias

# Loss: mean softmax cross-entropy between the logits and the one-hot labels.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
# Optimization: one Adam step on the loss per run of `opt`.
opt = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)

# Model evaluation: fraction of rows whose arg-max logit matches the arg-max label.
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [7]:
# Train the model for n_train_steps mini-batch updates, reporting train and
# test metrics every report_every steps.
n_train_steps = 6000
report_every = 500

# Initialize variables
init = tf.global_variables_initializer()

# Loop-invariant setup, hoisted out of the training loop:
# - the sampler draws from NumPy's RNG on every get_batch() call, so a single
#   instance yields fresh random batches without being rebuilt each step;
# - the test set is reshaped once into a new name so the global `test_data`
#   is not overwritten and this cell stays idempotent when re-run.
batch_sampler = BatchSampler(train_data, train_labels, batch_size)
test_data_seq = test_data.reshape((-1, n_steps, n_inputs))

with tf.Session() as sess:
    sess.run(init)
    # `step` instead of `iter` so the Python builtin is not shadowed.
    for step in range(1, n_train_steps + 1):
        batch_x, batch_y = batch_sampler.get_batch()
        # Each flat row becomes a sequence of n_steps vectors of n_inputs values.
        batch_x = batch_x.reshape((batch_size, n_steps, n_inputs))
        train_feed = {x: batch_x, y: batch_y}

        sess.run(opt, feed_dict=train_feed)

        if step % report_every == 0:
            # Metrics are evaluated on the batch just trained on, after the update;
            # fetching both in one run avoids a second forward pass.
            acc, los = sess.run([accuracy, loss], feed_dict=train_feed)
            print("For iter {}, Train Accuracy: {}, Loss: {}".format(step, acc, los))
            # Calculating test accuracy on the full test set
            print("Test Accuracy:", sess.run(accuracy, feed_dict={x: test_data_seq, y: test_labels}))

For iter 500, Train Accuracy: 0.9428571462631226, Loss: 0.19981510937213898
Test Accuracy: 0.898
For iter 1000, Train Accuracy: 1.0, Loss: 0.10989219695329666
Test Accuracy: 0.93275
For iter 1500, Train Accuracy: 0.9857142567634583, Loss: 0.05878107249736786
Test Accuracy: 0.94875
For iter 2000, Train Accuracy: 0.9714285731315613, Loss: 0.06761782616376877
Test Accuracy: 0.95425
For iter 2500, Train Accuracy: 0.9714285731315613, Loss: 0.05267376825213432
Test Accuracy: 0.96025
For iter 3000, Train Accuracy: 0.9714285731315613, Loss: 0.1472402662038803
Test Accuracy: 0.964
For iter 3500, Train Accuracy: 0.9857142567634583, Loss: 0.05073528736829758
Test Accuracy: 0.966
For iter 4000, Train Accuracy: 1.0, Loss: 0.006725245621055365
Test Accuracy: 0.96925
For iter 4500, Train Accuracy: 1.0, Loss: 0.03400745615363121
Test Accuracy: 0.973
For iter 5000, Train Accuracy: 1.0, Loss: 0.011263670399785042
Test Accuracy: 0.97275
For iter 5500, Train Accuracy: 1.0, Loss: 0.0041229939088225365
Test

In [5]:
# Second variant of the model: identical architecture, but with forget_bias=0.
# This rebinds prediction / loss / opt / accuracy, so the training cell that
# follows trains this variant.

# Split the input tensor [batch_size, n_steps, n_inputs] along the time axis into a
# list of n_steps tensors of shape [batch_size, n_inputs], the form static_rnn consumes.
# Named `step_inputs` rather than `input` so the Python builtin is not shadowed
# for the rest of the kernel session.
step_inputs = tf.unstack(x, n_steps, 1)

# Define the network with no extra bias on the forget gate.
lstm_layer = rnn.BasicLSTMCell(n_hidden_units, forget_bias=0)
# Build this LSTM under its own variable scope so its weights get names distinct
# from any LSTM already built in the default graph under the default "rnn" scope.
# NOTE(review): without a separate scope, building a second cell in the same graph
# may raise "Variable rnn/basic_lstm_cell/kernel already exists" on some TF 1.x
# versions when the notebook is run top to bottom -- confirm for the pinned version.
with tf.variable_scope("lstm_forget_bias_0"):
    outputs, _ = rnn.static_rnn(lstm_layer, step_inputs, dtype=tf.float32)

# Project the last time step's output [batch_size, n_hidden_units] to class
# logits [batch_size, n_classes] with the output weights and bias.
prediction = tf.matmul(outputs[-1], out_weights) + out_bias

# Loss: mean softmax cross-entropy between the logits and the one-hot labels.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
# Optimization: one Adam step on the loss per run of `opt`.
opt = tf.train.AdamOptimizer(learning_rate=lr).minimize(loss)

# Model evaluation: fraction of rows whose arg-max logit matches the arg-max label.
correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [6]:
# Train the currently defined model for n_train_steps mini-batch updates,
# reporting train and test metrics every report_every steps.
n_train_steps = 6000
report_every = 500

# Initialize variables
init = tf.global_variables_initializer()

# Loop-invariant setup, hoisted out of the training loop:
# - the sampler draws from NumPy's RNG on every get_batch() call, so a single
#   instance yields fresh random batches without being rebuilt each step;
# - the test set is reshaped once into a new name so the global `test_data`
#   is not overwritten and this cell stays idempotent when re-run.
batch_sampler = BatchSampler(train_data, train_labels, batch_size)
test_data_seq = test_data.reshape((-1, n_steps, n_inputs))

with tf.Session() as sess:
    sess.run(init)
    # `step` instead of `iter` so the Python builtin is not shadowed.
    for step in range(1, n_train_steps + 1):
        batch_x, batch_y = batch_sampler.get_batch()
        # Each flat row becomes a sequence of n_steps vectors of n_inputs values.
        batch_x = batch_x.reshape((batch_size, n_steps, n_inputs))
        train_feed = {x: batch_x, y: batch_y}

        sess.run(opt, feed_dict=train_feed)

        if step % report_every == 0:
            # Metrics are evaluated on the batch just trained on, after the update;
            # fetching both in one run avoids a second forward pass.
            acc, los = sess.run([accuracy, loss], feed_dict=train_feed)
            print("For iter {}, Train Accuracy: {}, Loss: {}".format(step, acc, los))
            # Calculating test accuracy on the full test set
            print("Test Accuracy:", sess.run(accuracy, feed_dict={x: test_data_seq, y: test_labels}))

For iter 500, Train Accuracy: 0.9285714030265808, Loss: 0.2269534170627594
Test Accuracy: 0.892
For iter 1000, Train Accuracy: 0.9714285731315613, Loss: 0.2718629539012909
Test Accuracy: 0.93225
For iter 1500, Train Accuracy: 0.9714285731315613, Loss: 0.07966935634613037
Test Accuracy: 0.94375
For iter 2000, Train Accuracy: 0.9714285731315613, Loss: 0.07058509439229965
Test Accuracy: 0.9515
For iter 2500, Train Accuracy: 0.9857142567634583, Loss: 0.04764769598841667
Test Accuracy: 0.96075
For iter 3000, Train Accuracy: 1.0, Loss: 0.012779136188328266
Test Accuracy: 0.96775
For iter 3500, Train Accuracy: 1.0, Loss: 0.028594227507710457
Test Accuracy: 0.971
For iter 4000, Train Accuracy: 1.0, Loss: 0.017648153007030487
Test Accuracy: 0.973
For iter 4500, Train Accuracy: 1.0, Loss: 0.021979818120598793
Test Accuracy: 0.9745
For iter 5000, Train Accuracy: 0.9428571462631226, Loss: 0.11685161292552948
Test Accuracy: 0.97725
For iter 5500, Train Accuracy: 0.9857142567634583, Loss: 0.05554700