In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
import pandas as pd
from datetime import datetime
from sklearn.model_selection import train_test_split

In [2]:
def init_parameter(layer):
    parameters = {}
    Adam_caches = {}
    Adam_caches['t'] = tf.Variable(1.0, name='t')
    with tf.name_scope('Initial_Parameters'):
        for i in range(1, len(layer)):
            parameters['W'+str(i)] = tf.Variable(tf.random_normal([layer[i-1], layer[i]], seed=1) * tf.sqrt(2/layer[i-1]), name='W'+str(i))
            parameters['b'+str(i)] = tf.Variable(tf.zeros([1, layer[i]]), name='b'+str(i))
            Adam_caches['Vdw'+str(i)] = tf.Variable(tf.zeros([layer[i-1], layer[i]]), name='Vdw'+str(i))
            Adam_caches['Sdw'+str(i)] = tf.Variable(tf.zeros([layer[i-1], layer[i]]), name='Sdw'+str(i))
            Adam_caches['Vdb'+str(i)] = tf.Variable(tf.zeros([1, layer[i]]), name='Vdb'+str(i))
            Adam_caches['Sdb'+str(i)] = tf.Variable(tf.zeros([1, layer[i]]), name='Sdb'+str(i))
    return parameters, Adam_caches

def forward_propagate(train_X, parameters, en_dropout=0, keep_prob=0.5):
    network_deep = int(len(parameters)/2)
    caches = {}
    dropout_layers = {}
    caches['A0'] = train_X
    for i in range(1, network_deep):
        with tf.name_scope('hidden_layer'+str(i)):
            caches['Z'+str(i)] = tf.add(tf.matmul(caches['A'+str(i-1)], parameters['W'+str(i)]), parameters['b'+str(i)], name='Z'+str(i))
            caches['A'+str(i)] = tf.nn.relu(caches['Z'+str(i)], name='A'+str(i))     
            if en_dropout:
                caches['A'+str(i)] = tf.nn.dropout(caches['A'+str(i)], keep_prob=keep_prob, name='dropout'+str(i))
            # Visualize
            tf.summary.histogram('A'+str(i), caches['A'+str(i)])
        
    with tf.name_scope('output_layer'):
        caches['Z'+str(network_deep)] = tf.add(tf.matmul(caches['A'+str(network_deep-1)], parameters['W'+str(network_deep)]), parameters['b'+str(network_deep)], name='Z'+str(network_deep))
        caches['Y_hat'] = tf.nn.softmax(caches['Z'+str(network_deep)], name='Y_hat')
        # Visualize
        tf.summary.histogram('Y_hat'+str(i), caches['Y_hat'])
    return caches
    
def compute_cost(train_Y, Y_hat, parameters=0, reg_rate=0):
    with tf.name_scope('compute_cost'):
        with tf.name_scope('cross-entropy_cost'):
            cost = tf.reduce_mean(-train_Y*tf.log(Y_hat+1e-10) - (1-train_Y)*tf.log(1-Y_hat+1e-10))
        if reg_rate != 0:
            network_deep = int(len(parameters)/2)
            with tf.name_scope('L2_regularization_cost'):
                sum_W = 0
                for i in range(1, network_deep+1):
                    sum_W += tf.nn.l2_loss([parameters['W'+str(i)]])
                    sum_W += tf.nn.l2_loss([parameters['b'+str(i)]])
                m = tf.cast(tf.shape(parameters['W1'])[0], dtype=tf.float32, name='m')
                L2 = sum_W * reg_rate / m
            cost = cost + L2
        # Visualize
        tf.summary.scalar('cost', cost)
    return cost

def compute_accuracy(train_Y, Y_hat):
    with tf.name_scope('compute_accuracy'):
        correct = tf.equal(tf.argmax(Y_hat, 1), tf.argmax(train_Y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
        # Visualize
        tf.summary.scalar('accuracy', accuracy)
    return accuracy

def compute_gradients(cost, parameters):
    network_deep = int(len(parameters)/2)
    grads = {}
    for i in range(1, network_deep+1):
        grads['grad_W'+str(i)] = tf.gradients(cost, parameters['W'+str(i)], name='grad_W'+str(i))[0]
        grads['grad_b'+str(i)] = tf.gradients(cost, parameters['b'+str(i)], name='grad_b'+str(i))[0]
        # Visualize
        tf.summary.histogram('gradients_W'+str(i), grads['grad_W'+str(i)])
        tf.summary.histogram('gradients_b'+str(i), grads['grad_b'+str(i)])
    return grads

def SGC_optimization(grads, parameters, learning_rate=0.01):
    network_deep = int(len(parameters)/2)
    new_parameters = {}
    for i in range(1, network_deep+1):
        with tf.name_scope('Compute_new_W'+str(i)):
            new_parameters['new_W'+str(i)] = parameters['W'+str(i)] - learning_rate * grads['grad_W'+str(i)]
        with tf.name_scope('Compute_new_b'+str(i)):
            new_parameters['new_b'+str(i)] = parameters['b'+str(i)] - learning_rate * grads['grad_b'+str(i)]
            
        parameters['W'+str(i)] = tf.assign(parameters['W'+str(i)], new_parameters['new_W'+str(i)], name='update_W'+str(i))
        parameters['b'+str(i)] = tf.assign(parameters['b'+str(i)], new_parameters['new_b'+str(i)], name='update_b'+str(i))
        # Visualize
        tf.summary.histogram('W'+str(i), parameters['W'+str(i)])
        tf.summary.histogram('b'+str(i), parameters['b'+str(i)])
    return parameters

def Adam_optimization(grads, parameters, Adam_caches, learning_rate=0.01, beta1=0.9, beta2=0.999):
    network_deep = int(len(parameters)/2)
    new_parameters = {}
    for i in range(1, network_deep+1):
        with tf.name_scope('Compute_new_W'+str(i)):
            Adam_caches['Vdw'+str(i)] = tf.assign(Adam_caches['Vdw'+str(i)], beta1*Adam_caches['Vdw'+str(i)] + (1.0-beta1)*grads['grad_W'+str(i)])
            Adam_caches['Sdw'+str(i)] = tf.assign(Adam_caches['Sdw'+str(i)], beta2*Adam_caches['Sdw'+str(i)] + (1.0-beta2)*grads['grad_W'+str(i)]*grads['grad_W'+str(i)])
            Adam_caches['Vdwcorrected'+str(i)] = Adam_caches['Vdw'+str(i)]/(1.0-beta1**Adam_caches['t'])
            Adam_caches['Sdwcorrected'+str(i)] = Adam_caches['Sdw'+str(i)]/(1.0-beta2**Adam_caches['t'])
            new_parameters['new_W'+str(i)] = parameters['W'+str(i)] - learning_rate * Adam_caches['Vdwcorrected'+str(i)]/tf.sqrt(Adam_caches['Sdwcorrected'+str(i)]+1e-8)
        with tf.name_scope('Compute_new_b'+str(i)):
            Adam_caches['Vdb'+str(i)] = tf.assign(Adam_caches['Vdb'+str(i)], beta1*Adam_caches['Vdb'+str(i)] + (1.0-beta1)*grads['grad_b'+str(i)])
            Adam_caches['Sdb'+str(i)] = tf.assign(Adam_caches['Sdb'+str(i)], beta2*Adam_caches['Sdb'+str(i)] + (1.0-beta2)*grads['grad_b'+str(i)]*grads['grad_b'+str(i)])
            Adam_caches['Vdbcorrected'+str(i)] = Adam_caches['Vdb'+str(i)]/(1.0-beta1**Adam_caches['t'])
            Adam_caches['Sdbcorrected'+str(i)] = Adam_caches['Sdb'+str(i)]/(1.0-beta2**Adam_caches['t'])
            new_parameters['new_b'+str(i)] = parameters['b'+str(i)] - learning_rate * Adam_caches['Vdbcorrected'+str(i)]/tf.sqrt(Adam_caches['Sdbcorrected'+str(i)]+1e-8)
        
        parameters['W'+str(i)] = tf.assign(parameters['W'+str(i)], new_parameters['new_W'+str(i)], name='update_W'+str(i))
        parameters['b'+str(i)] = tf.assign(parameters['b'+str(i)], new_parameters['new_b'+str(i)], name='update_b'+str(i))
        # Visualize
        tf.summary.histogram('W'+str(i), parameters['W'+str(i)])
        tf.summary.histogram('b'+str(i), parameters['b'+str(i)])
        tf.summary.histogram('Vdw'+str(i), Adam_caches['Vdw'+str(i)])
        tf.summary.histogram('Sdw'+str(i), Adam_caches['Sdw'+str(i)])
        tf.summary.histogram('Vdb'+str(i), Adam_caches['Vdb'+str(i)])
        tf.summary.histogram('Sdb'+str(i), Adam_caches['Sdb'+str(i)])
        
    Adam_caches['t'] = tf.assign(Adam_caches['t'], Adam_caches['t'] + 1.0)
    # Visualize
    tf.summary.scalar('t', Adam_caches['t'])
    return parameters

def training(cost, parameters, Adam_caches=0, learning_rate=0.01):
    with tf.name_scope('training'):
        grads = compute_gradients(cost, parameters)
        trained = Adam_optimization(grads, parameters, Adam_caches=Adam_caches, learning_rate=learning_rate)
        #trained = SGC_optimization(grads, parameters, learning_rate)
    return trained

In [3]:
def train_model(data_set, layer_info, learning_rate_init=0.01, larning_rate_decay_rate=1, keep_prob=1, reg_rate=0, iteration_st=0, iteration=1000, batch_size = 128, loaddir=None, logname=None):
    # Reset Summary Graph
    tf.reset_default_graph()
    
    # Define input
    X = tf.placeholder('float', name='X')
    Y = tf.placeholder('float', name='Y')
    learning_rate = tf.placeholder('float', name='learning_rate')
    
    # Forward propagation
    parameters, Adam_caches = init_parameter(layer_info)
    caches = forward_propagate(X, parameters, en_dropout=keep_prob!=1, keep_prob=keep_prob)
    cost = compute_cost(Y, caches['Y_hat'], parameters, reg_rate=reg_rate)
    accuracy = compute_accuracy(Y, caches['Y_hat'])
        
    # Training
    trained = training(cost, parameters, Adam_caches, learning_rate)
    #trained = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    
    # Visualize
    tf.summary.scalar('learning_rate', learning_rate)
    merged = tf.summary.merge_all()
    
    # Initial Save
    saver = tf.train.Saver(max_to_keep=1)
    
    # Implement mini-batch
    train_X_batch, train_Y_batch = tf.train.batch([data_set['train_X'], data_set['train_Y']], batch_size=batch_size, allow_smaller_final_batch=True, enqueue_many=True)

    # Run tensorflow
    init = tf.global_variables_initializer()
    
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.allow_soft_placement = True
    config.log_device_placement = True
    with tf.Session(config=config) as sess:
        sess.run(init)
        learning_rate_final = learning_rate_init
        
        #Set date time for Summary
        now = datetime.now()
        if logname is None:
            logdir = './logs/' + str(now)
        else:
            logdir = './logs/' + logname
        writer = tf.summary.FileWriter(logdir, sess.graph)
        
        if loaddir is not None:
            loaddir = './checkpoint/' + loaddir
            saver.restore(sess, loaddir)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord) 
        
        for i in range(iteration_st, iteration+iteration_st+1):
            # Learning_rate_decay
            if (i%10 == 0) and (i!=0) :
                learning_rate_final = learning_rate_final * larning_rate_decay_rate
            
            batch_num = data_set['train_X'].shape[0]/batch_size
            if batch_num % 1 != 0:
                batch_num = int(batch_num) + 1
            else:
                batch_num = int(batch_num)
            
            for j in range(1, batch_num+1):
                # Get mini-batch
                train_X_mini, train_Y_mini = sess.run([train_X_batch, train_Y_batch])
                
                # Run train
                sess.run(trained, feed_dict={X:train_X_mini, Y:train_Y_mini, learning_rate:learning_rate_final})
                
            train_acc = sess.run(accuracy, feed_dict={X:train_X_mini, Y:train_Y_mini})
            train_cost = sess.run(cost, feed_dict={X:train_X_mini, Y:train_Y_mini})
            dev_acc = sess.run(accuracy, feed_dict={X:data_set['dev_X'], Y:data_set['dev_Y']})
            dev_cost = sess.run(cost, feed_dict={X:data_set['dev_X'], Y:data_set['dev_Y']})
            
            summary = sess.run(merged, feed_dict={X:train_X_mini, Y:train_Y_mini, learning_rate:learning_rate_final})
            writer.add_summary(summary, global_step=i)
            print("epoch: {}, train_cost: {}, train_acc: {:.2f}%, dev_cost: {}, dev_acc: {:.2f}%".format(i, train_cost, train_acc*100, dev_cost, dev_acc*100))
                
            # Save checkpoint
            savedir = './checkpoint/' + logname
            save_path = saver.save(sess, savedir)
        
        coord.request_stop()
        coord.join(threads)

In [4]:
def predict(loaddir, inputs, labels, layer_info):
    #Reset Summary Graph
    tf.reset_default_graph()
    
    #Define input
    X = tf.placeholder('float', name='X')
    Y = tf.placeholder('float', name='Y')
    
    #Initial parameters
    parameters, _ = init_parameter(layer_info)
    
    #Forward Propagation
    caches = forward_propagate(X, parameters)
        
    #Compute Accuracy
    accuracy = compute_accuracy(Y, caches['Y_hat'])
    
    #Initial Save
    saver = tf.train.Saver(max_to_keep=0)

    #Run tensorflow
    init = tf.global_variables_initializer()
    
    with tf.Session() as sess:
        sess.run(init)
        
        loaddir = './checkpoint/' + loaddir
        saver.restore(sess, loaddir)
        
        writer = tf.summary.FileWriter('./logs/predict', sess.graph)
        
        caches_sh, accuracy_sh = sess.run([caches, accuracy], feed_dict={X:inputs, Y:labels})
    return caches_sh['Y_hat'], accuracy_sh

In [5]:
DATA_TRAIN_DIR = '../inputs/emnist-digits-train.csv'
DATA_TEST_DIR = '../inputs/emnist-digits-test.csv'

# Read CSV
csv_read = pd.read_csv(DATA_TRAIN_DIR, header=None, sep=',', dtype='uint8')

# Split training set into train and dev sets
_labels = csv_read.iloc[:, 0]
train_set, dev_set = train_test_split(csv_read, test_size=0.2, random_state=42, stratify=_labels)

# Normalization and One-hot encoder
train_X = np.array(train_set.iloc[:, 1:] / 255, dtype=np.float32)
train_Y = np.array(pd.get_dummies(train_set.iloc[:, 0]), dtype=np.float32)
dev_X = np.array(dev_set.iloc[:, 1:] / 255, dtype=np.float32)
dev_Y = np.array(pd.get_dummies(dev_set.iloc[:, 0]), dtype=np.float32)

# Read CSV
csv_read = pd.read_csv(DATA_TEST_DIR, header=None, sep=',', dtype='uint8')

# Normalization and One-hot encoder
test_X = np.array(train_set.iloc[:, 1:] / 255, dtype=np.float32)
test_Y = np.array(pd.get_dummies(train_set.iloc[:, 0]), dtype=np.float32)

# Assemble
data_set = {'train_X': train_X, 'train_Y': train_Y, 'dev_X': dev_X, 'dev_Y': dev_Y, 'test_X': test_X, 'test_Y': test_Y}

In [6]:
layer_info = [data_set['train_X'].shape[1], 100, 100, 100, 100, data_set['train_Y'].shape[1]]
train_model(data_set, layer_info, learning_rate_init=0.001, reg_rate=0.12, iteration_st=0, iteration=20, batch_size=128, logname='EMNISTletters_100_100_100_100_0.12_minibatch128_Adam')

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.batch(batch_size)` (or `padded_batch(...)` if `dynamic_pad=True`).
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
epoch: 0, train_cost: 0.04639327526092529, train_acc: 97.66%, dev_cost: 0.050553567707538605, dev_acc: 97.03%
epoch: 1, train_cost: 0.03702140599489212, train_acc: 98.44%, dev_cost: 0.04233405366539955, dev_acc: 97.66%
epoch: 2, train_cost: 0.03259336203336716, train_acc: 99.22%, dev_cost: 0.03814941644668579, dev_acc: 97.96%
epoch: 3, train_cost: 0.029835062101483345, train_acc: 99.22%, dev_cost: 0.0351

In [7]:
model = 'EMNISTletters_100_100_100_100_0.12_minibatch128_Adam'
_, accuracy = predict(model, data_set['train_X'], data_set['train_Y'], layer_info)
print('The train_set accuracy is : ', accuracy*100, '%')
_, accuracy = predict(model, data_set['dev_X'], data_set['dev_Y'], layer_info)
print('The dev_set accuracy is : ', accuracy*100, '%')
_, accuracy = predict(model, data_set['test_X'], data_set['test_Y'], layer_info)
print('The test_set accuracy is : ', accuracy*100, '%')
print('-----------------------------')

Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ./checkpoint/EMNISTletters_100_100_100_100_0.12_minibatch128_Adam
The train_set accuracy is :  99.12968873977661 %
INFO:tensorflow:Restoring parameters from ./checkpoint/EMNISTletters_100_100_100_100_0.12_minibatch128_Adam
The dev_set accuracy is :  98.53125214576721 %
INFO:tensorflow:Restoring parameters from ./checkpoint/EMNISTletters_100_100_100_100_0.12_minibatch128_Adam
The test_set accuracy is :  99.12968873977661 %
-----------------------------


In [8]:
DATA_TRAIN_DIR = '../inputs/emnist-letters-train.csv'
DATA_TEST_DIR = '../inputs/emnist-letters-test.csv'

# Read CSV
csv_read = pd.read_csv(DATA_TRAIN_DIR, header=None, sep=',', dtype='uint8')

# Split training set into train and dev sets
_labels = csv_read.iloc[:, 0]
train_set, dev_set = train_test_split(csv_read, test_size=0.2, random_state=42, stratify=_labels)

# Normalization and One-hot encoder
train_X = np.array(train_set.iloc[:, 1:] / 255, dtype=np.float32)
train_Y = np.array(pd.get_dummies(train_set.iloc[:, 0]), dtype=np.float32)
dev_X = np.array(dev_set.iloc[:, 1:] / 255, dtype=np.float32)
dev_Y = np.array(pd.get_dummies(dev_set.iloc[:, 0]), dtype=np.float32)

# Read CSV
csv_read = pd.read_csv(DATA_TEST_DIR, header=None, sep=',', dtype='uint8')

# Normalization and One-hot encoder
test_X = np.array(train_set.iloc[:, 1:] / 255, dtype=np.float32)
test_Y = np.array(pd.get_dummies(train_set.iloc[:, 0]), dtype=np.float32)

# Assemble
data_set = {'train_X': train_X, 'train_Y': train_Y, 'dev_X': dev_X, 'dev_Y': dev_Y, 'test_X': test_X, 'test_Y': test_Y}

In [9]:
layer_info = [data_set['train_X'].shape[1], 100, 100, 100, 100, data_set['train_Y'].shape[1]]
train_model(data_set, layer_info, learning_rate_init=0.001, reg_rate=0.12, iteration_st=0, iteration=20, batch_size=128, logname='EMNISTletters_100_100_100_100_0.12_minibatch128_Adam')

epoch: 0, train_cost: 0.08419038355350494, train_acc: 76.56%, dev_cost: 0.07854300737380981, dev_acc: 80.41%
epoch: 1, train_cost: 0.07424628734588623, train_acc: 79.69%, dev_cost: 0.07067142426967621, dev_acc: 83.55%
epoch: 2, train_cost: 0.06911340355873108, train_acc: 82.81%, dev_cost: 0.06697901338338852, dev_acc: 84.71%
epoch: 3, train_cost: 0.06563639640808105, train_acc: 85.94%, dev_cost: 0.06448525935411453, dev_acc: 85.39%
epoch: 4, train_cost: 0.06335578858852386, train_acc: 85.94%, dev_cost: 0.06264330446720123, dev_acc: 85.91%
epoch: 5, train_cost: 0.06160394102334976, train_acc: 85.94%, dev_cost: 0.0611581951379776, dev_acc: 86.35%
epoch: 6, train_cost: 0.060014933347702026, train_acc: 86.72%, dev_cost: 0.05990079045295715, dev_acc: 86.63%
epoch: 7, train_cost: 0.05856204777956009, train_acc: 88.28%, dev_cost: 0.05880029872059822, dev_acc: 86.87%
epoch: 8, train_cost: 0.0573691762983799, train_acc: 88.28%, dev_cost: 0.05785871669650078, dev_acc: 87.05%
epoch: 9, train_cost

In [10]:
model = 'EMNISTletters_100_100_100_100_0.12_minibatch128_Adam'
_, accuracy = predict(model, data_set['train_X'], data_set['train_Y'], layer_info)
print('The train_set accuracy is : ', accuracy*100, '%')
_, accuracy = predict(model, data_set['dev_X'], data_set['dev_Y'], layer_info)
print('The dev_set accuracy is : ', accuracy*100, '%')
_, accuracy = predict(model, data_set['test_X'], data_set['test_Y'], layer_info)
print('The test_set accuracy is : ', accuracy*100, '%')
print('-----------------------------')

INFO:tensorflow:Restoring parameters from ./checkpoint/EMNISTletters_100_100_100_100_0.12_minibatch128_Adam
The train_set accuracy is :  90.63766598701477 %
INFO:tensorflow:Restoring parameters from ./checkpoint/EMNISTletters_100_100_100_100_0.12_minibatch128_Adam
The dev_set accuracy is :  88.30518126487732 %
INFO:tensorflow:Restoring parameters from ./checkpoint/EMNISTletters_100_100_100_100_0.12_minibatch128_Adam
The test_set accuracy is :  90.63766598701477 %
-----------------------------
