In [1]:
'''
A Multilayer Perceptron implementation example using TensorFlow library.
This example is using the MNIST database of handwritten digits
(http://yann.lecun.com/exdb/mnist/)

Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
'''

'\nA Multilayer Perceptron implementation example using TensorFlow library.\nThis example is using the MNIST database of handwritten digits\n(http://yann.lecun.com/exdb/mnist/)\n\nAuthor: Aymeric Damien\nProject: https://github.com/aymericdamien/TensorFlow-Examples/\n'

In [2]:
import tensorflow as tf
import input_data
import os
import numpy as np
import cPickle as pickle
import util
from sklearn.model_selection import train_test_split

# LOAD DATA

In [3]:
# True if you don't want to use the test data and you just want to evaluate the model
# on the training data
testing = True

In [4]:
with open('X_train.pkl') as f:
    X_train = pickle.load(f).todense()
with open('t_train.pkl') as f:
    Y_train = pickle.load(f)
with open('X_test.pkl') as f:
    X_test = pickle.load(f).todense()
with open('test_ids.pkl') as f:
    test_ids = pickle.load(f)
    
nclass = 1 + Y_train.max()
Y_one_hot_train = np.eye(nclass)[Y_train]

In [5]:
if testing:
    train_data, test_data, train_label, test_label = train_test_split(X_train, Y_one_hot_train, test_size=0.20, random_state=42)  
else:
    train_data = X_train
    test_data = X_test
    train_label=Y_one_hot

In [6]:
ntrain = train_data.shape[0]
dim = train_data.shape[1]
ntest = test_data.shape[0]

In [7]:
print ("%d train images loaded" % (ntrain))
print ("%d test images loaded" % (ntest))
print ("%d dimensional input" % (dim))
print ("%d classes" % (nclass))

2468 train images loaded
618 test images loaded
31 dimensional input
15 classes


# DEFINE NETWORK

In [12]:
tf.set_random_seed(0)
# Parameters
learning_rate   = 0.001
training_epochs = 4000
batch_size      = ntrain
display_step    = 1000

# Network Parameters
n_hidden_1 = 50 # 1st layer num features
n_input    = dim # data input 
n_classes  = nclass # total classes (0-9 digits)

# tf Graph input
x = tf.placeholder("float", [None, n_input])
y = tf.placeholder("float", [None, n_classes])

# Create model
def multilayer_perceptron(_X, _weights, _biases):
    layer_1 = tf.nn.relu(tf.add(tf.matmul(_X, _weights['h1']), _biases['b1'])) 
    return tf.matmul(layer_1, _weights['out']) + _biases['out']
    
# Store layers weight & bias
stddev = 0.1 # <== This greatly affects accuracy!! 
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], stddev=stddev)),
    'out': tf.Variable(tf.random_normal([n_hidden_1, n_classes], stddev=stddev))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}
print ("Network Ready to Go!")

Network Ready to Go!


# DEFINE FUNCTIONS

In [13]:
# Construct model
pred = multilayer_perceptron(x, weights, biases)

final_pred = tf.argmax(pred,1)

# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y)) 
optm = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
corr = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))    
accr = tf.reduce_mean(tf.cast(corr, "float"))

# Initializing the variables
init = tf.initialize_all_variables()
print ("Functions ready")

Functions ready


# OPTIMIZE

In [14]:
%%time
# Launch the graph
sess = tf.Session()
sess.run(init)

# Training cycle
for epoch in range(training_epochs):
    avg_cost = 0.
    total_batch = int(ntrain/batch_size)
    # Loop over all batches
    for i in range(total_batch):
        randidx = np.random.randint(ntrain, size=batch_size)
        batch_xs = train_data[randidx, :]
        batch_ys = train_label[randidx, :]   
        # Fit training using batch data
        sess.run(optm, feed_dict={x: batch_xs, y: batch_ys})
        # Compute average loss
        avg_cost += sess.run(cost, 
                feed_dict={x: batch_xs, y: batch_ys})/total_batch
        # Display logs per epoch step
    if epoch % display_step == 0:
        print ("Epoch: %03d/%03d cost: %.9f" % 
               (epoch, training_epochs, avg_cost))
        train_acc = sess.run(accr, feed_dict={x: batch_xs, y: batch_ys})
        print (" Training accuracy: %.3f" % (train_acc))
        if testing:
            test_acc = sess.run(accr, feed_dict={x: test_data, y: test_label})
            print (" Test accuracy: %.3f" % (test_acc))
    if epoch == training_epochs - 1:       
        y_pred = sess.run(final_pred, feed_dict={x: test_data})
        
print ("Optimization Finished!")

Epoch: 000/9000 cost: 131.004425049
 Training accuracy: 0.012
 Test accuracy: 0.006
Epoch: 1000/9000 cost: 1.592013955
 Training accuracy: 0.525
 Test accuracy: 0.515
Epoch: 2000/9000 cost: 1.478789091
 Training accuracy: 0.622
 Test accuracy: 0.620
Epoch: 3000/9000 cost: 1.420223832
 Training accuracy: 0.669
 Test accuracy: 0.675
Epoch: 4000/9000 cost: 1.366180420
 Training accuracy: 0.697
 Test accuracy: 0.672
Epoch: 5000/9000 cost: 1.230615616
 Training accuracy: 0.689
 Test accuracy: 0.667
Epoch: 6000/9000 cost: 1.277993321
 Training accuracy: 0.682
 Test accuracy: 0.668
Epoch: 7000/9000 cost: 1.114051700
 Training accuracy: 0.652
 Test accuracy: 0.660
Epoch: 8000/9000 cost: 1.083078027
 Training accuracy: 0.666
 Test accuracy: 0.672
Optimization Finished!
CPU times: user 15min 18s, sys: 38.5 s, total: 15min 57s
Wall time: 5min 49s


In [15]:
if not testing:
    util.write_predictions(y_pred, test_ids, 'predictions_9k_nn_orig_features.csv')