## MNIST - Softmax model 

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf # tensor flow
from keras.utils.np_utils import to_categorical # one hot encoding

Using TensorFlow backend.


In [57]:
# training hyper-parameters
BATCH_SIZE = 128              # examples requested from next_batch() per step
TRAINING_ITERATIONS = 100000  # number of optimisation steps run by the training loop

In [58]:
# load the training CSV: column 0 is used as the label, every other column as input
dataset = pd.read_csv("./data/train.csv")
x_train = dataset.iloc[:, 1:].values.astype('float32')
# one-hot encode the labels and store the result as uint8
y_train = to_categorical(dataset.iloc[:, 0].values.astype('float32')).astype(np.uint8)

In [59]:
# scale every training input by 1/255
# NOTE: this overwrites x_train, so re-running the cell scales the data again
x_train = x_train * (1.0 / 255.0)

In [60]:
# bookkeeping read and updated by next_batch()
num_examples = y_train.shape[0]  # number of rows in the training set
index_in_epoch = 0               # position of the next unread row in the current pass
epochs_completed = 0             # number of times next_batch() has wrapped around

def next_batch(batch_size, i):
    """Return the next mini-batch of training data, reshuffling between epochs.

    Takes ``batch_size`` consecutive rows from the module-level ``x_train`` and
    ``y_train`` arrays, starting at ``index_in_epoch``. When the requested slice
    would run past ``num_examples``, ``epochs_completed`` is incremented, both
    arrays are reordered with the same random permutation, and the batch is
    taken from the start of the reshuffled data (any unread tail of the
    finished pass is skipped).

    Args:
        batch_size: number of rows to return; must satisfy
            ``1 <= batch_size <= num_examples``.
        i: unused; kept so callers that pass the step index keep working.

    Returns:
        A tuple ``(x_batch, y_batch)`` of row-aligned slices.

    Raises:
        ValueError: if ``batch_size`` is outside the valid range. Without this
            check such values silently produce empty or short batches.
    """
    global x_train
    global y_train
    global index_in_epoch
    global epochs_completed

    if not 1 <= batch_size <= num_examples:
        raise ValueError(
            'batch_size must be between 1 and %d, got %r' % (num_examples, batch_size))

    start = index_in_epoch
    index_in_epoch += batch_size

    # once the current pass cannot supply a full batch, reorder the data randomly
    if index_in_epoch > num_examples:
        # finished epoch
        epochs_completed += 1
        # shuffle inputs and labels with the same permutation so rows stay aligned
        perm = np.random.permutation(num_examples)
        x_train = x_train[perm]
        y_train = y_train[perm]
        # start next epoch
        start = 0
        index_in_epoch = batch_size
    end = index_in_epoch
    return x_train[start:end], y_train[start:end]

In [61]:
def add_layer(inputs, in_size, out_size, activation_function=None):
    """Append one fully connected layer to the graph and return its output tensor.

    Args:
        inputs: tensor that is matrix-multiplied with the layer weights.
        in_size: number of rows of the weight matrix.
        out_size: number of columns of the weight matrix and of the bias row.
        activation_function: optional callable applied to ``inputs @ W + b``;
            when ``None`` the affine output is returned unchanged.

    Returns:
        The activated output, or the raw affine output if no activation is given.
    """
    with tf.name_scope('layer'):
        with tf.name_scope('weights'):
            # weights are drawn from tf.random_normal with its default parameters
            weight_matrix = tf.Variable(tf.random_normal([in_size, out_size]), name='W')
        with tf.name_scope('biases'):
            # every bias starts at 0.1
            bias_row = tf.Variable(tf.zeros([1, out_size]) + 0.1, name='b')
        with tf.name_scope('Wx_plus_b'):
            affine = tf.add(tf.matmul(inputs, weight_matrix), bias_row)
        if activation_function is None:
            return affine
        return activation_function(affine)

In [62]:
# graph inputs: 784 values per example for x_, 10 values per example for y_
with tf.name_scope("inputs"):
    x_ = tf.placeholder(dtype=tf.float32, shape=[None, 784], name="x_input")
    y_ = tf.placeholder(dtype=tf.float32, shape=[None, 10], name="y_input")

In [63]:
# single softmax layer mapping the 784 inputs straight to the 10 outputs
y_predict = add_layer(
    x_,
    in_size=784,
    out_size=10,
    activation_function=tf.nn.softmax,
)

In [64]:
# the error between prediction and real data: cross-entropy summed over the
# whole batch and all classes
with tf.name_scope('loss'):
    # y_predict is the softmax output of the layer built above; the name `y`
    # is never defined in this notebook and fails on a fresh kernel
    # NOTE(review): tf.log yields -inf if a predicted probability underflows
    # to 0 -- consider clipping if NaN losses appear
    cross_entropy = -tf.reduce_sum(y_ * tf.log(y_predict))

In [65]:
# one plain gradient-descent update on the cross-entropy loss
with tf.name_scope('train'):
    train_step = tf.train.GradientDescentOptimizer(
        learning_rate=0.001
    ).minimize(cross_entropy)

In [67]:
# write the graph definition to ./logs/ for inspection.
# The session is only created in a later cell, so take the default graph
# directly (it is the graph a default tf.Session() binds to).
writer = tf.summary.FileWriter("./logs/", tf.get_default_graph())

In [68]:
# open a session and run the initializer for all global variables
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

In [51]:
# train
display_step = 1
# fraction of examples whose most probable predicted class matches the label;
# y_predict is the softmax output (the name `y` is not defined in this notebook)
correct_prediction = tf.equal(tf.argmax(y_predict, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

for i in range(TRAINING_ITERATIONS):
    batch_xs, batch_ys = next_batch(BATCH_SIZE, i)
    sess.run(train_step, feed_dict={x_: batch_xs, y_: batch_ys})
    # check progress on every 1st,2nd,...,10th,20th,...,100th... step
    if i % display_step == 0 or (i + 1) == TRAINING_ITERATIONS:
        # accuracy is measured on the batch that was just used for the update
        train_accuracy = sess.run(accuracy, feed_dict={x_: batch_xs, y_: batch_ys})
        print('training_accuracy => %.4f for step %d' % (train_accuracy, i))

        # increase display_step tenfold each time i reaches the next power of ten
        if i % (display_step * 10) == 0 and i:
            display_step *= 10

training_accuracy => 0.5078 for step 0
training_accuracy => 0.5938 for step 1
training_accuracy => 0.5781 for step 2
training_accuracy => 0.6328 for step 3
training_accuracy => 0.7266 for step 4
training_accuracy => 0.7891 for step 5
training_accuracy => 0.7969 for step 6
training_accuracy => 0.7344 for step 7
training_accuracy => 0.7578 for step 8
training_accuracy => 0.8047 for step 9
training_accuracy => 0.8047 for step 10
training_accuracy => 0.8672 for step 20
training_accuracy => 0.8359 for step 30
training_accuracy => 0.8281 for step 40
training_accuracy => 0.8594 for step 50
training_accuracy => 0.8281 for step 60
training_accuracy => 0.8828 for step 70
training_accuracy => 0.8359 for step 80
training_accuracy => 0.9141 for step 90
training_accuracy => 0.8359 for step 100
training_accuracy => 0.8516 for step 200
training_accuracy => 0.9375 for step 300
training_accuracy => 0.8828 for step 400
training_accuracy => 0.9609 for step 500
training_accuracy => 0.9062 for step 600
trai

In [17]:
# load the test CSV; every column is kept as an input value
x_test = pd.read_csv("./data/test.csv").values.astype(np.float32)

In [18]:
# convert from [0:255] => [0.0:1.0]
# NOTE: this overwrites x_test, so re-running the cell scales the data again
x_test = x_test * (1.0 / 255.0)

In [19]:
# number of rows in the test set
len(x_test)

28000

In [54]:
# predict test set: pick the most probable class for every test row.
# y_predict is the softmax output (the name `y` is not defined in this notebook)
predict = tf.argmax(y_predict, 1)
y_test = sess.run(predict, feed_dict={x_: x_test})

In [55]:
# save results
# (disabled) Uncommenting the call below writes submission.csv with the header
# 'ImageId,Label' followed by one "<1-based row number>,<predicted label>"
# line per test row.
# np.savetxt('submission.csv',
#            np.c_[range(1, len(x_test) + 1), y_test],
#            delimiter=',',
#            header='ImageId,Label',
#            comments='',
#            fmt='%d')

# (disabled) closes the TensorFlow session; no further sess.run() calls are
# possible afterwards
# sess.close()