In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

In [2]:
# --- Data layout -----------------------------------------------------------
IMAGE_SIZE = 28          # images are reshaped to IMAGE_SIZE x IMAGE_SIZE x 1
LABELS = 10              # width of the one-hot label vector / logits layer
VALIDATION_SIZE = 2000   # leading rows of train.csv held out for validation

# --- Network shape ---------------------------------------------------------
KERNEL_SIZE = 5              # square convolution kernel side
DEPTH = 32                   # filters in conv1; conv2 uses 2 * DEPTH
DENSE_HIDDEN_LAYERS = 1024   # number of units in the fully connected layer

# --- Optimisation ----------------------------------------------------------
LR = 0.001                   # learning rate passed to the Adam optimizer
BATCH = 50                   # rows per mini-batch
TRAINING_ITERATIONS = 20000  # number of mini-batch steps in the training loop

In [3]:
# Load the labelled training set: column 0 is read as the class label and
# every remaining column as one pixel of the flattened image.
data = pd.read_csv('train.csv')

train_df = data.iloc[:, 1:].values
# `np.float` was only an alias for the builtin float (float64) and has been
# removed from NumPy; name the dtype explicitly so the cell keeps running.
train_df = train_df.astype(np.float64)

# Scale pixel intensities by 1/255 (assumes raw values are 0..255 - TODO confirm
# against the CSV), giving the network inputs in [0, 1].
train_df = np.multiply(train_df, 1.0 / 255.0)

# Integer class labels as a flat 1-D array.
labels_flat = data.iloc[:, 0].values.ravel()


def dense_to_one_hot(labels_dense, num_classes):
    """Convert integer class labels into a one-hot encoded matrix.

    Args:
        labels_dense: array-like of integer class ids; it is flattened, so
            both shape (n,) and (n, 1) inputs are accepted.
        num_classes: number of classes, i.e. the width of each one-hot row.

    Returns:
        A float64 array of shape (n, num_classes) in which row ``i`` holds a
        single 1 at column ``labels_dense[i]`` and 0 everywhere else.

    Raises:
        ValueError: if any label lies outside ``[0, num_classes)``. Flat-offset
            indexing would otherwise silently set a bit in a neighbouring row.
    """
    labels = np.asarray(labels_dense).ravel()
    num_labels = labels.shape[0]
    labels_one_hot = np.zeros((num_labels, num_classes))
    if num_labels == 0:
        # Nothing to encode; keep the (0, num_classes) shape for callers.
        return labels_one_hot

    if labels.min() < 0 or labels.max() >= num_classes:
        raise ValueError(
            'labels must lie in [0, %d), got values in [%s, %s]'
            % (num_classes, labels.min(), labels.max()))

    # Row-wise fancy indexing: one (row, column) pair per label.
    labels_one_hot[np.arange(num_labels), labels] = 1
    return labels_one_hot

# One-hot encode the class ids and store them compactly as uint8.
labels = dense_to_one_hot(labels_flat, LABELS).astype(np.uint8)

# Hold out the first VALIDATION_SIZE rows for validation; everything after
# that is used for training. Images and labels are cut at the same index so
# the rows stay aligned.
validation_data, train_data = train_df[:VALIDATION_SIZE], train_df[VALIDATION_SIZE:]
validation_labels, train_labels = labels[:VALIDATION_SIZE], labels[VALIDATION_SIZE:]

In [4]:
# Graph inputs: flattened images and their one-hot labels.
X = tf.placeholder('float', shape=[None, IMAGE_SIZE * IMAGE_SIZE])
y_ = tf.placeholder('float', shape=[None, LABELS])

# Dropout switch. tf.layers.dropout is an identity op unless `training` is
# true, so the flag is exposed as a placeholder that defaults to False:
# feed_dicts that do not mention it behave exactly as before (dropout off),
# while a training step can turn dropout on by also feeding {is_training: True}.
is_training = tf.placeholder_with_default(False, shape=[], name='is_training')

# Restore the 2-D single-channel image layout expected by conv2d.
input_layer = tf.reshape(X, [-1, IMAGE_SIZE, IMAGE_SIZE, 1])

# Convolution block 1: DEPTH filters, then 2x2 max-pooling with stride 2.
conv1 = tf.layers.conv2d(
    inputs=input_layer,
    filters=DEPTH,
    kernel_size=[KERNEL_SIZE, KERNEL_SIZE],
    padding="same",
    activation=tf.nn.relu)

pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

# Convolution block 2: twice as many filters, then another 2x2/stride-2 pool.
conv2 = tf.layers.conv2d(
    inputs=pool1,
    filters=2 * DEPTH,
    kernel_size=[KERNEL_SIZE, KERNEL_SIZE],
    padding="same",
    activation=tf.nn.relu)

pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

# The "same"-padded convolutions keep the spatial size and each stride-2 pool
# halves it (rounding down), so every side is IMAGE_SIZE // 4 here.
# The side is computed once and squared: `IMAGE_SIZE // 4 * IMAGE_SIZE // 4`
# is evaluated left to right and only matches when IMAGE_SIZE divides by 4.
pooled_side = IMAGE_SIZE // 4
pool2_flat = tf.reshape(pool2, [-1, pooled_side * pooled_side * 2 * DEPTH])

# Fully connected head with dropout before the classification layer.
dense = tf.layers.dense(inputs=pool2_flat, units=DENSE_HIDDEN_LAYERS, activation=tf.nn.relu)
dropout = tf.layers.dropout(
    inputs=dense, rate=0.5, training=is_training)

logits = tf.layers.dense(inputs=dropout, units=LABELS)

# Class probabilities; the loss below works on the raw logits instead.
y = tf.nn.softmax(logits)

loss = tf.losses.softmax_cross_entropy(
    onehot_labels=y_, logits=logits)
train_step = tf.train.AdamOptimizer(LR).minimize(loss)

# Fraction of rows whose arg-max prediction matches the arg-max of the label.
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))

accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

# Predicted class id per input row.
predict = tf.argmax(y, 1)

In [5]:
# Mini-batch bookkeeping shared with next_batch() through module globals.
num_examples = len(train_data)   # rows available per epoch
index_in_epoch = 0               # offset of the next unread training row
epochs_completed = 0             # number of times the data has been exhausted


def next_batch(batch_size):
    """Return the next ``batch_size`` training rows and their labels.

    Reads and updates the module-level ``train_data``, ``train_labels``,
    ``index_in_epoch`` and ``epochs_completed``; ``num_examples`` is read only.

    Batches are consecutive slices of the current data order. When the
    requested slice would run past ``num_examples`` the epoch counter is
    incremented, images and labels are reshuffled with one shared permutation,
    and the batch is taken from the start of the new order (any rows left
    over at the tail of the finished epoch are skipped).

    Returns:
        A ``(data, labels)`` tuple of slices with matching row order.
    """
    global train_data, train_labels, index_in_epoch, epochs_completed

    first = index_in_epoch
    index_in_epoch = first + batch_size

    if index_in_epoch > num_examples:
        # The current pass over the data is exhausted.
        epochs_completed += 1

        # Reorder images and labels together so the rows stay paired.
        order = np.arange(num_examples)
        np.random.shuffle(order)
        train_data, train_labels = train_data[order], train_labels[order]

        # Restart reading from the top of the reshuffled data.
        first = 0
        index_in_epoch = batch_size
        assert batch_size <= num_examples

    last = index_in_epoch
    return train_data[first:last], train_labels[first:last]

In [6]:
# Open an interactive session (it registers itself as the default session,
# which is what lets later cells call `.eval()` without passing one), then
# initialise every variable of the graph.
sess = tf.InteractiveSession()
init = tf.global_variables_initializer()
sess.run(init)

In [8]:
# Training loop: report accuracy on a thinning schedule, then take one
# optimisation step on the same mini-batch.
display_step = 1

for i in range(TRAINING_ITERATIONS):

    # Fetch the next mini-batch and build its feed once; it is reused for
    # both the progress report and the optimisation step.
    batch_xs, batch_ys = next_batch(BATCH)
    batch_feed = {X: batch_xs, y_: batch_ys}

    # Report on steps 0..10, then every 10th, every 100th, ... and always on
    # the very last step.
    is_last_step = (i + 1) == TRAINING_ITERATIONS
    if i % display_step == 0 or is_last_step:

        # Accuracy on the batch *before* it has been trained on.
        train_accuracy = accuracy.eval(feed_dict=batch_feed)

        if VALIDATION_SIZE:
            # Cheap estimate: only the first BATCH validation rows are scored,
            # so this number is coarse (steps of 1/BATCH).
            validation_feed = {X: validation_data[0:BATCH],
                               y_: validation_labels[0:BATCH]}
            validation_accuracy = accuracy.eval(feed_dict=validation_feed)
            print('training_accuracy | validation_accuracy = %.2f | %.2f for step %d'
                  % (train_accuracy, validation_accuracy, i))
        else:
            print('training_accuracy => %.4f for step %d' % (train_accuracy, i))

        # Thin out the reporting by a factor of ten once a full decade of
        # steps has been shown.
        if i and i % (display_step * 10) == 0:
            display_step *= 10

    # One optimisation step on the batch.
    # NOTE(review): this feed does not switch the graph's dropout layer into
    # training mode, so the update runs with dropout inactive.
    sess.run(train_step, feed_dict=batch_feed)

training_accuracy | validation_accuracy = 0.14 | 0.18 for step 0


training_accuracy | validation_accuracy = 0.24 | 0.26 for step 1


training_accuracy | validation_accuracy = 0.34 | 0.30 for step 2


training_accuracy | validation_accuracy = 0.28 | 0.32 for step 3


training_accuracy | validation_accuracy = 0.44 | 0.42 for step 4


training_accuracy | validation_accuracy = 0.60 | 0.46 for step 5


training_accuracy | validation_accuracy = 0.54 | 0.54 for step 6


training_accuracy | validation_accuracy = 0.66 | 0.68 for step 7


training_accuracy | validation_accuracy = 0.70 | 0.68 for step 8


training_accuracy | validation_accuracy = 0.60 | 0.66 for step 9


training_accuracy | validation_accuracy = 0.76 | 0.74 for step 10


training_accuracy | validation_accuracy = 0.84 | 0.84 for step 20


training_accuracy | validation_accuracy = 0.92 | 0.88 for step 30


training_accuracy | validation_accuracy = 0.94 | 0.94 for step 40


training_accuracy | validation_accuracy = 0.96 | 0.94 for step 50


training_accuracy | validation_accuracy = 0.96 | 0.96 for step 60


training_accuracy | validation_accuracy = 0.96 | 0.96 for step 70


training_accuracy | validation_accuracy = 0.92 | 0.96 for step 80


training_accuracy | validation_accuracy = 0.94 | 0.96 for step 90


training_accuracy | validation_accuracy = 0.98 | 0.92 for step 100


training_accuracy | validation_accuracy = 0.98 | 0.96 for step 200


training_accuracy | validation_accuracy = 0.98 | 0.98 for step 300


training_accuracy | validation_accuracy = 0.98 | 0.98 for step 400


training_accuracy | validation_accuracy = 0.98 | 1.00 for step 500


training_accuracy | validation_accuracy = 0.96 | 1.00 for step 600


training_accuracy | validation_accuracy = 0.96 | 1.00 for step 700


training_accuracy | validation_accuracy = 0.98 | 1.00 for step 800


training_accuracy | validation_accuracy = 0.96 | 1.00 for step 900


training_accuracy | validation_accuracy = 1.00 | 1.00 for step 1000


training_accuracy | validation_accuracy = 1.00 | 1.00 for step 2000


training_accuracy | validation_accuracy = 0.98 | 1.00 for step 3000


training_accuracy | validation_accuracy = 1.00 | 1.00 for step 4000


training_accuracy | validation_accuracy = 1.00 | 1.00 for step 5000


training_accuracy | validation_accuracy = 0.98 | 1.00 for step 6000


training_accuracy | validation_accuracy = 1.00 | 1.00 for step 7000


training_accuracy | validation_accuracy = 1.00 | 1.00 for step 8000


training_accuracy | validation_accuracy = 1.00 | 1.00 for step 9000


training_accuracy | validation_accuracy = 1.00 | 1.00 for step 10000


training_accuracy | validation_accuracy = 1.00 | 1.00 for step 19999


In [9]:
# Score the complete held-out set (the training loop only sampled its first
# BATCH rows); skipped when no validation rows were reserved.
if VALIDATION_SIZE:
    full_validation_feed = {X: validation_data, y_: validation_labels}
    validation_accuracy = accuracy.eval(feed_dict=full_validation_feed)
    print('validation_accuracy => %.4f' % validation_accuracy)

validation_accuracy => 0.9940


In [11]:
# Load the unlabelled test images and give them the same 1/255 scaling that
# was applied to the training pixels; feeding raw intensities would put the
# inputs on a different scale from the one the network was trained on.
test_data = pd.read_csv('test.csv').values
test_data = np.multiply(test_data.astype(np.float64), 1.0 / 255.0)

# Predict in chunks of BATCH rows. Stepping the range by BATCH (instead of
# iterating over shape[0] // BATCH whole batches) also covers a final partial
# chunk, so no row is left at the zero-initialised default.
predictions = np.zeros(test_data.shape[0])
for start in range(0, test_data.shape[0], BATCH):
    stop = start + BATCH  # slicing clamps this at the end of the array
    predictions[start:stop] = predict.eval(feed_dict={X: test_data[start:stop]})

In [12]:
# Write the submission file: one row per test image with a 1-based image id
# and its predicted class, under an 'ImageId,Label' header line.
image_ids = np.arange(1, len(test_data) + 1)
submission_rows = np.column_stack((image_ids, predictions))
np.savetxt('submission3.csv',
           submission_rows,
           delimiter=',',
           header='ImageId,Label',
           comments='',  # empty prefix so the header is not written as '# ...'
           fmt='%d')