In [1]:
import numpy as np
import pandas as pd
import os

import tensorflow as tf

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

import time

In [2]:
def reset_graph(seed = 42):
    tf.reset_default_graph()
    tf.set_random_seed(seed)
    np.random.seed(seed)

In [3]:
train_data = pd.read_csv("train.csv")
test_data = pd.read_csv("test.csv")

In [4]:
train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42000 entries, 0 to 41999
Columns: 785 entries, label to pixel783
dtypes: int64(785)
memory usage: 251.5 MB


In [5]:
test_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28000 entries, 0 to 27999
Columns: 784 entries, pixel0 to pixel783
dtypes: int64(784)
memory usage: 167.5 MB


In [6]:
x_train = train_data.iloc[:, 1:]
x_train = x_train.values.astype(np.float32).reshape(-1, 28*28) / 255
x_valid, x_train = x_train[:4200], x_train[4200:]

y_train = train_data.iloc[:, 0]
y_train = y_train.values.astype(np.float32)
y_valid, y_train = y_train[:4200], y_train[4200:]

x_test = test_data.values.astype(np.float32).reshape(-1, 28*28) / 255

In [7]:
def shuffle_batch(x, y, batch_size):
    rnd_i = np.random.permutation(len(x))
    n_batches = len(x) // batch_size
    
    for batch_i in np.array_split(rnd_i, n_batches):
        x_batch, y_batch = x[batch_i], y[batch_i]
        yield x_batch, y_batch

In [8]:
h = 28
w = 28
channels = 1
n_i = h*w
n_o = 10

In [9]:
conv1_fmaps = 32
conv1_ksize = 3
conv1_stride = 1
conv1_pad = "SAME"

conv2_fmaps = 64
conv2_ksize = 3
conv2_stride = 1
conv2_pad = "SAME"
conv2_dropoutrate = 0.25

In [10]:
pool3_fmaps = conv2_fmaps

n_fc1 = 128
fc1_dropoutrate = 0.5

In [11]:
reset_graph()

In [12]:
with tf.name_scope("inputs"):
    x = tf.placeholder(tf.float32, shape = [None, n_i], name = "x")
    x_reshaped = tf.reshape(x, shape = [-1, h, w, channels])
    y = tf.placeholder(tf.int32, shape = [None], name = "y")
    training = tf.placeholder_with_default(False, shape = [], name = "training")

In [13]:
conv1 = tf.layers.conv2d(
    x_reshaped, 
    filters = conv1_fmaps,
    kernel_size = conv1_ksize,
    strides = conv1_stride,
    padding = conv1_pad,
    activation = tf.nn.relu,
    name = "conv1",
)

conv2 = tf.layers.conv2d(
    conv1, 
    filters = conv2_fmaps,
    kernel_size = conv2_ksize,
    strides = conv2_stride,
    padding = conv2_pad,
    activation = tf.nn.relu,
    name = "conv2",
)

Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Colocations handled automatically by placer.


In [14]:
with tf.name_scope("pool3"):
    pool3 = tf.nn.max_pool(
        conv2,
        ksize = [1, 2, 2, 1],
        strides = [1, 2, 2, 1],
        padding = "VALID",
    )
    pool3_flat = tf.reshape(
        pool3,
        shape = [-1, pool3_fmaps*14*14],
    )
    pool3_flat_drop = tf.layers.dropout(
        pool3_flat,
        conv2_dropoutrate,
        training = training,
    )

Instructions for updating:
Use keras.layers.dropout instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [15]:
with tf.name_scope("fc1"):
    fc1 = tf.layers.dense(
        pool3_flat_drop,
        n_fc1,
        activation = tf.nn.relu,
        name = "fc1",
    )
    fc1_drop = tf.layers.dropout(
        fc1,
        fc1_dropoutrate,
        training = training,
    )

Instructions for updating:
Use keras.layers.dense instead.


In [16]:
with tf.name_scope("output"):
    logits = tf.layers.dense(
        fc1,
        n_o,
        name = "output",
    )
    y_prob = tf.nn.softmax(
        logits,
        name = "y_prob",
    )

In [17]:
with tf.name_scope("train"):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits = logits,
        labels = y,
    )
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

In [18]:
with tf.name_scope("eval"):
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [19]:
with tf.name_scope("init_and_save"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

In [20]:
def get_model_parameters():
    gvars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
    return {
        gvar.op.name: value for gvar, value in zip(gvars, tf.get_default_session().run(gvars))
    }

In [21]:
def restore_model_parameters(model_parameters):
    gvar_names = list(model_parameters.keys())
    assign_ops = {
        gvar_name: tf.get_default_graph().get_operation_by_name(gvar_name + "/Assign")
        for gvar_name in gvar_names
    }
    init_values = {
        gvar_name: assign_op.inputs[1]
        for gvar_name, assign_op in assign_ops.items()
    }
    feed_dict = {
        init_values[gvar_name]: model_parameters[gvar_name]
        for gvar_name in gvar_names
    }
    tf.get_default_session().run(assign_ops, feed_dict = feed_dict)

In [22]:
n_epochs = 1000
batch_size = 50
i = 0

In [23]:
best_loss_val = np.infty
check_interval = 500
checks_since_last_progress = 0
max_checks_wo_progress = 20
best_model_params = None

In [24]:
i = 0

start = time.time()

with tf.Session() as tfs:
    init.run()
    
    for epoch in range(n_epochs):
        
        for x_batch, y_batch in shuffle_batch(x_train, y_train, batch_size):
            
            i += 1
            
            if i % 100 == 0:
                print(i)
            
            tfs.run(training_op, feed_dict = {x: x_batch, y: y_batch, training: True})
            
            if i % check_interval == 0:
                
                loss_val = loss.eval(feed_dict = {x: x_valid, y: y_valid})
                
                if loss_val < best_loss_val:
                    
                    best_loss_val = loss_val
                    checks_since_last_progress = 0
                    best_model_params = get_model_parameters()
                    
                else:
                    
                    checks_since_last_progress +=1

        acc_batch = accuracy.eval(feed_dict = {x: x_batch, y: y_batch})
        acc_val = accuracy.eval(feed_dict = {x: x_valid, y: y_valid})
        
        print(
            "Epoch {}:\nLast batch's accuracy: {:.4f}%\nValidation set's accuracy: {:.4f}%\nValidation set's best loss: {:.6f}".format(
                epoch,
                acc_batch*100,
                acc_val*100,
                best_loss_val
            )
        )
        
        if checks_since_last_progress > max_checks_wo_progress:
            print("Early stopping!")
            break
            
    if best_model_params:
        restore_model_parameters(best_model_params)
        
    save_path = saver.save(tfs, "./MNIST_CNN")
    
end = time.time()
print(end - start)

100
200
300
400
500
600
700
Epoch 0:
Last batch's accuracy: 100.0000%
Validation set's accuracy: 98.0714%
Validation set's best loss: 0.087169
800
900
1000
1100
1200
1300
1400
1500
Epoch 1:
Last batch's accuracy: 100.0000%
Validation set's accuracy: 98.5476%
Validation set's best loss: 0.050507
1600
1700
1800
1900
2000
2100
2200
Epoch 2:
Last batch's accuracy: 100.0000%
Validation set's accuracy: 98.2143%
Validation set's best loss: 0.041257
2300
2400
2500
2600
2700
2800
2900
3000
Epoch 3:
Last batch's accuracy: 100.0000%
Validation set's accuracy: 98.5952%
Validation set's best loss: 0.036586
3100
3200
3300
3400
3500
3600
3700
Epoch 4:
Last batch's accuracy: 100.0000%
Validation set's accuracy: 98.6429%
Validation set's best loss: 0.036586
3800
3900
4000
4100
4200
4300
4400
4500
Epoch 5:
Last batch's accuracy: 100.0000%
Validation set's accuracy: 98.6190%
Validation set's best loss: 0.036586
4600
4700
4800
4900
5000
5100
5200
Epoch 6:
Last batch's accuracy: 100.0000%
Validation set's 

In [25]:
#with tf.Session() as tfs:
#    saver.restore(tfs, "./MNIST_CNN")
#    pred_val = predictions.eval(feed_dict = {x: x_test})