In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

In [3]:
def reset_graph (seed=42):
    """Start from an empty default graph with reproducible random state.

    Clears TensorFlow's default graph, then applies `seed` to TensorFlow's
    graph-level random seed and to NumPy's global random generator.

    Args:
        seed: Integer seed shared by both libraries (default 42).
    """
    tf.reset_default_graph()
    # Seed TensorFlow first, then NumPy, with the same value.
    for apply_seed in (tf.set_random_seed, np.random.seed):
        apply_seed(seed)

In [4]:
# Load the training CSV; the 'label' column holds the target and every other
# column is treated as an input feature.
raw_data = pd.read_csv('data/train.csv')

# Feature matrix as float32, the dtype of the `X` placeholder fed later on.
data = raw_data.drop('label', axis=1).values.astype(np.float32)

# Targets are class indices, so keep them integral. They are fed into an int32
# placeholder and consumed by sparse softmax cross-entropy / in_top_k; casting
# them to float32 only worked because feed_dict silently cast them back.
label = raw_data['label'].values.astype(np.int32)

In [5]:
# Project-local preprocessing helpers (module not shown in this notebook).
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so dependencies are visible in one place.
import clean_data
# Presumably a numeric scaling pipeline, judging by the name -- TODO confirm
# against clean_data.num_pipeline.
X_scaled = clean_data.num_pipeline(data)
# Presumably an incremental-PCA reduction, judging by the name -- TODO confirm.
# The result is what gets split into train/validation sets below.
X_reduced = clean_data.inc_pca(X_scaled)

In [6]:
# Hold out the leading rows for validation; everything after them is used for
# training. No shuffling happens here, so the split follows file order.
n_valid = 6000
X_valid, y_valid = X_reduced[:n_valid], label[:n_valid]
X_train, y_train = X_reduced[n_valid:], label[n_valid:]

In [7]:
# Sanity check: (number of training rows, number of features per row).
X_train.shape

(54000, 196)

In [8]:
# Sanity check: one label per training row.
y_train.shape

(54000,)

In [9]:
# Network hyperparameters. Each flat input row is reshaped into a
# height x width x channels "image" before the convolutions, so
# height * width must equal the number of feature columns (196 here).
# NOTE(review): the features come out of clean_data.inc_pca; if they are PCA
# components they have no inherent 2-D layout -- confirm a CNN is intended.
height = 14
width = 14
channels = 1
n_inputs = height * width

# First convolutional layer: feature maps, kernel size, stride, padding.
conv1_fmaps = 32
conv1_ksize = 3
conv1_stride = 1
conv1_pad = "SAME"

# Second convolutional layer.
conv2_fmaps = 64
conv2_ksize = 3
conv2_stride = 1
conv2_pad = "SAME"

# Pooling does not change the channel count, so it matches conv2's output.
pool3_fmaps = conv2_fmaps

# Width of the fully connected hidden layer and number of output classes.
n_fc1 = 64
n_outputs = 10

# Clear any previously built graph and reseed before constructing the model.
reset_graph()

In [10]:
# Graph inputs: flat feature rows plus their integer class labels.
with tf.name_scope("inputs"):
    # One row of n_inputs float features per example; batch size is left open.
    X = tf.placeholder(tf.float32, shape=[None, n_inputs], name="X")
    # View each row as a height x width x channels tensor for the conv layers.
    X_reshaped = tf.reshape(X, shape=[-1, height, width, channels])
    # One int32 class index per example.
    y = tf.placeholder(tf.int32, shape=[None], name="y")

In [11]:
# Two stacked ReLU convolutions. Each entry is
# (layer name, feature maps, kernel size, stride, padding); the layers are
# created in list order, each one consuming the previous layer's output.
conv_specs = [
    ("conv1", conv1_fmaps, conv1_ksize, conv1_stride, conv1_pad),
    ("conv2", conv2_fmaps, conv2_ksize, conv2_stride, conv2_pad),
]

conv_outputs = []
layer_input = X_reshaped
for layer_name, n_fmaps, ksize, stride, pad in conv_specs:
    layer_input = tf.layers.conv2d(layer_input,
                                   filters=n_fmaps,
                                   kernel_size=ksize,
                                   strides=stride,
                                   padding=pad,
                                   activation=tf.nn.relu,
                                   name=layer_name)
    conv_outputs.append(layer_input)

# Expose the individual layer outputs under their usual names.
conv1, conv2 = conv_outputs

In [12]:
# 2x2 max pooling with stride 2 over conv2's output, then flatten each
# example's pooled feature maps into a single vector for the dense layers.
with tf.name_scope("pool3"):
    pool3 = tf.nn.max_pool(conv2,
                           ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1],
                           padding="VALID")
    # Derive the flattened length from the tensor's static shape instead of
    # hard-coding 7 * 7, so the reshape stays correct if height, width, the
    # conv strides/padding or the number of feature maps are changed.
    # The static shape is [batch, rows, cols, channels]; skip the batch axis.
    pool3_flat_size = int(np.prod(pool3.get_shape().as_list()[1:]))
    pool3_flat = tf.reshape(pool3,
                            shape=[-1, pool3_flat_size])

In [13]:
# Fully connected hidden layer: n_fc1 ReLU units on the flattened pool output.
with tf.name_scope("fc1"):
    fc1 = tf.layers.dense(inputs=pool3_flat, units=n_fc1,
                          activation=tf.nn.relu, name="fc1")

In [14]:
# Output layer: one unnormalised score (logit) per class, plus the softmax of
# those scores for callers that want class probabilities.
with tf.name_scope("output"):
    logits = tf.layers.dense(inputs=fc1, units=n_outputs, name="output")
    Y_proba = tf.nn.softmax(logits=logits, name="Y_proba")

In [15]:
# Training objective: mean sparse softmax cross-entropy, minimised with Adam
# using the optimizer's default settings.
with tf.name_scope("train"):
    # Per-example loss computed from raw logits and integer class labels.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=y,
        logits=logits,
    )
    # Scalar loss: average over the batch.
    loss = tf.reduce_mean(xentropy)
    optimizer = tf.train.AdamOptimizer()
    training_op = optimizer.minimize(loss)

In [16]:
# Accuracy: fraction of examples whose true class has the highest logit.
with tf.name_scope("eval"):
    # Boolean per example: is the label among the top-1 predictions?
    correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)
    # Cast the booleans to 0.0 / 1.0 and average them.
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [17]:
# Initialisation op and checkpoint saver. Keep this cell after the optimizer
# cell: both are created from the variables that exist at this point, which
# should include the variables the optimizer adds.
with tf.name_scope("init_and_save"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

In [18]:
def shuffle_batch(X, y, batch_size):
    """Yield shuffled mini-batches that together cover X and y exactly once.

    The row indices are permuted with NumPy's global random generator and
    split into `len(X) // batch_size` nearly equal groups, so individual
    batches may hold slightly more than `batch_size` rows. When there are
    fewer rows than `batch_size`, a single batch with all rows is produced.

    Args:
        X: Array of inputs, indexable by an integer index array.
        y: Array of targets aligned with X along the first axis.
        batch_size: Approximate number of rows per batch; must be positive.

    Yields:
        (X_batch, y_batch) tuples. Nothing is yielded when X is empty.

    Raises:
        ValueError: If `batch_size` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    n_samples = len(X)
    if n_samples == 0:
        # Nothing to iterate over; avoid asking array_split for 0 sections.
        return

    rnd_idx = np.random.permutation(n_samples)
    # np.array_split rejects 0 sections, which the plain floor division would
    # produce whenever the dataset is smaller than one batch.
    n_batches = max(1, n_samples // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]

In [20]:
# Training configuration.
n_epochs = 10
batch_size = 40
checkpoint_path = "CNN_models/my_model_final.ckpt"

# Saver.save() fails if the checkpoint's parent directory is missing, and that
# would only surface after all epochs have run. Create it up front.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        # One pass over the training set in shuffled mini-batches.
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Accuracy on the last mini-batch of the epoch only: a noisy indicator,
        # not the accuracy over the whole training set.
        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_valid = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Batch accuracy:", acc_batch, "Validation accuracy:", acc_valid)

    # NOTE(review): this saves the weights from the final epoch, which is not
    # necessarily the epoch with the best validation accuracy.
    save_path = saver.save(sess, checkpoint_path)

0 Batch accuracy: 0.775 Validation accuracy: 0.86366665
1 Batch accuracy: 0.95 Validation accuracy: 0.88
2 Batch accuracy: 1.0 Validation accuracy: 0.8831667
3 Batch accuracy: 0.9 Validation accuracy: 0.8886667
4 Batch accuracy: 0.925 Validation accuracy: 0.8831667
5 Batch accuracy: 0.95 Validation accuracy: 0.88166666
6 Batch accuracy: 0.975 Validation accuracy: 0.8775
7 Batch accuracy: 1.0 Validation accuracy: 0.87866664
8 Batch accuracy: 0.975 Validation accuracy: 0.8785
9 Batch accuracy: 1.0 Validation accuracy: 0.877
