In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np

  from ._conv import register_converters as _register_converters


In [3]:
raw_data = pd.read_csv('data/train.csv')
X, Y =  raw_data.drop('label', axis=1).values, raw_data.loc[:,'label'].values

In [4]:
X = X.astype(np.float32)
Y = Y.astype(np.float32)

In [5]:
X.shape

(60000, 784)

In [6]:
Y.shape

(60000,)

In [7]:
import clean_data
X_scaled = clean_data.num_pipeline(X)
X_reduced = clean_data.inc_pca(X_scaled)

In [8]:
X_valid, X_train = X_reduced[:6000], X_reduced[6000:]
y_valid, y_train = Y[:6000], Y[6000:]

In [9]:
# Simpler way for DNN, but with limited functionality
'''
feature_cols = [tf.feature_column.numeric_column("X", shape=[28 * 28])]
dnn_clf = tf.estimator.DNNClassifier(hidden_units=[300,100], n_classes=10,
                                     feature_columns=feature_cols)

input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_scaled}, y=y, num_epochs=40, batch_size=50, shuffle=True)
dnn_clf.train(input_fn=input_fn)
'''
print ('')




In [10]:
'''
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"X": X_scaled}, y=y, shuffle=False)
eval_results = dnn_clf.evaluate(input_fn=test_input_fn)
'''
print ('')




In [11]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X) // batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [12]:
n_inputs = 14*14  # dimensions were reduced
#n_inputs = 28*28
n_hidden1 = 150
n_hidden2 = 75
n_outputs = 10

In [13]:
X = tf.placeholder(tf.float32, 
                   shape=(None, n_inputs), 
                   name="X")
y = tf.placeholder(tf.int32, 
                   shape=(None), 
                   name="y")
training = tf.placeholder_with_default(False, 
                                       shape=(), 
                                       name="training")

In [14]:
dropout_rate = .5
X_drop = tf.layers.dropout(X, 
                           dropout_rate, 
                           training=training)

with tf.name_scope("dnn"):
    hidden1 = tf.layers.dense(X_drop, 
                              n_hidden1, 
                              name="hidden1",
                              activation=tf.nn.relu)
    hidden1_drop = tf.layers.dropout(hidden1, 
                                     dropout_rate, 
                                     training=training)
    
    hidden2 = tf.layers.dense(hidden1_drop, 
                              n_hidden2, 
                              name="hidden2",
                              activation=tf.nn.relu)
    hidden2_drop = tf.layers.dropout(hidden2, 
                                     dropout_rate, 
                                     training=training)
    
    logits = tf.layers.dense(hidden2_drop, 
                             n_outputs, 
                             name="outputs")
    y_proba = tf.nn.softmax(logits)

In [15]:
with tf.name_scope("loss"):
    # Our cost function
    # This computes the cross-entropy before going through softmax function
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, 
                                                              logits=logits)
    # Reduce mean computes mean cross entropy over all instances
    loss = tf.reduce_mean(xentropy, name="loss")

In [16]:
learning_rate = 0.01

with tf.name_scope("train"):
    # To tweak the parameters slightly to minimize the cost function
    optimizer = tf.train.AdamOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

In [17]:
with tf.name_scope("eval"):
    # This specifies how to evaluate the model
    """
        in_top_k determines if the nn prediction is correct by checking whether the 
        highest logit correspond to the correct class.
        It returns a 1D tensor of booleans, and we compute the average
    """
    correct = tf.nn.in_top_k(logits, y, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

In [18]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [20]:
n_epochs = 20
batch_size = 50
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        acc_batch = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_valid = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, "Batch accuracy:", acc_batch, "Validation accuracy:", acc_valid)

    save_path = saver.save(sess, "DNN_models/my_model_final.ckpt")

0 Batch accuracy: 0.94 Validation accuracy: 0.85316664
1 Batch accuracy: 0.88 Validation accuracy: 0.8445
2 Batch accuracy: 0.84 Validation accuracy: 0.85316664
3 Batch accuracy: 0.94 Validation accuracy: 0.875
4 Batch accuracy: 0.92 Validation accuracy: 0.8541667
5 Batch accuracy: 0.88 Validation accuracy: 0.86366665
6 Batch accuracy: 0.9 Validation accuracy: 0.8665
7 Batch accuracy: 0.78 Validation accuracy: 0.8551667
8 Batch accuracy: 0.96 Validation accuracy: 0.8693333
9 Batch accuracy: 0.9 Validation accuracy: 0.8725
10 Batch accuracy: 0.94 Validation accuracy: 0.85
11 Batch accuracy: 0.96 Validation accuracy: 0.8715
12 Batch accuracy: 0.92 Validation accuracy: 0.85183334
13 Batch accuracy: 0.92 Validation accuracy: 0.85833335
14 Batch accuracy: 0.92 Validation accuracy: 0.8653333
15 Batch accuracy: 0.94 Validation accuracy: 0.85583335
16 Batch accuracy: 0.92 Validation accuracy: 0.8666667
17 Batch accuracy: 0.86 Validation accuracy: 0.853
18 Batch accuracy: 0.92 Validation accura