In [26]:
import numpy as np
import tensorflow as tf

In [27]:
from sklearn.datasets import load_digits
digits = load_digits()

In [28]:
# scale dataset to [0, 1] - min-max scaling
digits_scaled = digits['data'].astype('f4') / np.max(digits['data'])
np.min (digits_scaled), np.max (digits_scaled)

(0.0, 1.0)

In [29]:
# split dataset into train, validation, test set
# take out 20% of each target class for test
data = []
idx_test = []
for i in range (digits['target_names'].shape[0]):
    i_idx = np.where (digits['target'].reshape (-1, 1) == i)[0]
    rnd_i_idx = np.random.choice(i_idx, int (np.floor (len (i_idx)*0.2)), replace = False)
    digits_Xy = np.concatenate((digits_scaled[rnd_i_idx], digits['target'][rnd_i_idx].reshape (-1, 1)), axis = 1)
    data.append (digits_Xy)
    idx_test.append (rnd_i_idx)
digits_Xy_test = np.vstack (data)
digits_X_test = digits_Xy_test[:, :64]
digits_y_test = digits_Xy_test[:, 64].astype ('i4')
# delete the test data from dataset
mask = np.ones (digits_scaled.shape[0], dtype = bool)
mask[np.hstack (idx_test)] = False
digits_X = digits_scaled[mask]
digits_y = digits['target'][mask]
# split remaining dataset into train and validation set
from sklearn.model_selection import train_test_split
digits_X_train, digits_X_val, digits_y_train, digits_y_val = train_test_split(digits_X, digits_y, test_size = 0.2, shuffle = True, random_state = 42)

In [30]:
print('train set:', digits_X_train.shape, digits_y_train.shape)
print('val set:', digits_X_val.shape, digits_y_val.shape)
print('test set:', digits_X_test.shape, digits_y_test.shape)

train set: (1153, 64) (1153,)
val set: (289, 64) (289,)
test set: (355, 64) (355,)


In [31]:
import tensorflow as tf
def nn_layer(inputs,units,activation_fn=None):
  X=inputs
  r = 2/np.sqrt(units)
  w_init = tf.random.uniform (shape = [int (X.get_shape()[1]), units], minval = -1.*r, maxval = 1.*r)
  W = tf.Variable(
      initial_value = w_init,
      name = 'weights'
      )
  # b, shape (u, 1)
  b = tf.Variable(
      initial_value = tf.zeros (shape = [units]),
      name = 'bias'
      )
  # Z = X * W + b with shape (k, u)
  Z = tf.matmul (X, W) + b
  # H = activation (Z)
  if activation_fn:
    return activation_fn(Z)
  else:
    return Z

In [32]:
# construction phase
n_inputs = 8*8
n_hidden1 = 256
n_hidden2 = 128
n_outputs = 10
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
tf.compat.v1.reset_default_graph ()
tf.compat.v1.disable_eager_execution()

X = tf.compat.v1.placeholder(
    dtype = tf.float32,
    shape = (None, n_inputs),
    name = 'X'
)
y = tf.compat.v1.placeholder (
    dtype = tf.int32,
    shape = (None),
    name = 'y'
)
fc1 = nn_layer(
    inputs = X,
    units = n_hidden1,
    activation_fn = tf.nn.relu
)
fc2 = nn_layer(
    inputs = fc1,
    units = n_hidden2,
    activation_fn = tf.nn.relu
)
logits = nn_layer (
    inputs = fc2,
    units = n_outputs,
    activation_fn = None
)
with tf.name_scope ('loss'): # cost function
    # sparse_softmax_cross_entropy_with_logits includes softmax activation function
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits (
        labels = y,
        logits = logits
    )
    loss = tf.reduce_mean (xentropy, name = 'loss')
LR = 0.01
with tf.name_scope ('train'):
    optimizer = tf.train.GradientDescentOptimizer (learning_rate = LR)
    training_op = optimizer.minimize (loss)
with tf.name_scope ('eval'):
    correct = tf.nn.in_top_k (
        predictions = logits,
        targets = y,
        k = 1
    )
    accuracy = tf.reduce_mean (tf.cast (correct, tf.float32))
init = tf.global_variables_initializer ()
saver = tf.train.Saver ()

In [33]:
# execution phase
N_EPOCHS = 20
BATCH_SIZE = 20
n_batches = int (np.ceil(digits_X_train.shape[0] / BATCH_SIZE))
with tf.Session () as sess:
    init.run ()
    for epoch in range (1, N_EPOCHS+1):
        for it in range (n_batches):
            feed_dict = {
                X : digits_X_train[it*BATCH_SIZE:(it+1)*BATCH_SIZE, :],
                y : digits_y_train[it*BATCH_SIZE:(it+1)*BATCH_SIZE]
            }
            sess.run (training_op, feed_dict = feed_dict)
        acc_train = accuracy.eval (feed_dict = feed_dict)
        acc_val = accuracy.eval (feed_dict = {X : digits_X_val, y : digits_y_val})
        print (epoch, ', train acc:', acc_train, ', val acc:', acc_val)
    save_path = saver.save (sess, './digits_final.ckpt')

1 , train acc: 0.84615386 , val acc: 0.7750865
2 , train acc: 0.9230769 , val acc: 0.8512111
3 , train acc: 0.9230769 , val acc: 0.89619374
4 , train acc: 0.9230769 , val acc: 0.90657437
5 , train acc: 1.0 , val acc: 0.9134948
6 , train acc: 1.0 , val acc: 0.9273356
7 , train acc: 1.0 , val acc: 0.93079585
8 , train acc: 1.0 , val acc: 0.93771625
9 , train acc: 1.0 , val acc: 0.93771625
10 , train acc: 1.0 , val acc: 0.93771625
11 , train acc: 1.0 , val acc: 0.93771625
12 , train acc: 1.0 , val acc: 0.9411765
13 , train acc: 1.0 , val acc: 0.9411765
14 , train acc: 1.0 , val acc: 0.9411765
15 , train acc: 1.0 , val acc: 0.9411765
16 , train acc: 1.0 , val acc: 0.9411765
17 , train acc: 1.0 , val acc: 0.9411765
18 , train acc: 1.0 , val acc: 0.9411765
19 , train acc: 1.0 , val acc: 0.9411765
20 , train acc: 1.0 , val acc: 0.9446367


In [34]:
# predictions
with tf.Session() as sess:
    saver.restore (sess, save_path)
    Z = logits.eval (feed_dict = {X : digits_X_test})
    y_pred_distr = tf.nn.softmax (Z).eval ()
    y_pred = np.argmax (y_pred_distr, axis = 1)
    acc_test = accuracy.eval (feed_dict = {X : digits_X_test, y : digits_y_test})

INFO:tensorflow:Restoring parameters from ./digits_final.ckpt


In [35]:
acc_test

0.94929576