In [1]:
import warnings
warnings.filterwarnings('ignore')

# Широкий экран
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR) # turn off tf warnings

# from tensorflow.contrib.layers import fully_connected - alternative in part about MNIST implementation in pure tf 


import numpy as np

In [2]:
# def reset_graph(seed=42):
#     tf.reset_default_graph()
#     tf.set_random_seed(seed)
#     np.random.seed(seed)

In [3]:
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

In [4]:
print('X_train shape: ', X_train.shape)
print('X_test shape: ', X_test.shape)
print('y_train shape: ', y_train.shape)
print('y_test shape: ', y_test.shape)

X_train shape:  (60000, 28, 28)
X_test shape:  (10000, 28, 28)
y_train shape:  (60000,)
y_test shape:  (10000,)


In [5]:
X_train =X_train.astype(np.float32).reshape(-1, 28*28)/255.0 #flatten data for each object
X_test =X_test.astype(np.float32).reshape(-1, 28*28)/255.0 
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

X_valid, X_train = X_train[:5000], X_train[5000:]
y_valid, y_train = y_train[:5000], y_train[5000:]

In [6]:
X_train.shape

(55000, 784)

**MNIST with hight-level API**

In [None]:
feature_cols = [tf.feature_column.numeric_column('X', shape=[28 * 28])] #Set any flattened pixel of picture as feature https://www.tensorflow.org/guide/feature_columns 

dnn_clf = tf.estimator.DNNClassifier(hidden_units = [300, 100], n_classes = 10, feature_columns = feature_cols) # set MLP with 2 layers: 300 and 100 neurons

input_fn = tf.estimator.inputs.numpy_input_fn(x = {'X': X_train}, y = y_train, num_epochs = 40, batch_size = 50, shuffle = True)
# train model
dnn_clf.train(input_fn = input_fn)

In [None]:
#evaluate model
test_input_fn = tf.estimator.inputs.numpy_input_fn( x = {'X': X_test}, y = y_test, shuffle = False)
eval_results = dnn_clf.evaluate(input_fn = test_input_fn)

In [None]:
eval_results

In [None]:
y_pred_iter = dnn_clf.predict(input_fn = test_input_fn)
y_pred = list(y_pred_iter) #get prediction information list from iterator object
y_pred[0]['class_ids']

**MNIST with plain TensorFlow**

In [7]:
n_inputs = 28*28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

In [8]:
X = tf.placeholder(tf.float32, shape = (None, n_inputs), name = 'X')
y = tf.placeholder(tf.int64, shape = (None), name = 'y')

In [9]:
def neuron_layer(X, n_neurons, name, activation = None):
    '''
    Create one layer at a time
    '''
    with tf.name_scope(name): # for well organization in TensorBoard
        n_inputs = int(X.get_shape()[1])
        stddev = 2 / np.sqrt(n_inputs) 
        init = tf.truncated_normal((n_inputs, n_neurons), stddev= stddev) #sttdev, init - initialization parameters for rundom set up W matrix
        W = tf.Variable(init, name= 'weights')
        b = tf.Variable(tf.zeros([n_neurons]), name = 'biases')
        z = tf.matmul(X, W) + b
        if activation is not None:
            return activation(z)
        else:
            return z

In [10]:
with tf.name_scope('dnn'):
    hidden1 = neuron_layer(X, n_hidden1, 'hidden1', activation = tf.nn.relu)  # or hidden1 = fully_connected(X, n_hidden1, scope = 'hidden1')
    hidden2 = neuron_layer(hidden1, n_hidden2, 'hidden2', activation = tf.nn.relu) # or hidden2 = fully_connected(hidden1, n_hidden2, scope = 'hidden2')
    # values just before softmax computations (optimization reason)
    logits = neuron_layer(hidden2, n_outputs, 'outputs')  # or logits = fully_connected(hidden2, n_outputs, scope = 'outputs', activation_fn = None) 
    

In [11]:
with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels = y, logits = logits)
    loss = tf.reduce_mean(xentropy, name = 'loss')

In [12]:
learning_rate = 0.01
with tf.name_scope('train'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

In [13]:
with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(logits, y, 1) #check whether or not the highest logit correspond to the target class
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))    

In [14]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [15]:
n_epochs = 40
batch_size = 50

In [16]:
def shuffle_batch(X, y, batch_size):
    rnd_idx = np.random.permutation(len(X))
    n_batches = len(X)//batch_size
    for batch_idx in np.array_split(rnd_idx, n_batches):
        X_batch, y_batch = X[batch_idx], y[batch_idx]
        yield X_batch, y_batch

In [17]:
# reset_graph()
with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict = {X: X_batch, y: y_batch})
        acc_batch = accuracy.eval(feed_dict = {X: X_valid, y: y_valid})
        acc_val = accuracy.eval(feed_dict = {X: X_valid, y: y_valid})
        if epoch % 5 == 0:
            print(epoch, 'Batch:', acc_batch, 'Val accuracy:', acc_val)
    
    save_path = saver.save(sess, './model_chapter10_plain_tf.ckpt')

0 Batch: 0.917 Val accuracy: 0.917
5 Batch: 0.9576 Val accuracy: 0.9576
10 Batch: 0.9678 Val accuracy: 0.9678
15 Batch: 0.97 Val accuracy: 0.97
20 Batch: 0.974 Val accuracy: 0.974
25 Batch: 0.9758 Val accuracy: 0.9758
30 Batch: 0.9764 Val accuracy: 0.9764
35 Batch: 0.9772 Val accuracy: 0.9772


In [18]:
with tf.Session() as sess:
    saver.restore(sess, "./model_chapter10_plain_tf.ckpt") 
    X_new_scaled = X_test[:20]
    Z = logits.eval(feed_dict={X: X_new_scaled})
    y_pred = np.argmax(Z, axis=1)

In [19]:
print('Predicted classes:', y_pred)
print('Actual classes:', y_test[:20])


Predicted classes: [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
Actual classes: [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
