In [1]:
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected

Para facilitar a alteração da função de ativação e da inicialização em todas as camadas, poderíamos utilizar o arg_scope; aqui, esses argumentos são passados a cada camada dentro de uma única função auxiliar. 
Para implementarmos a inicialização de He, devemos utilizar o variance_scaling_initializer().

In [2]:
# Start from an empty default graph so that re-running the notebook never
# stacks duplicate ops or variables on top of a previous build.
tf.reset_default_graph()

# Graph-level seed. It belongs to the default graph, so it has to be set
# after the reset above and before any op is created; it makes the random
# weight initialization repeatable from one run to the next.
tf.set_random_seed(42)

n_inputs = 28 * 28  # MNIST: width of one input row fed to the network
n_outputs = 5       # number of units in the output ("logits") layer

In [3]:
# Input batch: n_inputs features per row; the batch dimension is left open.
X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")

# Integer class labels, one per row of X. The trailing comma matters:
# `(None)` is just `None`, which declares a tensor of unknown rank, whereas
# `(None,)` declares a 1-D vector of unspecified length.
y = tf.placeholder(tf.int32, shape=(None,), name="y")

In [4]:
# He initialization. The keyword arguments below are the values that
# variance_scaling_initializer() applies when called with no arguments;
# they are spelled out here only to make the choice visible.
he_init = tf.contrib.layers.variance_scaling_initializer(
    factor=2.0, mode="FAN_IN", uniform=False)

In [5]:
def get_dnn_layers(inputs, n_neurons, n_hidden_layers=5, **layer_kwargs):
    """Stack fully connected hidden layers on top of `inputs`.

    Each layer is created with `fully_connected` inside the "dnn" name scope
    and uses the variable scope "hidden1", "hidden2", ... in creation order.

    Args:
        inputs: tensor fed to the first hidden layer.
        n_neurons: number of units in every hidden layer.
        n_hidden_layers: how many hidden layers to stack (default 5, the
            depth this notebook was written for). With 0, `inputs` is
            returned unchanged.
        **layer_kwargs: extra keyword arguments forwarded to every
            `fully_connected` call. `activation_fn` defaults to `tf.nn.elu`
            and `weights_initializer` to the module-level `he_init`; pass
            either one explicitly to change it for all layers at once.

    Returns:
        The output tensor of the last hidden layer.
    """
    # Defaults are resolved at call time, so the function always picks up the
    # `he_init` that is current when the graph is built.
    layer_kwargs.setdefault("activation_fn", tf.nn.elu)
    layer_kwargs.setdefault("weights_initializer", he_init)

    with tf.name_scope("dnn"):
        for layer in range(n_hidden_layers):
            inputs = fully_connected(inputs, n_neurons,
                                     scope="hidden%d" % (layer + 1),
                                     **layer_kwargs)
    return inputs

In [6]:
# Hidden stack with 100 units per layer.
dnn_outputs = get_dnn_layers(inputs=X, n_neurons=100)

# Output layer: no activation, so it yields one raw score per output unit.
logits = fully_connected(
    dnn_outputs,
    n_outputs,
    activation_fn=None,
    weights_initializer=he_init,
    scope="logits",
)

# Softmax of the logits, registered in the graph under the name "y_probs".
y_probs = tf.nn.softmax(logits, name="y_probs")

<h4>training with Adam</h4>

In [49]:
learning_rate = 1e-3  # step size for the optimizer

In [50]:
# Per-example cross entropy, computed from the raw logits and the integer
# labels held by `y`.
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=logits,
    labels=y,
)

# Training objective: the mean of the per-example values.
loss = tf.reduce_mean(xentropy, name="loss")

In [51]:
# Adam optimizer driven by the notebook-level learning rate.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

# Op that performs one optimization step on `loss` each time it is run.
training_op = optimizer.minimize(loss, name="training_op")

In [52]:
# True where the label in `y` is the highest-scoring entry of the logits.
is_correct = tf.nn.in_top_k(predictions=logits, targets=y, k=1)

# Mean of the hits (cast to float32), i.e. the fraction predicted correctly.
accuracy = tf.reduce_mean(
    tf.cast(is_correct, tf.float32),
    name="accuracy",
)

In [53]:
# Saver used further down to write and restore the model checkpoint.
saver = tf.train.Saver()

# Op that initializes every global variable; run once per training session.
init = tf.global_variables_initializer()

<h4>carregando os dados...</h4>

In [17]:
import numpy as np

In [29]:
mnist = tf.contrib.learn.datasets.load_dataset("mnist")

# Images are taken as returned by the loader (np.array).
train_data, eval_data = mnist.train.images, mnist.test.images

# Labels are converted to int32 arrays, the dtype of the `y` placeholder.
train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)

Extracting MNIST-data/train-images-idx3-ubyte.gz
Extracting MNIST-data/train-labels-idx1-ubyte.gz
Extracting MNIST-data/t10k-images-idx3-ubyte.gz
Extracting MNIST-data/t10k-labels-idx1-ubyte.gz


Vamos criar um conjunto de validação para utilizarmos o early stopping.

In [30]:
# The first 5000 examples are held out for validation; the rest are kept
# for training. Images and labels are sliced at the same position so they
# stay aligned.
X_valid = train_data[:5000]
y_valid = train_labels[:5000]
X_train = train_data[5000:]
y_train = train_labels[5000:]

Como pede o exercício, vamos aprender a classificar apenas os dígitos de 0 a 4; para o restante utilizaremos transfer learning.

In [58]:
# One boolean mask per split, selecting the examples whose label is below 5.
train_mask = y_train < 5
valid_mask = y_valid < 5
test_mask = eval_labels < 5

# The same mask is applied to images and labels so the pairs stay aligned.
X_train1, y_train1 = X_train[train_mask], y_train[train_mask]
X_valid1, y_valid1 = X_valid[valid_mask], y_valid[valid_mask]
X_test1, y_test1 = eval_data[test_mask], eval_labels[test_mask]

In [32]:
# Training schedule: upper bound on the number of epochs, and the
# mini-batch size.
n_epochs, batch_size = 1000, 20

Vamos criar uma variável para armazenar nossa melhor loss e outras duas para controlarmos há quantas épocas a loss não melhora durante o treinamento e quantas épocas sem melhora toleramos. Assim, podemos acompanhar a loss para realizarmos early stopping.

In [54]:
# Lowest validation loss seen so far; starts at +infinity so that the first
# measured loss always counts as an improvement. `np.inf` is the canonical
# spelling (the `np.infty` alias no longer exists in NumPy 2.0).
best_loss = np.inf

In [55]:
# Early-stopping bookkeeping: how many epochs without improvement are
# tolerated, and how many have elapsed so far.
max_no_progress_epochs = 20
no_progress_epochs = 0

In [56]:
# Early-stopping state is (re)initialized in this cell because init.run()
# below restarts training from freshly initialized weights. Values left over
# from an earlier execution of the cell would otherwise keep the new run from
# ever saving a checkpoint and would cut it short.
best_loss = np.inf
no_progress_epochs = 0

# np.array_split needs at least one section; without the floor of 1 a
# training set smaller than a single batch would raise.
n_batches = max(1, len(X_train1) // batch_size)

with tf.Session() as sess:
    init.run()

    for epoch in range(n_epochs):
        # New random order every epoch, cut into n_batches index groups.
        rnd_idx = np.random.permutation(len(X_train1))
        for rnd_indices in np.array_split(rnd_idx, n_batches):
            X_batch, y_batch = X_train1[rnd_indices], y_train1[rnd_indices]
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})

        # End of epoch: measure loss and accuracy on the validation subset.
        loss_val, acc_val = sess.run([loss, accuracy], feed_dict={X: X_valid1, y: y_valid1})
        if loss_val < best_loss:
            # New best model: checkpoint it and reset the patience counter.
            save_path = saver.save(sess, "./my_mnist_model_0_to_4.ckpt")
            best_loss = loss_val
            no_progress_epochs = 0
        else:
            no_progress_epochs += 1
            # Stop once the count of epochs without improvement exceeds
            # max_no_progress_epochs.
            if no_progress_epochs > max_no_progress_epochs:
                print("Early Stopping!")
                break
        print("{}\tValidation loss: {:.6f}\tBest loss: {:.6f} \tAccuracy: {:.2f}%".format(
              epoch, loss_val, best_loss, acc_val*100))

0	Validation loss: 0.069976	Best loss: 0.069976 	Accuracy: 97.98%
1	Validation loss: 0.056824	Best loss: 0.056824 	Accuracy: 98.17%
2	Validation loss: 0.073308	Best loss: 0.056824 	Accuracy: 97.55%
3	Validation loss: 0.059612	Best loss: 0.056824 	Accuracy: 98.29%
4	Validation loss: 0.063374	Best loss: 0.056824 	Accuracy: 98.60%
5	Validation loss: 0.058810	Best loss: 0.056824 	Accuracy: 98.25%
6	Validation loss: 0.078917	Best loss: 0.056824 	Accuracy: 97.82%
7	Validation loss: 0.060298	Best loss: 0.056824 	Accuracy: 98.76%
8	Validation loss: 0.075487	Best loss: 0.056824 	Accuracy: 98.45%
9	Validation loss: 0.049872	Best loss: 0.049872 	Accuracy: 98.68%
10	Validation loss: 0.074699	Best loss: 0.049872 	Accuracy: 98.21%
11	Validation loss: 0.128505	Best loss: 0.049872 	Accuracy: 97.67%
12	Validation loss: 0.047868	Best loss: 0.047868 	Accuracy: 98.80%
13	Validation loss: 0.054819	Best loss: 0.047868 	Accuracy: 98.99%
14	Validation loss: 0.086944	Best loss: 0.047868 	Accuracy: 98.37%
15	Va

In [59]:
# Reload the checkpoint written by the training loop and score it on the
# test subset.
with tf.Session() as sess:
    saver.restore(sess, "./my_mnist_model_0_to_4.ckpt")
    test_feed = {X: X_test1, y: y_test1}
    acc_test = sess.run(accuracy, feed_dict=test_feed)
    print("Final test accuracy: {:.2f}%".format(acc_test*100))

INFO:tensorflow:Restoring parameters from ./my_mnist_model_0_to_4.ckpt
Final test accuracy: 99.16%
