# Treinando Redes Neurais Profundas

### Bibliotecas básicas

In [1]:
import numpy as np
import pandas as pd
from functools import partial
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
print(tf.__version__)

2.1.0


In [2]:
# Turn off eager execution: every model below is built with the tf.compat.v1
# graph API (placeholders, Sessions, Savers), which needs graph mode.
tf.compat.v1.disable_eager_execution()

### Normalização em lote

In [None]:
# Clear the default graph so this cell builds its model from scratch.
tf.compat.v1.reset_default_graph()

batch_norm_momentum = 0.9  # passed as `momentum` to every batch-normalization layer below
learning_rate = 0.01

n_inputs = 28 * 28  # width of one flattened input row
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

X = tf.compat.v1.placeholder(tf.float32, shape=(None, n_inputs), name='X')
# NOTE: `(None)` is plain `None` (there is no trailing comma), so the shape of y is left unspecified.
y = tf.compat.v1.placeholder(tf.int32, shape=(None), name='y')
# Defaults to False, so any run that does not feed it uses training=False;
# the training loop feeds True explicitly.
training = tf.compat.v1.placeholder_with_default(False, shape=(), name='training')

with tf.name_scope('dnn'):
    # NOTE(review): the initializer is created with its default arguments; the name suggests
    # He initialization, which presumably needs scale=2.0 -- confirm against the TF docs.
    he_init = tf.compat.v1.variance_scaling_initializer()

    # Pre-bind the arguments shared by every batch-normalization layer.
    my_batch_norm_layer = partial(
            tf.compat.v1.layers.batch_normalization,
            training=training,
            momentum=batch_norm_momentum)

    # Dense layers are created without an activation; ELU is applied after batch norm below.
    my_dense_layer = partial(
            tf.compat.v1.layers.dense,
            kernel_initializer=he_init)

    # Pattern per hidden layer: dense -> batch norm -> ELU.
    hidden1 = my_dense_layer(X, n_hidden1, name='hidden1')
    bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden2, name='hidden2')
    bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))
    logits_before_bn = my_dense_layer(bn2, n_outputs, name='outputs')
    # The output layer is batch-normalized as well, with no activation on top.
    logits = my_batch_norm_layer(logits_before_bn)

with tf.name_scope('loss'):
    # Mean cross-entropy between the integer labels in y and the logits.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope('train'):
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope('eval'):
    # `correct` flags the rows whose label is the top-1 entry of the logits;
    # accuracy is the mean of those flags cast to float.
    correct = tf.nn.in_top_k(y, logits, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
# Initializer op and checkpoint saver used by the training cell further down.
init = tf.compat.v1.global_variables_initializer()
saver = tf.compat.v1.train.Saver()

In [4]:
def shuffle_batch(X, y, batch_size):
    """Yield shuffled mini-batches ``(X_batch, y_batch)`` that cover every sample once.

    A random permutation of the sample indices is cut into
    ``len(X) // batch_size`` nearly equal parts with ``np.array_split``, so a
    batch can hold a few more than ``batch_size`` samples. When there are
    fewer samples than ``batch_size``, a single batch with all samples is
    produced; an empty ``X`` produces no batches.

    Args:
        X: Array of samples; must support ``len()`` and indexing with an
            integer index array.
        y: Array of labels aligned with ``X`` along the first axis.
        batch_size: Target number of samples per batch; must be >= 1.

    Yields:
        Tuples ``(X_batch, y_batch)`` selected with the same indices.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (raised when the
            generator is first advanced).
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')
    n_samples = len(X)
    if n_samples == 0:
        return
    rnd_idx = np.random.permutation(n_samples)
    # np.array_split rejects 0 sections, which is what the integer division
    # gives when there are fewer samples than batch_size.
    n_batches = max(1, n_samples // batch_size)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]
        

# Fetch MNIST through Keras; it is returned as a (train, test) pair of (images, labels).
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten each image into a 784-long float32 row and divide the pixel values by 255.
X_train = X_train.reshape(-1, 28*28).astype(np.float32) / 255.0
X_test = X_test.reshape(-1, 28*28).astype(np.float32) / 255.0

# Labels as int32, the dtype of the `y` placeholders used throughout the notebook.
y_train, y_test = y_train.astype(np.int32), y_test.astype(np.int32)

# Hold out the first 5000 training samples for validation; train on the rest.
X_valid, X_train = np.split(X_train, [5000])
y_valid, y_train = np.split(y_train, [5000])

In [5]:
n_epochs = 20
batch_size = 200

# Everything registered in the UPDATE_OPS collection of the current graph;
# these ops are executed together with each training step below.
extra_update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)

with tf.compat.v1.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            # `training` is fed as True only for training steps.
            sess.run(
                [training_op, extra_update_ops],
                feed_dict={training: True, X: X_batch, y: y_batch},
            )
        # `training` is not fed here, so its default (False) applies.
        accuracy_val = sess.run(accuracy, feed_dict={X: X_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)

    save_path = saver.save(sess, './my_model_final.ckpt')

0 Validation accuracy: 0.9002
1 Validation accuracy: 0.9168
2 Validation accuracy: 0.9328
3 Validation accuracy: 0.9424
4 Validation accuracy: 0.9478
5 Validation accuracy: 0.952
6 Validation accuracy: 0.9568
7 Validation accuracy: 0.9586
8 Validation accuracy: 0.9612
9 Validation accuracy: 0.9626
10 Validation accuracy: 0.9642
11 Validation accuracy: 0.966
12 Validation accuracy: 0.9664
13 Validation accuracy: 0.967
14 Validation accuracy: 0.9698
15 Validation accuracy: 0.9702
16 Validation accuracy: 0.9712
17 Validation accuracy: 0.9714
18 Validation accuracy: 0.9704
19 Validation accuracy: 0.9718


### Gradient Clipping (recorte de gradiente)

In [6]:
# Clear the default graph and build a five-hidden-layer ReLU network.
tf.compat.v1.reset_default_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_hidden3 = 50
n_hidden4 = 50
n_hidden5 = 50
n_outputs = 10

learning_rate = 0.01

X = tf.compat.v1.placeholder(tf.float32, shape=(None, n_inputs), name='X')
# NOTE: `(None)` is plain `None` (no trailing comma), so the shape of y is left unspecified.
y = tf.compat.v1.placeholder(tf.int32, shape=(None), name='y')

# Layer names matter: later cells look up 'dnn/hidden3/Relu:0' and restore the
# variables of hidden1-3 from the checkpoint written for this graph.
with tf.name_scope('dnn'):
    hidden1 = tf.compat.v1.layers.dense(X, n_hidden1, activation=tf.nn.relu, name='hidden1')
    hidden2 = tf.compat.v1.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name='hidden2')
    hidden3 = tf.compat.v1.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name='hidden3')
    hidden4 = tf.compat.v1.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name='hidden4')
    hidden5 = tf.compat.v1.layers.dense(hidden4, n_hidden5, activation=tf.nn.relu, name='hidden5')
    # Output layer: no activation, raw logits.
    logits = tf.compat.v1.layers.dense(hidden5, n_outputs, name='outputs')

with tf.name_scope('loss'):
    # Mean cross-entropy between the integer labels in y and the logits.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

In [7]:
# Bound applied to every gradient component: values are clipped to [-threshold, threshold].
threshold = 1.0

# Instead of optimizer.minimize(), split the step in two so the gradients can be
# clipped in between: compute_gradients -> clip_by_value -> apply_gradients.
# The optimizer is created outside any name scope; a later cell fetches the
# training op by the name 'GradientDescent'.
optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(loss)
capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var) for grad, var in grads_and_vars]
training_op = optimizer.apply_gradients(capped_gvs)

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(y, logits, 1)
    # Explicitly named so that a later cell can fetch it as 'eval/accuracy:0'.
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')
    
init = tf.compat.v1.global_variables_initializer()
saver = tf.compat.v1.train.Saver()


n_epochs = 20
batch_size = 200


with tf.compat.v1.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Report accuracy on the validation split after each epoch.
        accuracy_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)

    # Written to the same path as the batch-norm model's checkpoint in the previous section.
    save_path = saver.save(sess, './my_model_final.ckpt')

0 Validation accuracy: 0.5774
1 Validation accuracy: 0.8184
2 Validation accuracy: 0.875
3 Validation accuracy: 0.8914
4 Validation accuracy: 0.9018
5 Validation accuracy: 0.9182
6 Validation accuracy: 0.9232
7 Validation accuracy: 0.9284
8 Validation accuracy: 0.9346
9 Validation accuracy: 0.9376
10 Validation accuracy: 0.94
11 Validation accuracy: 0.9446
12 Validation accuracy: 0.9478
13 Validation accuracy: 0.9498
14 Validation accuracy: 0.9524
15 Validation accuracy: 0.9548
16 Validation accuracy: 0.9578
17 Validation accuracy: 0.958
18 Validation accuracy: 0.961
19 Validation accuracy: 0.9614


### Reutilizando um modelo TensorFlow

#### Carrega a estrutura do grafo e lista as operações

In [31]:
# Start from an empty default graph, then rebuild the saved graph structure
# from the checkpoint's .meta file.
tf.compat.v1.reset_default_graph()
saver = tf.compat.v1.train.import_meta_graph('./my_model_final.ckpt.meta')

# Print the name of every operation in the imported graph, one per line.
for operation in tf.compat.v1.get_default_graph().get_operations():
    print(operation.name)

#### Carrega as operações que serão utilizadas

In [9]:
# Look up, by name, the handles needed to keep training the imported model.
graph = tf.compat.v1.get_default_graph()

# Input placeholders (tensor names carry the ':0' output suffix).
X = graph.get_tensor_by_name('X:0')
y = graph.get_tensor_by_name('y:0')

# Accuracy is fetched as a tensor, the training step as an operation.
accuracy = graph.get_tensor_by_name('eval/accuracy:0')
training_op = graph.get_operation_by_name('GradientDescent')

#### Criando uma coleção contendo todas as operações importantes

In [10]:
# Empty the collection first: add_to_collection appends, so re-running this
# cell would otherwise store the same four handles twice and the four-way
# unpacking of the collection in the next cell would fail.
tf.compat.v1.get_default_graph().clear_collection('my_important_ops')

# Group the handles a user of this model needs under a single collection name.
for op in (X, y, accuracy, training_op):
    tf.compat.v1.add_to_collection('my_important_ops', op)

In [11]:
# Read the handles back from the collection; the unpacking expects exactly
# four entries, in the order they were added.
X, y, accuracy, training_op = tf.compat.v1.get_collection('my_important_ops')

#### Inicia uma sessão, restaura o estado do modelo e continua treinando em seus dados:

In [12]:
with tf.compat.v1.Session() as sess:
    # Load the saved variable values instead of running an initializer.
    saver.restore(sess, './my_model_final.ckpt')

    # n_epochs and batch_size keep the values set in an earlier cell.
    for epoch in range(n_epochs):
        batches = shuffle_batch(X_train, y_train, batch_size)
        for X_batch, y_batch in batches:
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val = sess.run(accuracy, feed_dict={X: X_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)

    # The further-trained model goes to a separate checkpoint.
    save_path = saver.save(sess, './my_new_model_final.ckpt')

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
0 Validation accuracy: 0.9622
1 Validation accuracy: 0.9636
2 Validation accuracy: 0.9654
3 Validation accuracy: 0.9628
4 Validation accuracy: 0.9654
5 Validation accuracy: 0.9644
6 Validation accuracy: 0.9674
7 Validation accuracy: 0.9672
8 Validation accuracy: 0.967
9 Validation accuracy: 0.9692
10 Validation accuracy: 0.97
11 Validation accuracy: 0.9694
12 Validation accuracy: 0.9694
13 Validation accuracy: 0.97
14 Validation accuracy: 0.9702
15 Validation accuracy: 0.971
16 Validation accuracy: 0.9704
17 Validation accuracy: 0.9702
18 Validation accuracy: 0.972
19 Validation accuracy: 0.971


#### Adicionamos uma nova 4ª camada oculta no topo da 3ª camada pré-treinada

In [13]:
tf.compat.v1.reset_default_graph()

n_hidden4 = 20  # new layer
n_outputs = 10  # new layer

# Rebuild the saved graph; `saver` only knows the variables that exist at this point.
saver = tf.compat.v1.train.import_meta_graph('./my_model_final.ckpt.meta')

X = tf.compat.v1.get_default_graph().get_tensor_by_name('X:0')
y = tf.compat.v1.get_default_graph().get_tensor_by_name('y:0')

# Output of the third pretrained hidden layer; the new layers are stacked on top of it.
hidden3 = tf.compat.v1.get_default_graph().get_tensor_by_name('dnn/hidden3/Relu:0')

# New layer names and 'new_*' scopes are distinct from the names used in the imported graph.
new_hidden4 = tf.compat.v1.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name='new_hidden4')
new_logits = tf.compat.v1.layers.dense(new_hidden4, n_outputs, name='new_outputs')

with tf.name_scope('new_loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=new_logits)
    loss = tf.reduce_mean(xentropy, name='loss')

with tf.name_scope('new_eval'):
    correct = tf.nn.in_top_k(y, new_logits, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')

with tf.name_scope('new_train'):
    # learning_rate keeps the value assigned in an earlier cell.
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

init = tf.compat.v1.global_variables_initializer()
# Second saver, created after the new layers were added; used to write the new model.
new_saver = tf.compat.v1.train.Saver()

In [14]:
with tf.compat.v1.Session() as sess:
    # Initialize every variable first, then overwrite the pretrained ones with
    # the values stored in the checkpoint.
    sess.run(init)
    saver.restore(sess, './my_model_final.ckpt')

    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            batch_feed = {X: X_batch, y: y_batch}
            sess.run(training_op, feed_dict=batch_feed)
        accuracy_val = sess.run(accuracy, feed_dict={X: X_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)

    # Save with the saver that was created after the new layers were added.
    save_path = new_saver.save(sess, './my_new_model_final.ckpt')

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
0 Validation accuracy: 0.8924
1 Validation accuracy: 0.9258
2 Validation accuracy: 0.9388
3 Validation accuracy: 0.944
4 Validation accuracy: 0.9488
5 Validation accuracy: 0.9506
6 Validation accuracy: 0.9538
7 Validation accuracy: 0.9586
8 Validation accuracy: 0.9578
9 Validation accuracy: 0.9618
10 Validation accuracy: 0.9608
11 Validation accuracy: 0.9626
12 Validation accuracy: 0.9638
13 Validation accuracy: 0.9654
14 Validation accuracy: 0.9656
15 Validation accuracy: 0.9642
16 Validation accuracy: 0.9662
17 Validation accuracy: 0.9672
18 Validation accuracy: 0.9676
19 Validation accuracy: 0.9674


### Congelando as camadas inferiores

In [15]:
tf.compat.v1.reset_default_graph()

n_inputs = 28 * 28
n_hidden1 = 300 # reused
n_hidden2 = 50  # reused
n_hidden3 = 50  # reused
n_hidden4 = 20  # new layer
n_outputs = 10  # new layer

X = tf.compat.v1.placeholder(tf.float32, shape=(None, n_inputs), name='X')
# NOTE: `(None)` is plain `None` (no trailing comma), so the shape of y is left unspecified.
y = tf.compat.v1.placeholder(tf.int32, shape=(None), name='y')

with tf.name_scope('dnn'):
    hidden1 = tf.compat.v1.layers.dense(X, n_hidden1, activation=tf.nn.relu, name='hidden1')       # reused
    hidden2 = tf.compat.v1.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name='hidden2') # reused
    hidden3 = tf.compat.v1.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name='hidden3') # reused
    hidden4 = tf.compat.v1.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name='hidden4') # new layer
    logits = tf.compat.v1.layers.dense(hidden4, n_outputs, name='outputs') # new layer
    
with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(y, logits, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')
    
    
with tf.name_scope('train'):
    # learning_rate keeps the value assigned in an earlier cell.
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
    # Freezing: only variables whose name matches hidden3, hidden4 or outputs are
    # handed to the optimizer, so hidden1 and hidden2 are never updated.
    train_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope='hidden[34]|outputs')
    training_op = optimizer.minimize(loss, var_list=train_vars)


# Saver restricted to the variables of hidden1-3, used only to load pretrained values.
reuse_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope='hidden[123]')
restore_saver = tf.compat.v1.train.Saver(reuse_vars) # restores layers 1-3

init = tf.compat.v1.global_variables_initializer()
# Default saver (no variable list given), used to write the new model.
saver = tf.compat.v1.train.Saver()

with tf.compat.v1.Session() as sess:
    # Initialize everything, then overwrite layers 1-3 with the checkpoint values.
    init.run()
    restore_saver.restore(sess, './my_model_final.ckpt')

    # n_epochs and batch_size keep the values set in an earlier cell.
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)

    save_path = saver.save(sess, './my_new_model_final.ckpt')

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
0 Validation accuracy: 0.8948
1 Validation accuracy: 0.9346
2 Validation accuracy: 0.9414
3 Validation accuracy: 0.945
4 Validation accuracy: 0.9472
5 Validation accuracy: 0.9494
6 Validation accuracy: 0.9498
7 Validation accuracy: 0.9504
8 Validation accuracy: 0.951
9 Validation accuracy: 0.951
10 Validation accuracy: 0.9524
11 Validation accuracy: 0.9536
12 Validation accuracy: 0.9536
13 Validation accuracy: 0.956
14 Validation accuracy: 0.9548
15 Validation accuracy: 0.9566
16 Validation accuracy: 0.9562
17 Validation accuracy: 0.9564
18 Validation accuracy: 0.9566
19 Validation accuracy: 0.956


#### Com tf.compat.v1.stop_gradient

In [16]:
tf.compat.v1.reset_default_graph()

# n_inputs, the layer sizes and learning_rate keep the values set in earlier cells.
X = tf.compat.v1.placeholder(tf.float32, shape=(None, n_inputs), name='X')
# NOTE: `(None)` is plain `None` (no trailing comma), so the shape of y is left unspecified.
y = tf.compat.v1.placeholder(tf.int32, shape=(None), name='y')

with tf.name_scope('dnn'):
    hidden1 = tf.compat.v1.layers.dense(X, n_hidden1, activation=tf.nn.relu, name='hidden1') # reused and frozen
    hidden2 = tf.compat.v1.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name='hidden2') # reused and frozen
    hidden2_stop = tf.compat.v1.stop_gradient(hidden2) # does the freezing: gradients do not flow below this point
    hidden3 = tf.compat.v1.layers.dense(hidden2_stop, n_hidden3, activation=tf.nn.relu, name='hidden3') # reused but still trained (above the stop_gradient and listed in train_vars)
    hidden4 = tf.compat.v1.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name='hidden4') # new
    logits = tf.compat.v1.layers.dense(hidden4, n_outputs, name='outputs') # new
    
with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(y, logits, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')
    
    
with tf.name_scope('train'):
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
    # The optimizer is additionally restricted to the variables of hidden3, hidden4 and outputs.
    train_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope='hidden[34]|outputs')
    training_op = optimizer.minimize(loss, var_list=train_vars)


# Saver restricted to the variables of hidden1-3, used only to load pretrained values.
reuse_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope='hidden[123]')
restore_saver = tf.compat.v1.train.Saver(reuse_vars) # restores layers 1-3

init = tf.compat.v1.global_variables_initializer()
saver = tf.compat.v1.train.Saver()

with tf.compat.v1.Session() as sess:
    # Initialize everything, then overwrite layers 1-3 with the checkpoint values.
    init.run()
    restore_saver.restore(sess, './my_model_final.ckpt')

    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)

    save_path = saver.save(sess, './my_new_model_final.ckpt')

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
0 Validation accuracy: 0.8804
1 Validation accuracy: 0.9094
2 Validation accuracy: 0.9314
3 Validation accuracy: 0.9368
4 Validation accuracy: 0.9428
5 Validation accuracy: 0.9464
6 Validation accuracy: 0.948
7 Validation accuracy: 0.9492
8 Validation accuracy: 0.9514
9 Validation accuracy: 0.9518
10 Validation accuracy: 0.9544
11 Validation accuracy: 0.9552
12 Validation accuracy: 0.9554
13 Validation accuracy: 0.9552
14 Validation accuracy: 0.957
15 Validation accuracy: 0.9562
16 Validation accuracy: 0.956
17 Validation accuracy: 0.9562
18 Validation accuracy: 0.9574
19 Validation accuracy: 0.9586


### Armazenando em cache as camadas congeladas

In [17]:
tf.compat.v1.reset_default_graph()

# n_inputs, the layer sizes and learning_rate keep the values set in earlier cells.
X = tf.compat.v1.placeholder(tf.float32, shape=(None, n_inputs), name='X')
# NOTE: `(None)` is plain `None` (no trailing comma), so the shape of y is left unspecified.
y = tf.compat.v1.placeholder(tf.int32, shape=(None), name='y')

with tf.name_scope('dnn'):
    hidden1 = tf.compat.v1.layers.dense(X, n_hidden1, activation=tf.nn.relu, name='hidden1') # reused and frozen
    hidden2 = tf.compat.v1.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name='hidden2') # reused, frozen and cached by the training cell
    hidden2_stop = tf.compat.v1.stop_gradient(hidden2) # does the freezing: gradients do not flow below this point
    hidden3 = tf.compat.v1.layers.dense(hidden2_stop, n_hidden3, activation=tf.nn.relu, name='hidden3') # reused but still trained (above the stop_gradient and listed in train_vars)
    hidden4 = tf.compat.v1.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name='hidden4') # new
    logits = tf.compat.v1.layers.dense(hidden4, n_outputs, name='outputs') # new
    
with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(y, logits, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')
    
    
with tf.name_scope('train'):
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
    # The optimizer is restricted to the variables of hidden3, hidden4 and outputs.
    train_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope='hidden[34]|outputs')
    training_op = optimizer.minimize(loss, var_list=train_vars)


# Saver restricted to the variables of hidden1-3, used only to load pretrained values.
reuse_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope='hidden[123]')
restore_saver = tf.compat.v1.train.Saver(reuse_vars) # restores layers 1-3

init = tf.compat.v1.global_variables_initializer()
saver = tf.compat.v1.train.Saver()

In [18]:
n_batches = len(X_train) // batch_size

with tf.compat.v1.Session() as sess:
    sess.run(init)
    restore_saver.restore(sess, './my_model_final.ckpt')

    # Evaluate hidden2 once for the whole training and validation sets; the
    # loop below feeds these cached values straight into `hidden2` instead of
    # feeding X.
    h2_cache = sess.run(hidden2, feed_dict={X: X_train})
    h2_cache_valid = sess.run(hidden2, feed_dict={X: X_valid})

    for epoch in range(n_epochs):
        # One fresh permutation per epoch, cut into n_batches index groups that
        # select matching rows from the cache and from the labels.
        shuffled_idx = np.random.permutation(len(X_train))
        for batch_idx in np.array_split(shuffled_idx, n_batches):
            sess.run(training_op, feed_dict={hidden2: h2_cache[batch_idx], y: y_train[batch_idx]})

        accuracy_val = sess.run(accuracy, feed_dict={hidden2: h2_cache_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)
    save_path = saver.save(sess, './my_new_model_final.ckpt')

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
0 Validation accuracy: 0.8894
1 Validation accuracy: 0.9264
2 Validation accuracy: 0.9374
3 Validation accuracy: 0.9434
4 Validation accuracy: 0.9454
5 Validation accuracy: 0.9496
6 Validation accuracy: 0.95
7 Validation accuracy: 0.9522
8 Validation accuracy: 0.9522
9 Validation accuracy: 0.9534
10 Validation accuracy: 0.953
11 Validation accuracy: 0.9554
12 Validation accuracy: 0.9564
13 Validation accuracy: 0.9548
14 Validation accuracy: 0.9572
15 Validation accuracy: 0.956
16 Validation accuracy: 0.9562
17 Validation accuracy: 0.957
18 Validation accuracy: 0.9566
19 Validation accuracy: 0.9568


### Cronograma de aprendizado

In [19]:
tf.compat.v1.reset_default_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10

X = tf.compat.v1.placeholder(tf.float32, shape=(None, n_inputs), name='X')
# NOTE: `(None)` is plain `None` (no trailing comma), so the shape of y is left unspecified.
y = tf.compat.v1.placeholder(tf.int32, shape=(None), name='y')

with tf.name_scope('dnn'):
    hidden1 = tf.compat.v1.layers.dense(X, n_hidden1, activation=tf.nn.relu, name='hidden1')
    hidden2 = tf.compat.v1.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name='hidden2')
    logits = tf.compat.v1.layers.dense(hidden2, n_outputs, name='outputs')

with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(y, logits, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')
    
with tf.name_scope('train'):
    initial_learning_rate = 0.1
    decay_steps = 10000
    decay_rate = 1/10
    # Step counter; trainable=False keeps it out of the trainable variables.
    global_step = tf.Variable(0, trainable=False, name='global_step')
    # From here on `learning_rate` is the result of exponential_decay, not the
    # plain float used before; a later cell assigns 0.01 to it again.
    learning_rate = tf.compat.v1.train.exponential_decay(initial_learning_rate, global_step, decay_steps, decay_rate)
    optimizer = tf.compat.v1.train.MomentumOptimizer(learning_rate, momentum=0.9)
    # global_step is handed to minimize() so the optimizer advances it on each training step.
    training_op = optimizer.minimize(loss, global_step=global_step)

init = tf.compat.v1.global_variables_initializer()
saver = tf.compat.v1.train.Saver()

In [20]:
n_epochs = 5
batch_size = 50

with tf.compat.v1.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        # One pass over the shuffled training set.
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Accuracy on the validation split after the epoch.
        accuracy_val = sess.run(accuracy, feed_dict={X: X_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)

    save_path = saver.save(sess, './my_model_final.ckpt')

0 Validation accuracy: 0.959
1 Validation accuracy: 0.9716
2 Validation accuracy: 0.9778
3 Validation accuracy: 0.9796
4 Validation accuracy: 0.9808


### Evitando overfitting por meio da regularização

#### Regularização l1 e l2

In [21]:
from tensorflow.keras.regularizers import l1

tf.compat.v1.reset_default_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10

X = tf.compat.v1.placeholder(tf.float32, shape=(None, n_inputs), name='X')
# NOTE: `(None)` is plain `None` (no trailing comma), so the shape of y is left unspecified.
y = tf.compat.v1.placeholder(tf.int32, shape=(None), name='y')
  
scale = 0.001 # l1 regularization hyperparameter

# Dense layer with ReLU and an l1 kernel regularizer pre-bound; the output layer
# below overrides the activation with None.
my_dense_layer = partial(tf.compat.v1.layers.dense, activation=tf.nn.relu, kernel_regularizer=l1(l=scale))

with tf.name_scope('dnn'):
    hidden1 = my_dense_layer(X, n_hidden1, name='hidden1')
    hidden2 = my_dense_layer(hidden1, n_hidden2, name='hidden2')
    logits = my_dense_layer(hidden2, n_outputs, activation=None, name='outputs')
    

with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    base_loss = tf.reduce_mean(xentropy, name='avg_xentropy')
    # Total loss = mean cross-entropy + every term found in the
    # REGULARIZATION_LOSSES collection of the graph.
    reg_losses = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.REGULARIZATION_LOSSES)
    loss = tf.add_n([base_loss] + reg_losses, name='loss')


with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(y, logits, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')

# Back to a plain float; the previous section had bound this name to a decay schedule.
learning_rate = 0.01

with tf.name_scope('train'):
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

init = tf.compat.v1.global_variables_initializer()
saver = tf.compat.v1.train.Saver()

In [22]:
n_epochs = 20
batch_size = 200

with tf.compat.v1.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        # Train on every shuffled mini-batch of the epoch.
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            batch_feed = {X: X_batch, y: y_batch}
            sess.run(training_op, feed_dict=batch_feed)
        # Then measure accuracy on the validation split.
        accuracy_val = sess.run(accuracy, feed_dict={X: X_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)

    save_path = saver.save(sess, './my_model_final.ckpt')

0 Validation accuracy: 0.8076
1 Validation accuracy: 0.8654
2 Validation accuracy: 0.885
3 Validation accuracy: 0.8936
4 Validation accuracy: 0.9022
5 Validation accuracy: 0.9062
6 Validation accuracy: 0.9106
7 Validation accuracy: 0.9106
8 Validation accuracy: 0.9124
9 Validation accuracy: 0.9146
10 Validation accuracy: 0.9168
11 Validation accuracy: 0.917
12 Validation accuracy: 0.9174
13 Validation accuracy: 0.9176
14 Validation accuracy: 0.9182
15 Validation accuracy: 0.9178
16 Validation accuracy: 0.9182
17 Validation accuracy: 0.9198
18 Validation accuracy: 0.9184
19 Validation accuracy: 0.9188


### Dropout

In [23]:
tf.compat.v1.reset_default_graph()

# n_inputs, n_hidden1, n_hidden2, n_outputs and learning_rate keep the values set in earlier cells.
X = tf.compat.v1.placeholder(tf.float32, shape=(None, n_inputs), name='X')
# NOTE: `(None)` is plain `None` (no trailing comma), so the shape of y is left unspecified.
y = tf.compat.v1.placeholder(tf.int32, shape=(None), name='y')
# Defaults to False; the training loop feeds True.
training = tf.compat.v1.placeholder_with_default(False, shape=(), name='training')

dropout_rate = 0.5  # == 1 - keep_prob
# Dropout is applied to the inputs as well as to both hidden layers below.
X_drop = tf.compat.v1.layers.dropout(X, dropout_rate, training=training)

with tf.name_scope('dnn'):
    hidden1 = tf.compat.v1.layers.dense(X_drop, n_hidden1, activation=tf.nn.relu, name='hidden1')
    hidden1_drop = tf.compat.v1.layers.dropout(hidden1, dropout_rate, training=training)
    hidden2 = tf.compat.v1.layers.dense(hidden1_drop, n_hidden2, activation=tf.nn.relu, name='hidden2')
    hidden2_drop = tf.compat.v1.layers.dropout(hidden2, dropout_rate, training=training)
    logits = tf.compat.v1.layers.dense(hidden2_drop, n_outputs, name='outputs')
    
with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

with tf.name_scope('train'):
    optimizer = tf.compat.v1.train.MomentumOptimizer(learning_rate, momentum=0.9)
    training_op = optimizer.minimize(loss)    

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(y, logits, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.compat.v1.global_variables_initializer()
saver = tf.compat.v1.train.Saver()

Instructions for updating:
Use keras.layers.dropout instead.


In [24]:
n_epochs = 20
batch_size = 50

with tf.compat.v1.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            # `training` is fed as True for training steps only.
            sess.run(
                training_op,
                feed_dict={X: X_batch, y: y_batch, training: True},
            )
        # `training` is not fed here, so its default (False) applies.
        accuracy_val = sess.run(accuracy, feed_dict={X: X_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)

    save_path = saver.save(sess, './my_model_final.ckpt')

0 Validation accuracy: 0.9238
1 Validation accuracy: 0.9448
2 Validation accuracy: 0.951
3 Validation accuracy: 0.9598
4 Validation accuracy: 0.9604
5 Validation accuracy: 0.9622
6 Validation accuracy: 0.964
7 Validation accuracy: 0.9648
8 Validation accuracy: 0.9676
9 Validation accuracy: 0.9668
10 Validation accuracy: 0.9702
11 Validation accuracy: 0.9704
12 Validation accuracy: 0.97
13 Validation accuracy: 0.9736
14 Validation accuracy: 0.9722
15 Validation accuracy: 0.9722
16 Validation accuracy: 0.9728
17 Validation accuracy: 0.9742
18 Validation accuracy: 0.9726
19 Validation accuracy: 0.972


### Regularização Max-Norm

In [25]:
def max_norm_regularizer(threshold, axes=1, name='max_norm', collection='max_norm'):
    """Build a regularizer-style callable that registers a max-norm clipping op.

    The returned function takes a weight variable, creates an op that assigns
    the norm-clipped weights back to the variable, and stores that op in a
    graph collection. It returns None, so it contributes no loss term; the
    stored ops have to be run explicitly.

    Args:
        threshold: Passed as ``clip_norm`` to ``tf.clip_by_norm``.
        axes: Passed as ``axes`` to ``tf.clip_by_norm``.
        name: Name given to the assign op.
        collection: Name of the graph collection the assign op is added to.

    Returns:
        A function ``f(weights)`` that registers the clipping op and returns None.
    """
    def apply_max_norm(weights):
        """Create the clip-and-assign op for ``weights`` and add it to ``collection``."""
        clip_weights = tf.compat.v1.assign(
            weights,
            tf.clip_by_norm(weights, clip_norm=threshold, axes=axes),
            name=name,
        )
        tf.compat.v1.add_to_collection(collection, clip_weights)
        # Falls through and returns None: no regularization loss is produced.
    return apply_max_norm

In [26]:
tf.compat.v1.reset_default_graph()

n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10

learning_rate = 0.01
momentum = 0.9

X = tf.compat.v1.placeholder(tf.float32, shape=(None, n_inputs), name='X')
# NOTE: `(None)` is plain `None` (no trailing comma), so the shape of y is left unspecified.
y = tf.compat.v1.placeholder(tf.int32, shape=(None), name='y')

# Callable passed as kernel_regularizer to both hidden layers below. It returns
# None (so no loss term) and stores a weight-clipping op in the 'max_norm' collection.
max_norm_reg = max_norm_regularizer(threshold=1.0)

with tf.name_scope('dnn'):
    hidden1 = tf.compat.v1.layers.dense(X, n_hidden1, activation=tf.nn.relu, kernel_regularizer=max_norm_reg, name='hidden1')
    hidden2 = tf.compat.v1.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, kernel_regularizer=max_norm_reg, name='hidden2')
    # The output layer is created without the max-norm regularizer.
    logits = tf.compat.v1.layers.dense(hidden2, n_outputs, name='outputs')

with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

with tf.name_scope('train'):
    optimizer = tf.compat.v1.train.MomentumOptimizer(learning_rate, momentum)
    training_op = optimizer.minimize(loss)    

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(y, logits, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    
init = tf.compat.v1.global_variables_initializer()
saver = tf.compat.v1.train.Saver()

In [27]:
n_epochs = 20
batch_size = 50

# All clipping ops that were stored in the 'max_norm' collection while the graph was built.
clip_all_weights = tf.compat.v1.get_collection('max_norm')

with tf.compat.v1.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            batch_feed = {X: X_batch, y: y_batch}
            sess.run(training_op, feed_dict=batch_feed)
            # Clip the weights right after every training step, as a separate run.
            sess.run(clip_all_weights)
        acc_valid = sess.run(accuracy, feed_dict={X: X_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', acc_valid)

    save_path = saver.save(sess, './my_model_final.ckpt')

0 Validation accuracy: 0.9598
1 Validation accuracy: 0.9708
2 Validation accuracy: 0.978
3 Validation accuracy: 0.9756
4 Validation accuracy: 0.9772
5 Validation accuracy: 0.9806
6 Validation accuracy: 0.979
7 Validation accuracy: 0.9816
8 Validation accuracy: 0.9806
9 Validation accuracy: 0.9826
10 Validation accuracy: 0.9844
11 Validation accuracy: 0.9844
12 Validation accuracy: 0.9822
13 Validation accuracy: 0.984
14 Validation accuracy: 0.986
15 Validation accuracy: 0.9816
16 Validation accuracy: 0.9838
17 Validation accuracy: 0.9848
18 Validation accuracy: 0.9846
19 Validation accuracy: 0.9848
