# Treinando Redes Neurais Profundas

### Bibliotecas básicas

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
print(tf.__version__)

2.1.0


### Normalização em lote

In [None]:
from functools import partial

# The cells below use the TF1-style static graph API, so eager execution is
# switched off and any previously built default graph is discarded.
tf.compat.v1.disable_eager_execution()
tf.compat.v1.reset_default_graph()

batch_norm_momentum = 0.9
learning_rate = 0.01

# Layer widths: inputs are 28*28 features, followed by two hidden layers and
# ten output units.
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 100
n_outputs = 10

# Graph inputs. `training` defaults to False, so it only has to be fed
# (as True) by the code that runs the optimisation step.
X = tf.compat.v1.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.compat.v1.placeholder(tf.int32, shape=(None), name='y')
training = tf.compat.v1.placeholder_with_default(False, shape=(), name='training')

with tf.name_scope('dnn'):
    # NOTE(review): the initializer is created with its default arguments;
    # confirm those defaults match the He initialization the name suggests.
    he_init = tf.compat.v1.variance_scaling_initializer()

    # Pre-bind the arguments shared by every batch-normalization layer.
    my_batch_norm_layer = partial(
            tf.compat.v1.layers.batch_normalization,
            training=training,
            momentum=batch_norm_momentum)

    # Pre-bind the kernel initializer shared by every dense layer.
    my_dense_layer = partial(
            tf.compat.v1.layers.dense,
            kernel_initializer=he_init)

    # Each dense layer has no activation of its own: its output is
    # batch-normalized first and ELU is applied afterwards.
    hidden1 = my_dense_layer(X, n_hidden1, name='hidden1')
    bn1 = tf.nn.elu(my_batch_norm_layer(hidden1))
    hidden2 = my_dense_layer(bn1, n_hidden2, name='hidden2')
    bn2 = tf.nn.elu(my_batch_norm_layer(hidden2))
    # The output layer is batch-normalized as well, without an activation.
    logits_before_bn = my_dense_layer(bn2, n_outputs, name='outputs')
    logits = my_batch_norm_layer(logits_before_bn)

with tf.name_scope('loss'):
    # `y` holds integer class ids, hence the sparse cross-entropy variant.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope('train'):
    # `minimize` is not given any dependency on the UPDATE_OPS collection,
    # so whoever runs `training_op` has to run those ops alongside it.
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

with tf.name_scope('eval'):
    # Top-1 accuracy: fraction of rows whose highest logit is the label.
    correct = tf.nn.in_top_k(y, logits, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.compat.v1.global_variables_initializer()
saver = tf.compat.v1.train.Saver()

In [3]:
def shuffle_batch(X, y, batch_size):
    """Yield shuffled ``(X_batch, y_batch)`` pairs covering each sample once.

    A fresh random permutation of the row indices is drawn on every call and
    split into ``len(X) // batch_size`` nearly equal chunks, so no sample is
    dropped when ``len(X)`` is not a multiple of ``batch_size``.

    Args:
        X: array indexable by an integer index array (e.g. a NumPy array).
        y: array indexable the same way, aligned row-by-row with ``X``.
        batch_size: target number of samples per batch.

    Yields:
        Tuples ``(X_batch, y_batch)`` holding matching rows of ``X`` and ``y``.
        Nothing is yielded when ``X`` is empty; a single batch with every
        sample is yielded when ``len(X) < batch_size``.

    Raises:
        ZeroDivisionError: if ``batch_size`` is 0.
        ValueError: if ``batch_size`` is negative.
    """
    n_samples = len(X)
    if n_samples == 0:
        # Nothing to iterate over; np.array_split would otherwise be asked
        # for zero sections and raise.
        return

    n_batches = n_samples // batch_size
    if n_batches < 0:
        raise ValueError('batch_size must be positive')
    # Fewer samples than one batch: emit them all as a single batch instead
    # of asking np.array_split for zero sections.
    n_batches = max(n_batches, 1)

    rnd_idx = np.random.permutation(n_samples)
    for batch_idx in np.array_split(rnd_idx, n_batches):
        yield X[batch_idx], y[batch_idx]
        

# Load MNIST through Keras; each split is returned as an (images, labels) pair.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten every image to a row of 28*28 float32 values scaled by 1/255.
X_train = X_train.reshape(-1, 28 * 28).astype(np.float32) / 255.0
X_test = X_test.reshape(-1, 28 * 28).astype(np.float32) / 255.0

# Labels as int32, matching the dtype of the `y` placeholder.
y_train = y_train.astype(np.int32)
y_test = y_test.astype(np.int32)

# Hold out the first 5000 training rows for validation; the validation
# slices are taken before the training arrays are shortened.
X_valid = X_train[:5000]
y_valid = y_train[:5000]
X_train = X_train[5000:]
y_train = y_train[5000:]

In [4]:
# Training schedule for the batch-normalized network.
n_epochs = 20
batch_size = 200

# `training_op` was built without any dependency on the UPDATE_OPS
# collection, so those ops are fetched together with it on every step.
extra_update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)

with tf.compat.v1.Session() as sess:
    sess.run(init)
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            # `training` is fed as True only for the optimisation step.
            sess.run(
                [training_op, extra_update_ops],
                feed_dict={training: True, X: X_batch, y: y_batch},
            )
        # Validation relies on the placeholder default (training=False).
        accuracy_val = sess.run(accuracy, feed_dict={X: X_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)

    save_path = saver.save(sess, './my_model_final.ckpt')

0 Validation accuracy: 0.8924
1 Validation accuracy: 0.9208
2 Validation accuracy: 0.933
3 Validation accuracy: 0.9404
4 Validation accuracy: 0.9496
5 Validation accuracy: 0.954
6 Validation accuracy: 0.9588
7 Validation accuracy: 0.9584
8 Validation accuracy: 0.9606
9 Validation accuracy: 0.9636
10 Validation accuracy: 0.964
11 Validation accuracy: 0.9658
12 Validation accuracy: 0.9662
13 Validation accuracy: 0.9682
14 Validation accuracy: 0.9694
15 Validation accuracy: 0.9688
16 Validation accuracy: 0.9694
17 Validation accuracy: 0.971
18 Validation accuracy: 0.971
19 Validation accuracy: 0.9728


### Gradient Clipping (recorte de gradientes)

In [5]:
# Start from an empty default graph; the batch-norm model is discarded.
tf.compat.v1.reset_default_graph()

# Layer widths: one 300-unit layer followed by four 50-unit layers.
n_inputs = 28 * 28
n_hidden1 = 300
n_hidden2 = 50
n_hidden3 = 50
n_hidden4 = 50
n_hidden5 = 50
n_outputs = 10

learning_rate = 0.01

X = tf.compat.v1.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.compat.v1.placeholder(tf.int32, shape=(None), name='y')

# The placeholder, scope and layer names matter: later cells fetch 'X:0',
# 'y:0' and 'dnn/hidden3/Relu:0' from the saved graph by name.
with tf.name_scope('dnn'):
    hidden1 = tf.compat.v1.layers.dense(X, n_hidden1, activation=tf.nn.relu, name='hidden1')
    hidden2 = tf.compat.v1.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name='hidden2')
    hidden3 = tf.compat.v1.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name='hidden3')
    hidden4 = tf.compat.v1.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name='hidden4')
    hidden5 = tf.compat.v1.layers.dense(hidden4, n_hidden5, activation=tf.nn.relu, name='hidden5')
    # Output layer: raw logits, no activation.
    logits = tf.compat.v1.layers.dense(hidden5, n_outputs, name='outputs')

with tf.name_scope('loss'):
    # Mean sparse softmax cross-entropy over the batch.
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

In [6]:
# Every gradient component is clipped to the range [-threshold, threshold].
threshold = 1.0

# Instead of `minimize`, compute the gradients, clip them element-wise, and
# apply the clipped values. These ops are created outside any name scope and
# `apply_gradients` is given no explicit name; a later cell fetches the
# resulting op from the saved graph as 'GradientDescent'.
optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(loss)
capped_gvs = [(tf.clip_by_value(grad, -threshold, threshold), var) for grad, var in grads_and_vars]
training_op = optimizer.apply_gradients(capped_gvs)

with tf.name_scope('eval'):
    # Top-1 accuracy; named so it can be fetched later as 'eval/accuracy:0'.
    correct = tf.nn.in_top_k(y, logits, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')

init = tf.compat.v1.global_variables_initializer()
saver = tf.compat.v1.train.Saver()


n_epochs = 20
batch_size = 200


with tf.compat.v1.Session() as sess:
    init.run()
    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Report accuracy on the validation split once per epoch.
        accuracy_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)

    # Same path as the batch-norm model's checkpoint, which is overwritten.
    save_path = saver.save(sess, './my_model_final.ckpt')

0 Validation accuracy: 0.3242
1 Validation accuracy: 0.763
2 Validation accuracy: 0.8698
3 Validation accuracy: 0.8956
4 Validation accuracy: 0.905
5 Validation accuracy: 0.9188
6 Validation accuracy: 0.9214
7 Validation accuracy: 0.9316
8 Validation accuracy: 0.9368
9 Validation accuracy: 0.943
10 Validation accuracy: 0.945
11 Validation accuracy: 0.948
12 Validation accuracy: 0.9508
13 Validation accuracy: 0.9522
14 Validation accuracy: 0.9532
15 Validation accuracy: 0.9576
16 Validation accuracy: 0.9554
17 Validation accuracy: 0.9588
18 Validation accuracy: 0.959
19 Validation accuracy: 0.958


### Reutilizando um modelo TensorFlow

#### Carrega a estrutura do grafo e lista as operações

In [None]:
# Empty the default graph, then rebuild the saved structure from the
# checkpoint's .meta file; the returned saver is used to restore the weights.
tf.compat.v1.reset_default_graph()
saver = tf.compat.v1.train.import_meta_graph('./my_model_final.ckpt.meta')

# Print the name of every operation in the imported graph, one per line.
restored_graph = tf.compat.v1.get_default_graph()
for op in restored_graph.get_operations():
    print(op.name)

#### Carrega as operações que serão utilizadas

In [8]:
# Look up, by name, the handles needed to keep training the imported graph.
default_graph = tf.compat.v1.get_default_graph()

# Tensors: the two input placeholders and the accuracy metric.
X, y, accuracy = (
    default_graph.get_tensor_by_name(tensor_name)
    for tensor_name in ('X:0', 'y:0', 'eval/accuracy:0')
)

# Operation: the optimizer's training step.
training_op = default_graph.get_operation_by_name('GradientDescent')

#### Criando uma coleção contendo todas as operações importantes

In [9]:
# Collections are append-only lists, so running this cell more than once
# would leave duplicate entries and break the four-way unpacking done when
# the collection is read back. Clearing it first makes the cell re-runnable.
tf.compat.v1.get_default_graph().clear_collection('my_important_ops')

# Register the handles in the order in which they are later unpacked:
# X, y, accuracy, training_op.
for op in (X, y, accuracy, training_op):
    tf.compat.v1.add_to_collection('my_important_ops', op)

In [10]:
# Read the handles back from the collection. The unpacking expects exactly
# four entries, in the order they were added: X, y, accuracy, training_op.
X, y, accuracy, training_op = tf.compat.v1.get_collection('my_important_ops')

#### Inicia uma sessão, restaura o estado do modelo e continua treinando em seus dados:

In [11]:
with tf.compat.v1.Session() as sess:
    # No initializer is run: every variable value comes from the checkpoint.
    saver.restore(sess, './my_model_final.ckpt')

    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            batch_feed = {X: X_batch, y: y_batch}
            sess.run(training_op, feed_dict=batch_feed)
        # Report accuracy on the validation split once per epoch.
        accuracy_val = sess.run(accuracy, feed_dict={X: X_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)

    # The continued model is written to a separate checkpoint.
    save_path = saver.save(sess, './my_new_model_final.ckpt')

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
0 Validation accuracy: 0.9626
1 Validation accuracy: 0.9626
2 Validation accuracy: 0.964
3 Validation accuracy: 0.9654
4 Validation accuracy: 0.9664
5 Validation accuracy: 0.9636
6 Validation accuracy: 0.9668
7 Validation accuracy: 0.9682
8 Validation accuracy: 0.9682
9 Validation accuracy: 0.9698
10 Validation accuracy: 0.967
11 Validation accuracy: 0.9696
12 Validation accuracy: 0.9712
13 Validation accuracy: 0.9702
14 Validation accuracy: 0.97
15 Validation accuracy: 0.972
16 Validation accuracy: 0.9718
17 Validation accuracy: 0.9706
18 Validation accuracy: 0.9724
19 Validation accuracy: 0.972


#### Adicionamos uma nova 4ª camada oculta no topo da 3ª camada pré-treinada

In [12]:
tf.compat.v1.reset_default_graph()

n_hidden4 = 20  # new layer
n_outputs = 10  # new layer

# Re-import the saved graph; `saver` covers the variables it contains.
saver = tf.compat.v1.train.import_meta_graph('./my_model_final.ckpt.meta')

X = tf.compat.v1.get_default_graph().get_tensor_by_name('X:0')
y = tf.compat.v1.get_default_graph().get_tensor_by_name('y:0')

# ReLU output of the imported third hidden layer; the new layers attach here.
hidden3 = tf.compat.v1.get_default_graph().get_tensor_by_name('dnn/hidden3/Relu:0')

# New layers use names that differ from the imported 'hidden4' / 'outputs'.
new_hidden4 = tf.compat.v1.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name='new_hidden4')
new_logits = tf.compat.v1.layers.dense(new_hidden4, n_outputs, name='new_outputs')

# Loss, metric and optimizer are rebuilt on top of the new logits, under
# 'new_*' scopes so they do not collide with the imported ones.
with tf.name_scope('new_loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=new_logits)
    loss = tf.reduce_mean(xentropy, name='loss')

with tf.name_scope('new_eval'):
    correct = tf.nn.in_top_k(y, new_logits, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')

with tf.name_scope('new_train'):
    # No var_list is given, so the optimizer uses its default variable set.
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
    training_op = optimizer.minimize(loss)

# `init` and `new_saver` are created after the new layers exist, so they
# include the new variables as well as the imported ones.
init = tf.compat.v1.global_variables_initializer()
new_saver = tf.compat.v1.train.Saver()

In [13]:
with tf.compat.v1.Session() as sess:
    # Initialise every variable first, then overwrite the imported ones with
    # the values stored in the checkpoint.
    sess.run(init)
    saver.restore(sess, './my_model_final.ckpt')

    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            batch_feed = {X: X_batch, y: y_batch}
            sess.run(training_op, feed_dict=batch_feed)
        # Report accuracy on the validation split once per epoch.
        accuracy_val = sess.run(accuracy, feed_dict={X: X_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)

    # `new_saver` was created after the new layers, unlike `saver`.
    save_path = new_saver.save(sess, './my_new_model_final.ckpt')

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
0 Validation accuracy: 0.9146
1 Validation accuracy: 0.9328
2 Validation accuracy: 0.9418
3 Validation accuracy: 0.949
4 Validation accuracy: 0.9542
5 Validation accuracy: 0.9536
6 Validation accuracy: 0.9556
7 Validation accuracy: 0.9582
8 Validation accuracy: 0.9582
9 Validation accuracy: 0.962
10 Validation accuracy: 0.9608
11 Validation accuracy: 0.9642
12 Validation accuracy: 0.9616
13 Validation accuracy: 0.9632
14 Validation accuracy: 0.9646
15 Validation accuracy: 0.9648
16 Validation accuracy: 0.9666
17 Validation accuracy: 0.9662
18 Validation accuracy: 0.9656
19 Validation accuracy: 0.9684


### Congelando as camadas inferiores

In [15]:
tf.compat.v1.reset_default_graph()

n_inputs = 28 * 28
n_hidden1 = 300 # reused
n_hidden2 = 50  # reused
n_hidden3 = 50  # reused
n_hidden4 = 20  # new layer
n_outputs = 10  # new layer

X = tf.compat.v1.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.compat.v1.placeholder(tf.int32, shape=(None), name='y')

with tf.name_scope('dnn'):
    hidden1 = tf.compat.v1.layers.dense(X, n_hidden1, activation=tf.nn.relu, name='hidden1')       # reused
    hidden2 = tf.compat.v1.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name='hidden2') # reused
    hidden3 = tf.compat.v1.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name='hidden3') # reused
    hidden4 = tf.compat.v1.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name='hidden4') # new layer
    logits = tf.compat.v1.layers.dense(hidden4, n_outputs, name='outputs') # new layer

with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(y, logits, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')


with tf.name_scope('train'):
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
    # Only variables of hidden3, hidden4 and outputs are handed to the
    # optimizer; hidden1 and hidden2 are left out and therefore stay frozen.
    train_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope='hidden[34]|outputs')
    training_op = optimizer.minimize(loss, var_list=train_vars)


# Variables of hidden1-hidden3 are the ones loaded from the old checkpoint.
reuse_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope='hidden[123]')
restore_saver = tf.compat.v1.train.Saver(reuse_vars) # restores layers 1-3

init = tf.compat.v1.global_variables_initializer()
# Unlike `restore_saver`, this saver is not restricted to a variable subset.
saver = tf.compat.v1.train.Saver()

with tf.compat.v1.Session() as sess:
    # Initialise everything, then overwrite layers 1-3 from the checkpoint.
    init.run()
    restore_saver.restore(sess, './my_model_final.ckpt')

    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)

    save_path = saver.save(sess, './my_new_model_final.ckpt')

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
0 Validation accuracy: 0.9182
1 Validation accuracy: 0.9372
2 Validation accuracy: 0.9444
3 Validation accuracy: 0.9484
4 Validation accuracy: 0.9508
5 Validation accuracy: 0.953
6 Validation accuracy: 0.956
7 Validation accuracy: 0.9578
8 Validation accuracy: 0.9598
9 Validation accuracy: 0.9598
10 Validation accuracy: 0.9588
11 Validation accuracy: 0.958
12 Validation accuracy: 0.9582
13 Validation accuracy: 0.962
14 Validation accuracy: 0.9598
15 Validation accuracy: 0.9594
16 Validation accuracy: 0.9602
17 Validation accuracy: 0.9582
18 Validation accuracy: 0.9598
19 Validation accuracy: 0.9596


#### Com tf.compat.v1.stop_gradient

In [17]:
tf.compat.v1.reset_default_graph()

X = tf.compat.v1.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.compat.v1.placeholder(tf.int32, shape=(None), name='y')

with tf.name_scope('dnn'):
    hidden1 = tf.compat.v1.layers.dense(X, n_hidden1, activation=tf.nn.relu, name='hidden1') # reused and frozen
    hidden2 = tf.compat.v1.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name='hidden2') # reused and frozen
    hidden2_stop = tf.compat.v1.stop_gradient(hidden2) # performs the freezing: no gradient flows below this point
    # hidden3 is reused but NOT frozen: it sits above the stop_gradient and
    # its variables are included in `train_vars` below.
    hidden3 = tf.compat.v1.layers.dense(hidden2_stop, n_hidden3, activation=tf.nn.relu, name='hidden3') # reused, trainable
    hidden4 = tf.compat.v1.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name='hidden4') # new
    logits = tf.compat.v1.layers.dense(hidden4, n_outputs, name='outputs') # new

with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(y, logits, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')


with tf.name_scope('train'):
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
    # The optimizer is additionally restricted to hidden3, hidden4 and outputs.
    train_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope='hidden[34]|outputs')
    training_op = optimizer.minimize(loss, var_list=train_vars)


# Variables of hidden1-hidden3 are the ones loaded from the old checkpoint.
reuse_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope='hidden[123]')
restore_saver = tf.compat.v1.train.Saver(reuse_vars) # restores layers 1-3

init = tf.compat.v1.global_variables_initializer()
saver = tf.compat.v1.train.Saver()

with tf.compat.v1.Session() as sess:
    # Initialise everything, then overwrite layers 1-3 from the checkpoint.
    init.run()
    restore_saver.restore(sess, './my_model_final.ckpt')

    for epoch in range(n_epochs):
        for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        accuracy_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)

    save_path = saver.save(sess, './my_new_model_final.ckpt')

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
0 Validation accuracy: 0.8952
1 Validation accuracy: 0.9288
2 Validation accuracy: 0.9368
3 Validation accuracy: 0.9428
4 Validation accuracy: 0.9458
5 Validation accuracy: 0.947
6 Validation accuracy: 0.948
7 Validation accuracy: 0.9496
8 Validation accuracy: 0.951
9 Validation accuracy: 0.9522
10 Validation accuracy: 0.9528
11 Validation accuracy: 0.9544
12 Validation accuracy: 0.956
13 Validation accuracy: 0.9562
14 Validation accuracy: 0.9562
15 Validation accuracy: 0.9574
16 Validation accuracy: 0.956
17 Validation accuracy: 0.9578
18 Validation accuracy: 0.9582
19 Validation accuracy: 0.958


### Armazenando em cache as camadas congeladas

In [18]:
tf.compat.v1.reset_default_graph()

X = tf.compat.v1.placeholder(tf.float32, shape=(None, n_inputs), name='X')
y = tf.compat.v1.placeholder(tf.int32, shape=(None), name='y')

with tf.name_scope('dnn'):
    hidden1 = tf.compat.v1.layers.dense(X, n_hidden1, activation=tf.nn.relu, name='hidden1') # reused and frozen
    hidden2 = tf.compat.v1.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name='hidden2') # reused, frozen and cached
    hidden2_stop = tf.compat.v1.stop_gradient(hidden2) # performs the freezing: no gradient flows below this point
    # hidden3 is reused but NOT frozen: it sits above the stop_gradient and
    # its variables are included in `train_vars` below.
    hidden3 = tf.compat.v1.layers.dense(hidden2_stop, n_hidden3, activation=tf.nn.relu, name='hidden3') # reused, trainable
    hidden4 = tf.compat.v1.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name='hidden4') # new
    logits = tf.compat.v1.layers.dense(hidden4, n_outputs, name='outputs') # new

with tf.name_scope('loss'):
    xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
    loss = tf.reduce_mean(xentropy, name='loss')

with tf.name_scope('eval'):
    correct = tf.nn.in_top_k(y, logits, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')


with tf.name_scope('train'):
    optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate)
    # The optimizer is restricted to hidden3, hidden4 and outputs.
    train_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES, scope='hidden[34]|outputs')
    training_op = optimizer.minimize(loss, var_list=train_vars)


# Variables of hidden1-hidden3 are the ones loaded from the old checkpoint.
reuse_vars = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.GLOBAL_VARIABLES, scope='hidden[123]')
restore_saver = tf.compat.v1.train.Saver(reuse_vars) # restores layers 1-3

init = tf.compat.v1.global_variables_initializer()
saver = tf.compat.v1.train.Saver()

In [19]:
n_batches = len(X_train) // batch_size

with tf.compat.v1.Session() as sess:
    # Initialise everything, then overwrite layers 1-3 from the checkpoint.
    sess.run(init)
    restore_saver.restore(sess, './my_model_final.ckpt')

    # hidden1/hidden2 never change during this training run, so their output
    # is computed once for the training and validation sets and reused.
    h2_cache = sess.run(hidden2, feed_dict={X: X_train})
    h2_cache_valid = sess.run(hidden2, feed_dict={X: X_valid})

    for epoch in range(n_epochs):
        # One permutation per epoch, cut into n_batches index chunks; each
        # chunk selects matching rows of the cached activations and labels.
        shuffled_idx = np.random.permutation(len(X_train))
        for batch_idx in np.array_split(shuffled_idx, n_batches):
            hidden2_batch = h2_cache[batch_idx]
            y_batch = y_train[batch_idx]
            # Feeding `hidden2` directly bypasses the layers below it.
            sess.run(training_op, feed_dict={hidden2: hidden2_batch, y: y_batch})

        accuracy_val = sess.run(accuracy, feed_dict={hidden2: h2_cache_valid, y: y_valid})
        print(epoch, 'Validation accuracy:', accuracy_val)
    save_path = saver.save(sess, './my_new_model_final.ckpt')

INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
0 Validation accuracy: 0.8934
1 Validation accuracy: 0.917
2 Validation accuracy: 0.9322
3 Validation accuracy: 0.9404
4 Validation accuracy: 0.9442
5 Validation accuracy: 0.9488
6 Validation accuracy: 0.9494
7 Validation accuracy: 0.9522
8 Validation accuracy: 0.9532
9 Validation accuracy: 0.9542
10 Validation accuracy: 0.9552
11 Validation accuracy: 0.9552
12 Validation accuracy: 0.9548
13 Validation accuracy: 0.9556
14 Validation accuracy: 0.9564
15 Validation accuracy: 0.956
16 Validation accuracy: 0.9566
17 Validation accuracy: 0.9558
18 Validation accuracy: 0.9574
19 Validation accuracy: 0.9566
