In [40]:
import tensorflow as tf
import keras
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
y_train

y_train_converted = keras.utils.to_categorical(y_train)
y_test_converted = keras.utils.to_categorical(y_test)

x_train_remolded = x_train.reshape((60000, 784))
x_test_remolded = x_test.reshape((10000, 784))

x_train_normalized = x_train_remolded.astype("float32") / 255
x_test_normalized = x_test_remolded.astype("float32") / 255

In [41]:
learning_rate = 0.001
epochs = 10
batch_size = 200
dropout_rate = 0.3
l2_alpha = 0.0001

In [42]:
n_input = x_train_normalized.shape[1]
n_layer_1 = 30
n_layer_2 = 30
n_class = y_train_converted.shape[1]

In [43]:
w1 = tf.Variable(tf.random.normal([n_input, n_layer_1], stddev=0.05))
b1 = tf.Variable(tf.zeros([n_layer_1]))

w2 = tf.Variable(tf.random.normal([n_layer_1, n_layer_2], stddev=0.05))
b2 = tf.Variable(tf.zeros([n_layer_2]))

w_out = tf.Variable(tf.random.normal([n_layer_2, n_class], stddev=0.05))
b_out = tf.Variable(tf.zeros([n_class]))

In [44]:
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

In [45]:
def feed_forward(x, training):
    layer_1 = tf.nn.relu(tf.add(tf.matmul(x, w1), b1))
    #layer_1 = tf.nn.dropout(layer_1, rate=dropout_rate if training else 0.0) dropout
    layer_2 = tf.nn.relu(tf.add(tf.matmul(layer_1, w2), b2))
    #layer_2 = tf.nn.dropout(layer_2, rate=dropout_rate if training else 0.0) dropout
    output = tf.matmul(layer_2, w_out) + b_out
    return output

# Função de perda
def loss_fn(logits, labels):
    cross = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    #l2_loss =  l2_alpha*(tf.nn.l2_loss(w1)+tf.nn.l2_loss(w2)+tf.nn.l2_loss(w_out)) #regularizador l2
    return cross #+ l2_loss

for epoch in range(epochs):
    for batch in range(0, len(x_train_normalized), batch_size):
        x_batch = x_train_normalized[batch:batch + batch_size]
        y_batch = y_train_converted[batch:batch + batch_size]

        with tf.GradientTape() as tape:
            logits = feed_forward(x_batch, training=True)
            loss = loss_fn(logits, y_batch)
        
        # Calcula e aplica os gradientes
        grads = tape.gradient(loss, [w1, b1, w2, b2, w_out, b_out])
        optimizer.apply_gradients(zip(grads, [w1, b1, w2, b2, w_out, b_out]))

    # Cálculo da acurácia (fora do tape)
    logits = feed_forward(x_train_normalized, training=False)
    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y_train_converted, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print(f'Epoch {epoch + 1}, Loss: {loss.numpy()}, Accuracy: {accuracy.numpy()}')

Epoch 1, Loss: 0.2307300567626953, Accuracy: 0.8936166763305664
Epoch 2, Loss: 0.17667171359062195, Accuracy: 0.9140333533287048
Epoch 3, Loss: 0.15888555347919464, Accuracy: 0.9222999811172485
Epoch 4, Loss: 0.14964668452739716, Accuracy: 0.9299166798591614
Epoch 5, Loss: 0.14561161398887634, Accuracy: 0.9363833069801331
Epoch 6, Loss: 0.14281314611434937, Accuracy: 0.9412500262260437
Epoch 7, Loss: 0.13974504172801971, Accuracy: 0.9451500177383423
Epoch 8, Loss: 0.13121157884597778, Accuracy: 0.9502666592597961
Epoch 9, Loss: 0.12687045335769653, Accuracy: 0.9538666605949402
Epoch 10, Loss: 0.12178163230419159, Accuracy: 0.9567999839782715
