
Imports e definição do dataset.

In [1]:
import tensorflow as tf
import time
from tensorflow import keras
import matplotlib.pyplot as plt
import pprint

In [2]:
datasets = [
    keras.datasets.mnist,
    keras.datasets.fashion_mnist,
    keras.datasets.cifar10,
    keras.datasets.cifar100
]
#Carrega duas tuplas, representando os dados de treinamento e de teste.
#Cada tupla tem as imagens e os respectivos rótulos
all_data = [dataset.load_data() for dataset in datasets]
all_train = [data[0] for data in all_data]
all_test = [data[1] for data in all_data]

Tarefa
Escreva código para executar redes neurais nos seguintes datasets:

MNIST (pode aproveitar o codigo existente)
Fashion MNIST
CIFAR-10
CIFAR-100
Cada execução deve ser por 10 épocas.

Você deve preencher as funções a seguir para retornarem a rede neural com a melhor configuração que você conseguiu para cada dataset. O notebook deve ser entregue com a rede neural que obteve a melhor performance em cada conjunto de dados.

IMPORTANTE: as funções não devem TREINAR nem AVALIAR as redes neurais, apenas instanciá-las e retorná-las.

Ao final, preencha o dict results com o desempenho encontrado em cada execução.

In [3]:
def get_mnist_network():
  num_classes = 10
  model = keras.Sequential([
      tf.keras.layers.Input(shape=(28, 28, 1)),

      tf.keras.layers.Conv2D(64, (3, 3), activation='gelu', padding='same'),
      tf.keras.layers.GaussianDropout(0.1),
      tf.keras.layers.Conv2D(64, (3, 3), activation='gelu', padding='same'),

      tf.keras.layers.MaxPooling2D((4, 4)),
      tf.keras.layers.GaussianDropout(0.25),

      tf.keras.layers.Flatten(),

      tf.keras.layers.Dense(50, activation='gelu'),
      tf.keras.layers.Dense(50, activation='gelu'),
      tf.keras.layers.GaussianDropout(0.5),

      tf.keras.layers.Dense(num_classes, activation='softmax')
  ])

  rate = keras.optimizers.schedules.ExponentialDecay(
    0.001,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

  model.compile(optimizer=tf.keras.optimizers.AdamW(learning_rate=rate,use_ema=True),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
  return model

def get_fashion_mnist_network():
  num_classes = 10
  model = keras.Sequential([
      tf.keras.layers.Input(shape=(28, 28, 1)),

      tf.keras.layers.Conv2D(64, (3, 3), activation='gelu', padding='same'),
      tf.keras.layers.GaussianDropout(0.1),
      tf.keras.layers.Conv2D(64, (3, 3), activation='gelu', padding='same'),

      tf.keras.layers.MaxPooling2D((4, 4)),
      tf.keras.layers.GaussianDropout(0.25),
      tf.keras.layers.Flatten(),

      tf.keras.layers.Dense(50, activation='gelu'),
      tf.keras.layers.Dense(50, activation='gelu'),
      tf.keras.layers.GaussianDropout(0.5),

      tf.keras.layers.Dense(num_classes, activation='softmax')
  ])

  rate = keras.optimizers.schedules.ExponentialDecay(
    0.001,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

  model.compile(optimizer=tf.keras.optimizers.AdamW(learning_rate=rate,use_ema=True),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
  return model

def get_cifar10_network():
  num_classes = 10
  def conv_layer(filters, kernel_size=(3, 3)):
    return (
        tf.keras.layers.Conv2D(filters, kernel_size, activation='gelu', padding='same'),
        tf.keras.layers.GaussianDropout(0.1),
        tf.keras.layers.Conv2D(filters, kernel_size, activation='gelu', padding='same'),

        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.GaussianDropout(0.25)
    )

  model = keras.Sequential([
      tf.keras.layers.Input(shape=(32, 32, 3)),
      tf.keras.layers.BatchNormalization(),

      *conv_layer(32),
      *conv_layer(64),
      *conv_layer(128),
      tf.keras.layers.Flatten(),

      tf.keras.layers.Dense(64, activation='relu'),
      tf.keras.layers.Dense(64, activation='relu'),

      tf.keras.layers.GaussianDropout(0.5),

      tf.keras.layers.Dense(num_classes, activation='softmax')
  ])

  rate = keras.optimizers.schedules.ExponentialDecay(
    0.001,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

  model.compile(optimizer=tf.keras.optimizers.AdamW(learning_rate=rate,use_ema=True),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
  return model

def get_cifar100_network():
  num_classes = 100
  def conv_layer(filters, kernel_size=(3, 3)):
    return (
        tf.keras.layers.Conv2D(filters, kernel_size, activation='gelu', padding='same'),
        tf.keras.layers.GaussianDropout(0.1),
        tf.keras.layers.Conv2D(filters, kernel_size, activation='gelu', padding='same'),

        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.GaussianDropout(0.25)
    )

  model = keras.Sequential([
      tf.keras.layers.Input(shape=(32, 32, 3)),
      tf.keras.layers.BatchNormalization(),

      *conv_layer(64),
      tf.keras.layers.BatchNormalization(),
      *conv_layer(128),
      tf.keras.layers.BatchNormalization(),
      *conv_layer(256),
      tf.keras.layers.BatchNormalization(),
      tf.keras.layers.Flatten(),

      tf.keras.layers.Dense(512, activation='gelu'),
      tf.keras.layers.BatchNormalization(),
      tf.keras.layers.Dense(512, activation='gelu'),

      tf.keras.layers.GaussianDropout(0.5),

      tf.keras.layers.Dense(num_classes, activation='softmax')
  ])

  rate = keras.optimizers.schedules.ExponentialDecay(
    0.003,
    decay_steps=100000,
    decay_rate=0.96,
    staircase=True)

  model.compile(optimizer=tf.keras.optimizers.AdamW(learning_rate=rate,use_ema=True),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
  return model


In [4]:
model_generators = [
    get_mnist_network,
    get_fashion_mnist_network,
    get_cifar10_network,
    get_cifar100_network
]

def time_func(func):
  start_time = time.perf_counter()
  func()
  end_time = time.perf_counter()
  return end_time - start_time

def train_model(index, verbose=False):
    model = model_generators[index]()

    if verbose:
      model.summary()
    verbose = 1 if verbose else 0

    train_time = time_func(lambda: model.fit(*all_train[index], validation_data=all_test[index], verbose=verbose, epochs=10))


    # Avalie o modelo no conjunto de teste
    test_loss, test_accuracy = model.evaluate(*all_test[index], verbose=verbose)
    print(f'Acurácia no conjunto de teste: {test_accuracy * 100:.2f}%')
    print(f'Tempo de treino: {train_time:.2f}s')
    return {"model": model, "acc": test_accuracy, "time": train_time}

In [5]:
all_models = enumerate(["mnist", "fashion_mnist", "cifar10", "cifar100"])
results = {
    nome: {"time": result["time"], "acc": result["acc"] } for i, nome in all_models if (result := train_model(i, verbose=True))
}
pprint.pprint(results)

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 6ms/step - accuracy: 0.2922 - loss: 2.4558 - val_accuracy: 0.8686 - val_loss: 0.3078
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8576 - loss: 0.3750 - val_accuracy: 0.9716 - val_loss: 0.1073
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9578 - loss: 0.1707 - val_accuracy: 0.9813 - val_loss: 0.0696
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9674 - loss: 0.1282 - val_accuracy: 0.9833 - val_loss: 0.0607
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9701 - loss: 0.1136 - val_accuracy: 0.9853 - val_loss: 0.0547
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9722 - loss: 0.1088 - val_accuracy: 0.9830 - val_loss: 0.0621
Epoch 7/10
[1m

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 6ms/step - accuracy: 0.6198 - loss: 1.5205 - val_accuracy: 0.8661 - val_loss: 0.3698
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8524 - loss: 0.4356 - val_accuracy: 0.8763 - val_loss: 0.3440
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8736 - loss: 0.3753 - val_accuracy: 0.8875 - val_loss: 0.3159
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8830 - loss: 0.3462 - val_accuracy: 0.8964 - val_loss: 0.2944
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8879 - loss: 0.3260 - val_accuracy: 0.8966 - val_loss: 0.2960
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.8981 - loss: 0.2959 - val_accuracy: 0.8954 - val_loss: 0.2960
Epoch 7/10
[1m

Epoch 1/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 15ms/step - accuracy: 0.3277 - loss: 1.8512 - val_accuracy: 0.6305 - val_loss: 1.0505
Epoch 2/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.6155 - loss: 1.1300 - val_accuracy: 0.7199 - val_loss: 0.8237
Epoch 3/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.6889 - loss: 0.9259 - val_accuracy: 0.7421 - val_loss: 0.7649
Epoch 4/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.7210 - loss: 0.8330 - val_accuracy: 0.7500 - val_loss: 0.7488
Epoch 5/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.7474 - loss: 0.7572 - val_accuracy: 0.7737 - val_loss: 0.6755
Epoch 6/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 7ms/step - accuracy: 0.7629 - loss: 0.7167 - val_accuracy: 0.7931 - val_loss: 0.6296
Epoch 7/1

Epoch 1/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 23ms/step - accuracy: 0.0514 - loss: 4.6377 - val_accuracy: 0.1387 - val_loss: 3.6148
Epoch 2/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.1337 - loss: 3.6619 - val_accuracy: 0.2319 - val_loss: 3.0913
Epoch 3/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.2088 - loss: 3.2081 - val_accuracy: 0.2902 - val_loss: 2.8030
Epoch 4/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.2763 - loss: 2.8528 - val_accuracy: 0.3528 - val_loss: 2.4933
Epoch 5/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 13ms/step - accuracy: 0.3353 - loss: 2.5641 - val_accuracy: 0.4061 - val_loss: 2.2412
Epoch 6/10
[1m1563/1563[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 12ms/step - accuracy: 0.3863 - loss: 2.3260 - val_accuracy: 0.4234 - val_loss: 2.1927
Epoc

Preencha o dict abaixo substituindo os None com a acuracia final (acc) e o tempo de treinamento (time) encontrado no seu experimento pra cada dataset

In [6]:
results = {
    'mnist':         {'acc': 0.9879000186920166, 'time': 102.20615267100038},
    'fashion_mnist': {'acc': 0.9099000096321106, 'time': 100.50723515699974},
    'cifar10':       {'acc': 0.8201000094413757, 'time': 130.7743950240001 },
    'cifar100':      {'acc': 0.5421000123023987, 'time': 233.28933259800033}
}