In [None]:
# Setup, Version check and Common imports

# Python ≥ 3.7 is required
import sys
assert sys.version_info >= (3, 7)


# TensorFlow ≥ 2.8 is required
import tensorflow as tf
from packaging import version

assert version.parse(tf.__version__) >= version.parse("2.8.0")

# Common imports
import numpy as np
import os

from tensorflow import keras
from tensorflow.keras import layers

import matplotlib.pyplot as plt

# Matplotlib defaults applied to every figure created after this cell
plt.rcParams.update({
    'font.size': 14,
    'axes.labelsize': 14,
    'axes.titlesize': 14,
    'legend.fontsize': 14,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
})

# Report the interpreter version, the library versions and whether a GPU is visible
gpu_devices = tf.config.list_physical_devices('GPU')
print('Python version: ', sys.version_info)
print('TF version: ', tf.__version__)
print('Keras version: ', keras.__version__)
print('GPU is', 'available' if gpu_devices else 'NOT AVAILABLE')

**1. Obtaining and Splitting the Dataset into 3 Sets (Train, Test, Validation)**


In [None]:

# Load the CIFAR-100 dataset from the Keras datasets:
# https://keras.io/api/datasets/cifar100/
# https://www.cs.toronto.edu/~kriz/cifar.html

# load_data() returns the train and test sets. The label_mode parameter selects
# the category labels: 'fine' or 'coarse'.
# In this class we adopt the coarse classification, corresponding to 20 categories.

# The loader is imported through tensorflow.keras so that it comes from the same
# Keras package as the `keras` and `layers` names used in the rest of the notebook.
from tensorflow.keras.datasets import cifar100
from sklearn.model_selection import train_test_split

(train_images_full, train_labels_full), (test_images, test_labels) = cifar100.load_data(label_mode='coarse')

# squeeze() removes the length-1 axes of the label arrays
train_labels_full = train_labels_full.squeeze()
test_labels = test_labels.squeeze()


# Further divide the original training set into train (90%) and validation (10%) sets.
# stratify keeps the class proportions of train_labels_full in both splits;
# random_state makes the split reproducible.
train_images, valid_images, train_labels, valid_labels = train_test_split(
    train_images_full, train_labels_full,
    test_size=0.1,
    random_state=42,
    stratify=train_labels_full
)

# Normalize pixel values to the interval [0, 1].
# The arrays are cast to float32 before dividing: load_data() is documented to
# return integer (uint8) pixels, and dividing those by 255.0 directly would yield
# float64 arrays, which take twice the memory of float32.
train_images = train_images.astype("float32") / 255.0
valid_images = valid_images.astype("float32") / 255.0
test_images = test_images.astype("float32") / 255.0


In [None]:

# Complete this section
# Confirm the dimensions of all tensors previously created

# PLACE CODE HERE



**Questions:**

1. What is the shape of each tensor? What does each of its dimensions correspond to?

2. How many elements does each of these sets have?

3. Why do we need three sets?

**2. Visualizing Some Examples**

In [None]:
# Show a grid of training images, each one titled with its coarse category name

# Category names, ordered so that the list index is the label value
coarse_names = [
    "aquatic mammals",
    "fish",
    "flowers",
    "food containers",
    "fruit and vegetables",
    "household electrical devices",
    "household furniture",
    "insects",
    "large carnivores",
    "large man-made outdoor things",
    "large natural outdoor scenes",
    "large omnivores and herbivores",
    "medium mammals",
    "non-insect invertebrates",
    "people",
    "reptiles",
    "small mammals",
    "trees",
    "vehicles 1",
    "vehicles 2",
]

n_rows = 6
n_cols = 6

# Change the value of start to visualize different examples
start = 0

# squeeze=False keeps `axes` a 2-D array for any grid size; iterating axes.flat
# visits the cells row by row, i.e. in the order n_cols * row + col
fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 2, n_rows * 1.2), squeeze=False)
for offset, ax in enumerate(axes.flat):
    sample = start + offset
    ax.imshow(train_images[sample])
    ax.axis('off')
    ax.set_title(coarse_names[train_labels[sample]], fontsize=8)
fig.subplots_adjust(wspace=0.2, hspace=0.5)
plt.show()


**3. Linear Feed-Forward NN**

In [None]:
# Build a feed-forward NN with the Keras Sequential API: https://keras.io/api/models/

# This linear version has no hidden layers:
# 1. It has an input layer that receives the 32x32x3 images
# 2. It has a final layer with the SoftMax activation function; its number of units must match the number of classes
# Complete the missing details (exercise scaffold: the model below is intentionally incomplete).

# Reset the Keras global state and fix the TensorFlow / NumPy seeds before building the model
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

linearModel = keras.Sequential([
    # Input layer: one 32x32x3 image per example
    layers.Input(shape=[32,32,3]),


    # No hidden layers

    # Final layer
    # TODO: add the output layer here


])



**Question:**

Explain all the details of the NN that was just created:
1. Number of input nodes
2. Number of output nodes
3. Selection of activation functions


In [None]:
# Summary of the linear network architecture
# (use its output to answer the questions about the number of weights)

linearModel.summary()


**Question:**

1. How many weights does the NN have?

2. How were these weights initialized?



In [None]:
# Model compilation: https://keras.io/api/models/model_training_apis/
# compile() receives the three components that configure training:
# 1. the optimizer used in training
# 2. the loss function
# 3. the evaluation metric

linearModel.compile(
    optimizer=keras.optimizers.SGD(),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)



In [None]:
# Model training
# Hyper-parameters: batch size and number of epochs
# A validation dataset can also be provided; it is passed here as validation_data

# fit() returns a History object.
# Its History.history attribute records the training loss and metric values at successive epochs,
# as well as the validation loss and validation metric values (if applicable).

history = linearModel.fit(
    x=train_images,
    y=train_labels,
    batch_size=32,
    epochs=20,
    validation_data=(valid_images, valid_labels),
)



In [None]:

# Plot the evolution of the training and validation accuracy across epochs.
# The figure is titled and labelled so it can be told apart from the equivalent
# plots produced later for the other models.

import pandas as pd

# One row per epoch, one column per accuracy curve
accuracy_curves = pd.DataFrame(history.history, columns=['accuracy', 'val_accuracy'])
ax = accuracy_curves.plot(figsize=(8, 5), title='Linear model: accuracy per epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.grid(True)
plt.show()



In [None]:

# Measure how well the linear model generalizes.
# The test set, not used so far, is used in this step.
# evaluate() computes the loss and the compiled metrics on a set of examples:
# https://keras.io/api/models/model_training_apis/

test_loss, test_acc = linearModel.evaluate(x=test_images, y=test_labels)
print(f"Test Accuracy: {test_acc}")


**Questions:**

1. How do you analyze these results?

2. What is the difference between the validation and test datasets?



**4. Shallow Feed-forward NN**

In [None]:
# Build a feed-forward NN with the Keras Sequential API: https://keras.io/api/models/

# This shallow version has a single hidden layer:
# 1. It has an input layer to receive information
# 2. It has one hidden layer with 128 units and the sigmoid activation function
# 3. It has a final layer with the SoftMax activation function; its number of units must match the number of classes

# Complete the missing details (exercise scaffold: the model below is intentionally empty).

# Reset the Keras global state and fix the TensorFlow / NumPy seeds before building the model
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)


shallowModel = keras.Sequential([
    # Input layer
    # TODO: add the input layer here


    # One hidden layer
    # TODO: add the hidden layer here


    # Final layer
    # TODO: add the output layer here


])




In [None]:
# Summary of the shallow network architecture
# (compare the reported parameter count with the one of the linear model)

shallowModel.summary()



In [None]:
# Detailed analysis of one layer: print its name and its parameter arrays

# NOTE(review): index 1 is presumably the hidden layer — confirm once shallowModel is completed
hidden1 = shallowModel.layers[1]
weights, biases = hidden1.get_weights()

print('Layer ', hidden1.name)
for label, values in (('Weights with shape ', weights), ('Biases with shape ', biases)):
    print(label, values.shape, ' :\n', values)


In [None]:
# Shallow model: configure training, then fit.
# Same optimizer, loss, metric, batch size and number of epochs as the linear model.

shallowModel.compile(
    optimizer=keras.optimizers.SGD(),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

history = shallowModel.fit(
    x=train_images,
    y=train_labels,
    batch_size=32,
    epochs=20,
    validation_data=(valid_images, valid_labels),
)




In [None]:

# Plot the training and validation accuracy of the shallow model across epochs.
# The figure is titled and labelled so it can be told apart from the plots of the other models.
accuracy_curves = pd.DataFrame(history.history, columns=['accuracy', 'val_accuracy'])
ax = accuracy_curves.plot(figsize=(8, 5), title='Shallow model: accuracy per epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.grid(True)
plt.show()


In [None]:
# Measure how well the shallow model generalizes.
# The test set is used in this step.
# evaluate() computes the loss and the compiled metrics on a set of examples:
# https://keras.io/api/models/model_training_apis/

test_loss, test_acc = shallowModel.evaluate(x=test_images, y=test_labels)
print(f"Test Accuracy: {test_acc}")

**Question:**

1. How do you analyze these results?

**5. Deep Feed-forward NN**

In [None]:
# Build a feed-forward NN with the Keras Sequential API: https://keras.io/api/models/

# This deep version has two hidden layers:
# 1. It has an input layer to receive information
# 2. The first hidden layer has 128 units with the sigmoid activation function
# 3. The second hidden layer has 64 units with the sigmoid activation function
# 4. It has a final layer with the SoftMax activation function; its number of units must match the number of classes

# Complete the missing details (exercise scaffold: the model below is intentionally empty).

# Reset the Keras global state and fix the TensorFlow / NumPy seeds before building the model
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)


deepModel = keras.Sequential([
    # Input layer
    # TODO: add the input layer here


    # Two hidden layers
    # TODO: add the two hidden layers here



    # Final layer
    # TODO: add the output layer here


])


In [None]:
# Summary of the deep network architecture
# (compare the reported parameter count with the ones of the linear and shallow models)

deepModel.summary()

In [None]:
# Deep model: configure training, then fit.
# Same optimizer, loss, metric, batch size and number of epochs as the previous models.

deepModel.compile(
    optimizer=keras.optimizers.SGD(),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

history = deepModel.fit(
    x=train_images,
    y=train_labels,
    batch_size=32,
    epochs=20,
    validation_data=(valid_images, valid_labels),
)


In [None]:

# Plot the training and validation accuracy of the deep model across epochs.
# The figure is titled and labelled so it can be told apart from the plots of the other models.
accuracy_curves = pd.DataFrame(history.history, columns=['accuracy', 'val_accuracy'])
ax = accuracy_curves.plot(figsize=(8, 5), title='Deep model: accuracy per epoch')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.grid(True)
plt.show()


In [None]:
# Measure how well the deep model generalizes.
# The test set is used in this step.
# evaluate() computes the loss and the compiled metrics on a set of examples:
# https://keras.io/api/models/model_training_apis/

test_loss, test_acc = deepModel.evaluate(x=test_images, y=test_labels)
print(f"Test Accuracy: {test_acc}")


**Question:**

1. Analyze and compare all results that were obtained with the three models

**6. Model Saving**

In [None]:
# Save the model. All model details (architecture, configuration, weights) can be saved to disk.
# NOTE(review): the previous comment stated that save() creates a folder. The target used here
# has a ".keras" extension, which recent Keras versions write as a single archive file rather
# than a folder — confirm against the installed TF/Keras version.

deepModel.save("Modelo_Aula2.keras")

# The model can be later retrieved with the method load_model(), e.g.:
# keras.models.load_model("Modelo_Aula2.keras")


In [None]:
# Render the architecture of the deep model as a PNG image and save it to a file
# NOTE(review): plot_model presumably needs the pydot and graphviz packages installed — confirm

dot_img_file = 'deep.png'
keras.utils.plot_model(
    deepModel,
    to_file=dot_img_file,
    show_shapes=True,
    show_layer_activations=True,
)



**7. Changes in the Architecture of the Neural Network**

The performance of the neural network is poor. It is mandatory to perform changes, aiming at improving its performance. There are several possibilities that can be tested:

The proposed neural network may not be the most suitable for this problem. Change its architecture (number of layers / number of neurons per layer) and document how performance changes. The following constraints apply:
  *    The Keras Sequential API must be used
  *    Only Flatten and Dense layers can be used
  *    Activation functions: Sigmoid, Tanh, SoftMax
  *    Optimizer: SGD
  *    Budget: 2 million weights

Perform some tests, document how results change and present a simple analysis of the outcome.

Your target is to achieve 35% accuracy on the test dataset







In [None]:
# Code goes here
# Constraints stated in the section text: Keras Sequential API, only Flatten and Dense layers,
# Sigmoid / Tanh / SoftMax activations, SGD optimizer, budget of 2 million weights.

# Reset the Keras global state and fix the TensorFlow / NumPy seeds before building the model
keras.backend.clear_session()
tf.random.set_seed(42)
np.random.seed(42)

newModel = keras.Sequential([
    # Input layer
    # TODO: add the input layer here


    # Hidden layers
    # TODO: add the hidden layers here


    # Final layer
    # TODO: add the output layer here


])

# Compile
# TODO: compile newModel



# Train
# TODO: fit newModel on the train set, using the validation set for monitoring



# Evaluate
# TODO: evaluate newModel on the test set


