In [3]:
import numpy as np
import pandas as pd
import os

os.environ['KERAS_BACKEND'] = 'tensorflow'

In [4]:
# The backend must be set before importing keras, not after
import keras as keras
from keras import layers
import keras.datasets.fashion_mnist

In [5]:
# Model / data parameters
num_classes = 10
input_shape = (28, 28, 1)

In [6]:
# # Load the data and split it between train and test sets
(Xtrain, ytrain), (Xtest, ytest) = keras.datasets.fashion_mnist.load_data()

# Scale images to the [0, 1] range
Xtrain = Xtrain.astype("float32") / 255
Xtest = Xtest.astype("float32") / 255

# Make sure images have shape (28, 28, 1)
Xtrain = np.expand_dims(Xtrain, -1)
Xtest = np.expand_dims(Xtest, -1)

print("x_train shape:", Xtrain.shape)
print(Xtrain.shape[0], "train samples")
print(Xtest.shape[0], "test samples")

#make sure all have same type
ytrain = ytrain.astype('float32')
ytest = ytest.astype('float32')

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


Task#1: Implement the following provided fully connected NN.

In [7]:
# Task1: Implement a defined NN model 
# Define the fully connected NN model structure using keras.Sequential
model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        # Hidden layer 1 = 256 nodes, linear activation
        layers.Dense(256, activation='linear'),
        # Hidden layer 2: 128 nodes, linear activation
        layers.Dense(128, activation='linear'),
        layers.Flatten(), # Add this line to flatten the output of the model
        # Output layer: 10 nodes, one per class
        layers.Dense(num_classes, activation='softmax'),
    ]
)

In [8]:
# View model summary
model.summary()

Keras contains two methods for training neural networks: model.compile() and model.fit(). Training options like the optimization algorithm and the loss function are set using model.compile(). The compiled model is trained using model.fit(), which also sets the batch size and number of epochs. model.compile() must always be called before model.fit(). A list of parameters for each method is in the https://keras.io/api/

You will need to respecify the optimizer to the defined CNN for question 3.

In [9]:
# Specify training choices (optimizer, loss function, metrics)
model.compile(
    optimizer='rmsprop',  # Optimizer
    # Loss function to minimize
    loss='SparseCategoricalCrossentropy',
    # List of metrics to monitor
    metrics=['accuracy'],
)

In [10]:
# Train the model
# set batch_size = 128, epochs = 10
training = model.fit(Xtrain, ytrain, batch_size=128, epochs=10)

Epoch 1/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 121ms/step - accuracy: 0.7361 - loss: 0.7917
Epoch 2/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 122ms/step - accuracy: 0.8328 - loss: 0.4791
Epoch 3/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 123ms/step - accuracy: 0.8411 - loss: 0.4521
Epoch 4/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 117ms/step - accuracy: 0.8502 - loss: 0.4321
Epoch 5/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 119ms/step - accuracy: 0.8521 - loss: 0.4216
Epoch 6/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 119ms/step - accuracy: 0.8560 - loss: 0.4177
Epoch 7/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 120ms/step - accuracy: 0.8574 - loss: 0.4151
Epoch 8/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 121ms/step - accuracy: 0.8571 - loss: 0.4108
Epoch 9/10
[1m4

In [11]:
# View the training history
training.history

{'accuracy': [0.7926833629608154,
  0.835099995136261,
  0.8428666591644287,
  0.8479666709899902,
  0.8509500026702881,
  0.8543000221252441,
  0.8562666773796082,
  0.8566333055496216,
  0.8584166765213013,
  0.8580999970436096],
 'loss': [0.6023350954055786,
  0.47249531745910645,
  0.44957083463668823,
  0.4354937970638275,
  0.4266079366207123,
  0.42140889167785645,
  0.4156930148601532,
  0.4103866517543793,
  0.4072835147380829,
  0.4052478075027466]}

In [14]:
# Evaluate the trained model on the testing data
score = model.evaluate(Xtest, ytest, verbose=0)
print("Test loss:", score[0])
print("Test accuracy:", score[1])

Test loss: 0.481064110994339
Test accuracy: 0.829800009727478


In [16]:
# Task2: Define and Implement a CNN model using keras.Sequential
# your code goes from here


In [17]:
# View model summary
model.summary()

In [None]:
# Task2: Define and Implement a CNN model using keras.Sequential
# CNN model structure
# Define the CNN model structure using keras.Sequential
input_shape = (28, 28, 1)
num_classes = 10


In [20]:
# Specify training choices (optimizer, loss function, metrics)
modelCNN = keras.Sequential([
    keras.Input(shape=input_shape),
    # Convolutional layer 1: 32 filters, 3x3 kernel, ReLU activation
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
    
    # Convolutional layer 2: 64 filters, 3x3 kernel, ReLU activation
    layers.MaxPooling2D(pool_size=(2, 2)),
    
    # Flatten the output of the model    
    layers.Flatten(),
    
    #Fulle connected hidden layer
    layers.Dense(128, activation='relu'),
    
    layers.Dropout(0.5),
    
    layers.Dense(num_classes, activation='softmax'),
    # Output layer: 10 nodes, one per class
])

In [22]:
modelCNN.summary()


In [28]:
# Specify training choices (optimizer, loss function, metrics)
modelCNN.compile(
    optimizer='adam',  # Optimizer suggested by homework instructions
    # Loss function to minimize
    loss='SparseCategoricalCrossentropy',
    # List of metrics to monitor
    metrics=['accuracy'],
)

In [30]:

# Train the model
#  train it with a batch size = 128 for 10 epochs
trainingCNN = modelCNN.fit(Xtrain, ytrain, batch_size=128, epochs=10)


Epoch 1/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 21ms/step - accuracy: 0.9495 - loss: 0.1357
Epoch 2/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 23ms/step - accuracy: 0.9543 - loss: 0.1262
Epoch 3/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 22ms/step - accuracy: 0.9565 - loss: 0.1152
Epoch 4/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 23ms/step - accuracy: 0.9576 - loss: 0.1110
Epoch 5/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 21ms/step - accuracy: 0.9625 - loss: 0.0991
Epoch 6/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 22ms/step - accuracy: 0.9648 - loss: 0.0919
Epoch 7/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 22ms/step - accuracy: 0.9667 - loss: 0.0859
Epoch 8/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 22ms/step - accuracy: 0.9703 - loss: 0.0787
Epoch 9/10
[1m469/469[

In [31]:
# View the training loss and accuracy 
score = modelCNN.evaluate(Xtest, ytest, verbose=0)
print("CNN Test loss:", score[0])
print("CNN Test accuracy:", score[1])


CNN Test loss: 0.3075979948043823
CNN Test accuracy: 0.9246000051498413
