In [None]:
"""
zalando_20230612.py
David Nilsson - Prime Fitness Studio AB
2023-06-12
"""

Import needed libraries

In [None]:
import tensorflow as tf
import tensorflow.keras as keras
# Checking the version of TensorFlow
print('TensorFlow version:', tf.__version__)
#from tensorflow import keras

In [None]:
import keras_tuner
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.utils  import to_categorical
import keras_tuner as kt

In [None]:
# Helper libraries
import numpy as np
import sklearn
from   sklearn.model_selection import train_test_split

In [None]:
# Matplotlib plotting
import matplotlib
import matplotlib.pyplot as plt

In [None]:
"""
To make hyperparameter optimization easier, the function build_model() can be used.

"""
# Defining a Keras model to search optimized hyper parameters
def build_model(hp):
    """Build and compile a dense classifier for a Keras Tuner search.

    Args:
        hp: Hyperparameter container supplied by the tuner. The following
            values are sampled from it: ``num_layers`` (1-3), one
            ``units_{i}`` per hidden layer (8-512, step 32), a shared
            ``activation`` (``relu`` or ``tanh``), a ``dropout`` switch
            and the learning rate ``lr`` (1e-4 to 1e-2, log-sampled).

    Returns:
        A compiled ``keras.Sequential`` model with a 10-way softmax output,
        trained with categorical cross-entropy and reporting accuracy.
    """
    model = keras.Sequential()
    model.add(layers.Flatten())
    # Tune the number of hidden layers; each layer gets its own width.
    for i in range(hp.Int("num_layers", 1, 3)):
        model.add(
            layers.Dense(
                units=hp.Int(f"units_{i}", min_value=8, max_value=512, step=32),
                activation=hp.Choice("activation", ["relu", "tanh"]),
            )
        )
    if hp.Boolean("dropout"):
        model.add(layers.Dropout(rate=0.25))
    model.add(layers.Dense(10, activation="softmax"))
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
        # Use the sampled learning rate; a fixed value here would make the
        # "lr" hyperparameter have no effect on the search.
        optimizer=keras.optimizers.RMSprop(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [None]:
"""
This code is to load the dataset fashion_mnist
"""
# Fetch the Fashion-MNIST training and test arrays through the Keras dataset loader
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images0, train_labels0), (test_images, test_labels) = fashion_mnist.load_data()

In [None]:
# Human-readable names of the ten classes; the list position is the class id
class_names = [
    'T-shirt/top',
    'Trouser',
    'Pullover',
    'Dress',
    'Coat',
    'Sandal',
    'Shirt',
    'Sneaker',
    'Bag',
    'Ankle boot',
]

In [None]:
# Hold out 20% of the loaded training data for validation.
# NOTE: this rebinds test_images/test_labels, so the test split returned by
# load_data() is no longer referenced after this cell.
train_images, test_images, train_labels, test_labels = train_test_split(
    train_images0,
    train_labels0,
    test_size=0.20,
)

In [None]:
# One-hot encode the training labels and store them as 32-bit integers
train_labels = to_categorical(train_labels).astype(np.int32)

In [None]:
# One-hot encode the held-out labels and store them as 32-bit integers
test_labels = to_categorical(test_labels).astype(np.int32)

Converting the labels to one-hot encoded format
val_labels = to_categorical(val_labels)
val_labels = val_labels.astype(np.int32)

In [None]:
"""
Testing that the dataset has been loaded correctly and checking the shapes of the arrays
"""
# Report how many training images there are and their pixel dimensions,
# then verify that every image has a label
train_no, x, y = train_images.shape
print('No training images:',train_no, ' with image size:',x,'x',y)
label_no = len(train_labels)
if label_no != train_no:
    print('# labels do not match # training images')

In [None]:
# Report how many held-out images there are and verify that every image has a label
test_no, x, y = test_images.shape
label_no = len(test_labels)
print('No test images:',test_no)
if label_no != test_no:
    print('# labels do not match # test images')

In [None]:
"""
val_no,x,y = val_images.shape
label_no = len(val_labels)
print('No val images:',val_no)
if (label_no != val_no) : 
  print('# labels do not match # val images')
"""

In [None]:
# Count the distinct classes in the training labels.
# When the labels are one-hot encoded (2-D), np.unique on the raw array would
# only find the values 0 and 1, so recover the integer class ids first.
label_ids = np.argmax(train_labels, axis=1) if np.ndim(train_labels) > 1 else train_labels
classes = np.unique(label_ids)
num_classes = len(classes)
print('Training labels:', classes, "; That is,", num_classes,"classes." )

In [None]:
"""
Pre-processing and reshaping the data to be able to work with training a model
"""
# Append a trailing axis of length 1 to act as the (single) colour channel
train_images = train_images[..., np.newaxis]
#val_images = val_images[..., np.newaxis]
test_images = test_images[..., np.newaxis]

In [None]:
# Shape of a single input sample: 28x28 pixels plus one channel axis
input_shape = (28, 28, 1)

In [None]:
# Normalise the images: divide by 255 and shift by 0.5
# (presumably maps 0..255 pixel values to -0.5..0.5)
train_images = train_images / 255 - 0.5
test_images = test_images / 255 - 0.5
#val_images = val_images / 255 - 0.5

In [None]:
# As these are images (28x28), it can be interesting to plot some of them
image_index = [42, 789] # Arbitrarily chosen sample indices to display

In [None]:
# Show each selected training image together with its class name
for index in image_index:
    label = train_labels[index]
    # One-hot labels carry the class id as the position of the 1, so use
    # argmax; indexing element [0] would only tell whether the class is 0.
    class_id = int(np.argmax(label)) if np.ndim(label) > 0 else int(label)
    print('Label:', class_names[class_id])
    plt.figure()
    # Drop the trailing channel axis so imshow receives a 2-D image
    plt.imshow(np.squeeze(train_images[index], axis=-1), cmap='gray')
    plt.gray()
    plt.grid(False)
    plt.show(block=False)

In [None]:
# Restating the input shape of the data
input_shape = (28, 28, 1)  # 28x28 with one channel axis (same value as assigned earlier)
print("Input shape", input_shape)

The model is the simplest kind of Keras model for neural networks.
It is a sequential model, to which several layers can easily be added and which
stays easy to overview as long as it is not too big.
It starts with a smaller kernel and increases the kernel size towards the end to gain more
abstraction and capture higher-level semantic information about the patterns in the data.

In [None]:
# CNN classifier: two convolution + max-pooling stages followed by two dense layers
model = Sequential([
    # Convolution layer 1; only the first layer declares the input shape
    Conv2D(filters=32, kernel_size=(7, 7), padding='same', activation='relu', input_shape=input_shape),
    # Max pooling to reduce the spatial size of the data
    MaxPooling2D(pool_size=(2, 2)),
    # Convolution layer 2; its input shape is inferred from the previous layer,
    # so no input_shape argument is passed here
    Conv2D(filters=32, kernel_size=(11, 11), padding='same', activation='relu'),
    # Max pooling to reduce the spatial size of the data
    MaxPooling2D(pool_size=(2, 2)),
    # Flatten the feature maps into a vector for the fully connected layers
    Flatten(),
    # Hidden dense layer with 10 units
    Dense(units=10, activation='relu'),
    # Output layer with one unit per class; softmax turns the outputs into a
    # probability vector over the classes
    Dense(units=10, activation='softmax'),
])

In [None]:
# Print the layer-by-layer overview of the model, including parameter counts
model.summary()

In [None]:
# Configure optimizer, loss and reported metric before training.
# Optimizers tried by the author: adam, sgd, keras.optimizers.RMSprop(1e-2).
# Alternatives noted by the author: sparse_categorical_crossentropy (loss)
# and categorical_accuracy (metric).
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
epochs = 6      # Maximum number of epochs to run
batch_size = 32      # Mini-batch size

In [None]:
"""
Adding a class for early stopping if there is too little progress in each epoch
"""
class EarlyStoppingAtMinLoss(keras.callbacks.Callback):
    """Stop training when the training loss stops decreasing.

    The callback tracks the lowest value of the ``"loss"`` entry reported at
    the end of each epoch and keeps a copy of the model weights from that
    epoch. When the loss has not improved for ``patience`` consecutive
    epochs, training is stopped and the best weights are restored.

    Args:
        patience: Number of epochs to wait after the minimum has been hit.
            After this many epochs without improvement, training stops.
    """

    def __init__(self, patience=3):
        super().__init__()
        self.patience = patience
        # best_weights stores the weights at which the minimum loss occurred.
        self.best_weights = None

    def on_train_begin(self, logs=None):
        """Reset the bookkeeping at the start of every training run."""
        # Number of epochs waited since the loss was last at its minimum.
        # Must start at zero so the full patience is available.
        self.wait = 0
        # The epoch the training stops at.
        self.stopped_epoch = 0
        # Initialize the best as infinity (np.inf; the np.Inf alias was
        # removed in NumPy 2.0).
        self.best = np.inf
        self.best_weights = None

    def on_epoch_end(self, epoch, logs=None):
        """Update the best loss and stop training once patience is used up."""
        current = (logs or {}).get("loss")
        if current is None:
            # No loss was reported for this epoch, so there is nothing to compare.
            return
        if np.less(current, self.best):
            self.best = current
            self.wait = 0
            # Record the best weights if the current result is better (less).
            self.best_weights = self.model.get_weights()
        else:
            self.wait += 1
            if self.wait >= self.patience:
                self.stopped_epoch = epoch
                self.model.stop_training = True
                # best_weights is still None if the loss never improved
                # (e.g. it was NaN from the first epoch); nothing to restore.
                if self.best_weights is not None:
                    print("Restoring model weights from the end of the best epoch.")
                    self.model.set_weights(self.best_weights)

    def on_train_end(self, logs=None):
        """Report the epoch at which training was stopped early, if any."""
        if self.stopped_epoch > 0:
            print("Epoch %05d: early stopping" % (self.stopped_epoch + 1))

In [None]:
# Train the CNN. The held-out split is passed as validation data, and the
# custom early-stopping callback may end training before `epochs` is reached.
early_stopping = EarlyStoppingAtMinLoss()
history = model.fit(
    train_images,
    train_labels,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(test_images, test_labels),
    callbacks=[early_stopping],
)

In [None]:
# Random-search tuner over build_model, ranked by validation loss, limited to 5 trials
tuner = kt.RandomSearch(build_model, objective='val_loss', max_trials=5)

In [None]:
# Run the hyperparameter search: every trial trains for 10 epochs and is
# scored on the held-out split
tuner.search(
    train_images,
    train_labels,
    epochs=10,
    validation_data=(test_images, test_labels),
)

In [None]:
# Retrieve the single best model found by the tuner
best_model = tuner.get_best_models(num_models=1)[0]

In [None]:
"""
Evaluating the model and plots the performance in terms of accuracy and error
"""
train_acc = history.history['accuracy']
# Validation accuracy is recorded under 'val_accuracy'; reading 'accuracy'
# again would plot the training curve twice.
test_acc = history.history['val_accuracy']
# Early stopping can end training before `epochs` is reached, so size the
# x-axis from the epochs that were actually recorded.
epochrange = range(1, len(train_acc) + 1)

In [None]:
# Per-epoch training and validation loss recorded by model.fit
recorded = history.history
train_loss = recorded['loss']
test_loss = recorded['val_loss']

In [None]:
# Accuracy per epoch: training as blue dots, validation as a blue line
plt.plot(epochrange, train_acc, 'bo', label='Training acc')
plt.plot(epochrange, test_acc, 'b', label='Validation acc')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and testing accuracy (model 1)')
plt.legend()
plt.show()

In [None]:
# Loss per epoch: training as blue dots, held-out split as a blue line
plt.plot(epochrange, train_loss, 'bo', label='Training loss')
plt.plot(epochrange, test_loss, 'b', label='Test loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and test loss (model 1)')
plt.legend()
plt.show()

In [None]:
# Score the tuned model on the held-out images. Note that test_loss and
# test_acc are rebound here from per-epoch lists to single values.
test_loss, test_acc = best_model.evaluate(test_images, test_labels)
print(f'Test accuracy: {test_acc:.3f}')

In [None]:
"""
EXERCISE PART 1a
Question: "How many parameters does your model have?" 
Answer: The total number of parameters in my model is 201 050, and all of them are trainable.
Trainable parameters are weight coefficients that are adjusted to better capture the
relationship between the neurons, the neurons themselves and the nodes within
the neural net, both the input layer's neurons and the hidden layers' nodes.
The model becomes overtrained when the error of the validation curve increases disproportionately.
Based on the graphs, the number of epochs should be chosen where the validation curve stops following the training
error curve.

EXERCISE PART 1b
Question: "What test accuracy do you get?"
Answer: 90% validation accuracy at best

Issues were identified when Keras Tuner was introduced: the imported
libraries were not recognized. Loading the modules failed constantly with different approaches and libraries.
It did not work to use Keras Tuner to find a better architecture of the hyperparameters on my
local machine (Win 10, VS Code, Anaconda and CMD).

Adam optimizer seem to perform well on this Zalando MNIST dataset, and deeper net than approx. 
10 Conv2D and approx. 10 dense layers led to low alpha in the grcategorp method were activated.

Around 90% validation accuracy was achieved, and when picking up the best parameter values
during the training, 91% validation accuracy was achieved.

2a
Using the early-stopping class to let the model stop if the training and validation are not
progressing well enough.
I get a better performance of the model when the model is not overtrained.

2b, 2c
SGD was running into these gradient issues with deeper layers, and this could probably be
due to a more averaging effect through the net's layers. This regularization effect could
be beneficial for generalizing better to other datasets.

3
Auto Tune made the hyperparameters work better, and increases the performance at the
cost of extended training time. There is need of smart adjustment of the number of layers
to test vs. the extra time it takes.

Analysis
Since the execution stops all the time in Colab and Keras Tuner is not working properly
on the local machine, it was hard to find a model that works without flaws.
A more extensive search for better hyperparameters would be beneficial.
Due to the initialization being based on randomness, the results of the performance differ from
time to time.
"""