# Colab

To load data from your Google Drive, you first need to mount it. Run the code below; then, to get the authorization code, click on the link, choose your Google account, click on 'Allow', copy the code, paste it in the box below and press Enter. Once done, it will show "Mounted at gdrive". You know Google Drive is mounted once you see your folders in the left panel.







In [None]:
# Mount Google Drive at the 'gdrive' mount point; Colab asks for authorization when this runs.
from google.colab import drive
drive.mount('gdrive')

# 1) Import 

In [None]:
import tensorflow as tf

In [None]:
# check the installed TensorFlow version (the notebook displays the value of the last expression)
tf.__version__

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout 
from tensorflow.keras import regularizers
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.preprocessing import image

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# 2) Load Data

In [None]:
# Fashion-MNIST is bundled with Keras; load_data() returns the predefined (train, test) split.
fashion_mnist_data = tf.keras.datasets.fashion_mnist
(X_train, y_train), (X_test, y_test) = fashion_mnist_data.load_data()

In [None]:
# Each image is a 28 x 28 array, so the X arrays are three-dimensional.
print("train X shape: ", X_train.shape) # X_train: numpy array with shape: (num_samples_train, height, width)
print("train y shape: ", y_train.shape) # y_train: numpy array with shape: (num_samples_train,) 
print("test X shape: ", X_test.shape)   # X_test: numpy array with shape: (num_samples_test, height, width)
print("test y shape: ", y_test.shape)   # y_test: numpy array with shape: (num_samples_test,)

The labels are integers from 0 to 9, and each number represents an item of clothing, in the order given by the `labels` list below.

In [None]:
# Range of the integer class labels.
# ndarray.min()/.max() run vectorised inside NumPy, whereas the Python builtins
# min()/max() would iterate over the array one element at a time.
print("minimum train y value: ", y_train.min())
print("maximum train y value: ", y_train.max())

In [None]:
# Class names for the dataset; the position in the list is the integer label (0-9).

labels = [
    'T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',  # labels 0-4
    'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot',      # labels 5-9
]

In [None]:
# Show a single training image together with its class name

i = 1
img = X_train[i]  # the i-th image (all rows, all columns)
plt.imshow(img)
plt.show()
print(f"label: {labels[y_train[i]]}")

In [None]:
# display y label: the integer class of image i, i.e. its index into `labels`
y_train[i]

In [None]:
# Normalise the pixel values by 255 so that every value lies between 0 and 1.

X_train, X_test = X_train / 255., X_test / 255.

# 3) Build Model (Sequential)

Here, we build a NN model (feed forward) with one or more hidden layers. 

## Examples

In [None]:
# Example 1: feed-forward network for binary classification
#   input  : 16 units
#   hidden : one layer, 64 units, relu activation
#   output : 1 unit, sigmoid activation (binary)

model1 = Sequential([
    Dense(64, activation='relu', input_shape=(16,)),
    Dense(1, activation='sigmoid'),
])

Note: If you don't specify the activation function, it will be linear.

Read more about other activation choices here: https://www.tensorflow.org/api_docs/python/tf/keras/activations

Let's take a look at the model we just built:

In [None]:
# Per-layer overview; expect 16*64 + 64 = 1088 and 64*1 + 1 = 65 parameters (1153 in total).
model1.summary()

We can also take a look at the value of initial weights and biases:

In [None]:
# List the model's weight variables (the kernel and bias of each Dense layer) with their initial values.
model1.weights

An alternative way to build sequential models:

In [None]:
# Example 1, alternative construction: start empty and append the layers one by one
#   input  : 16 units
#   hidden : one layer, 64 units, relu activation
#   output : 1 unit, sigmoid activation (binary)

model1 = Sequential()

# each add() call appends one more layer to the end of the model
model1.add(Dense(64, activation='relu', input_shape=(16,)))
model1.add(Dense(1, activation='sigmoid'))

In [None]:
# Same architecture as the list-based version: 1088 + 65 = 1153 parameters.
model1.summary()

More examples:

In [None]:
# Example 2: feed-forward network for multi-class classification
#   input  : 16 units
#   hidden : one layer, 32 units, relu activation
#   output : 10 units, softmax activation (one per class)

model2 = Sequential([
    Dense(32, activation='relu', input_shape=(16,)),
    Dense(10, activation='softmax'),
])

In [None]:
# Expect 16*32 + 32 = 544 and 32*10 + 10 = 330 parameters (874 in total).
model2.summary()

In [None]:
# Example 3: two hidden layers, binary output
#   input    : 16 units
#   hidden 1 : 32 units, relu activation
#   hidden 2 : 8 units, relu activation
#   output   : 1 unit, sigmoid activation (binary)

model3 = Sequential([
    Dense(32, activation='relu', input_shape=(16,)),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Expect 544 + (32*8 + 8 = 264) + (8*1 + 1 = 9) = 817 parameters.
model3.summary()

In [None]:
# Example 4: no hidden layers at all
#   input  : 16 units
#   output : 1 unit, sigmoid activation (binary), connected directly to the input

model4 = Sequential([
    Dense(1, activation='sigmoid', input_shape=(16,)),
])

In [None]:
# A single Dense layer: 16 weights + 1 bias = 17 parameters.
model4.summary()

If the input shape is two-dimensional, we can flatten it and unroll it into a long one-dimensional vector first:

In [None]:
# Example 5: two-dimensional input
#   input  : shape (4, 4), unrolled by Flatten into a vector of length 16
#   hidden : one layer, 64 units, relu activation
#   output : 1 unit, sigmoid activation (binary)

model5 = Sequential([
    Flatten(input_shape=(4, 4)),  # (4, 4) -> (16,)
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [None]:
# Flatten has no parameters; the Dense layers contribute 1088 + 65 = 1153.
model5.summary()

We can define a name for each layer:

In [None]:
# Example 5 with an explicit name for every layer
#   input  : shape (4, 4), flattened
#   hidden : one layer, 64 units, relu activation
#   output : 1 unit, sigmoid activation (binary)

model5 = Sequential([
    Flatten(input_shape=(4, 4), name='layer_0'),
    Dense(64, activation='relu', name='layer_1'),
    Dense(1, activation='sigmoid', name='layer_2'),
])

In [None]:
# The summary now lists the layers under the names given above (layer_0, layer_1, layer_2).
model5.summary()

## For our dataset

In [None]:
# Model for our dataset
#   input  : images of shape (28, 28), flattened
#   hidden : one layer, 64 units, relu activation
#   output : 10 units, softmax activation (one per class)

model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax'),
])

In [None]:
# Expect 784*64 + 64 = 50240 and 64*10 + 10 = 650 parameters (50890 in total).
model.summary()

# 4) Start Training (Compile)

To start training our network on data, we need to define the optimization algorithm, and a loss function that will give us a measure of our model's performance. Also we define a set of metrics that we want to keep track of as the model is training. These metrics will be calculated for each epoch of training along with the evaluation of the loss function on the training data.

## Examples

In [None]:
# Example 1: build the binary classifier, then compile it using string shortcuts
model1 = Sequential([
    Dense(64, activation='relu', input_shape=(16,)),
    Dense(1, activation='sigmoid'),
])

model1.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)

Read more about other optimizer choices here: https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/

Read more about other loss choices here: https://www.tensorflow.org/api_docs/python/tf/keras/losses

Read more about other metrics choices here: https://www.tensorflow.org/api_docs/python/tf/keras/metrics

Each of the strings in the code above is a reference to another object or function and we can always use that object or function directly:

In [None]:
# Example 1, alternative: pass the optimizer, loss and metric objects instead of their string names

model1.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[tf.keras.metrics.BinaryAccuracy()],
)

You might want to use the code above because it gives you greater flexibility: many of these objects and functions have options of their own that you may want to control.

In [None]:
# example 1 - we can change the default parameters

model1.compile(
    # Adam's defaults are learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07;
    # only the learning rate is changed here (0.003), the other values are the defaults.
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.003, beta_1=0.9, beta_2=0.999, epsilon=1e-07),
    # model1 ends in a sigmoid layer, so the loss receives probabilities and from_logits must be False.
    # Set from_logits=True only when the output layer has no activation (or a linear one);
    # with a sigmoid output it would apply the sigmoid a second time.
    loss = tf.keras.losses.BinaryCrossentropy(from_logits=False),
    # default is 0.5 (the threshold for predictions to be classed as positive).
    metrics = [tf.keras.metrics.BinaryAccuracy(threshold=0.5)]
)

More examples:

In [None]:
# Example 2: multi-class network compiled with RMSprop
#   input  : 16 units
#   hidden : one layer, 64 units, relu activation
#   output : 10 units, softmax activation (one per class)

model2 = Sequential([
    Dense(64, activation='relu', input_shape=(16,)),
    Dense(10, activation='softmax'),
])

# more than one metric can be tracked at the same time
model2.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy', 'mae'],
)

## For our dataset

In [None]:
# Build and compile the model for our dataset

model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax'),
])

# The "sparse" loss and accuracy are the variants to use when the y labels are integers.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
    loss='sparse_categorical_crossentropy',
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

An alternative compile method:

In [None]:
# Our dataset, alternative: create the optimizer and metric objects first, then hand them to compile

opt = tf.keras.optimizers.Adam(learning_rate=0.005)
acc = tf.keras.metrics.SparseCategoricalAccuracy()

model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=[acc])

# 5) Model Fit

Calling model.fit returns a TensorFlow history object. This object contains a record of the progress of the network during training in terms of the loss and the metrics that we defined when we compiled the model.

In [None]:
# (number of training images, image height, image width)
X_train.shape

In [None]:
# 1 epoch = 235 mini-batches of up to 256 images each: ceil(60000 / 256) = 235, so the last mini-batch is smaller

In [None]:
# Train the model

history = model.fit(
    X_train,
    y_train,
    epochs=8,        # training will make 8 complete passes through the dataset
    batch_size=256,  # when not given, the batch size is set to 32
    verbose=2,       # print only one line per epoch
)
# Other verbose settings:
#   verbose = 1 (or True):  print everything (the default)
#   verbose = 0 (or False): silence the print out

The history object has an attribute, also called history, which is a dictionary containing the values of the loss function and the metrics after each of the epochs.

In [None]:
# Names of the recorded quantities: the loss plus every metric passed to compile().
print(history.history.keys())

Let's plot the training history:

In [None]:
# Load the history into a pandas Dataframe:
# one column per recorded quantity, one row per epoch.

df = pd.DataFrame(history.history)
df.head()

In [None]:
# Make a plot for the loss
# (the x axis is the DataFrame index, i.e. the epoch number starting at 0)

loss_plot = df.plot(y="loss", title = "Loss vs. Epochs", legend=False)
loss_plot.set(xlabel="Epochs", ylabel="Loss")

In [None]:
# Make a plot for the accuracy
# (the column is named after the metric passed to compile: SparseCategoricalAccuracy)

accuracy_plot = df.plot(y="sparse_categorical_accuracy", legend=False)
accuracy_plot.set(xlabel="Epochs", ylabel="sparse_categorical_accuracy")

# 6) Model Evaluate on Test

In [None]:
# evaluate() returns the loss followed by the compiled metric (here the sparse categorical accuracy).
test_loss, test_accuracy = model.evaluate(X_test, y_test)

# 7) Model Predict

In [None]:
# Choose a test image to classify

# A fixed index keeps the example reproducible from run to run. The original cell also drew
# a random index first, but that value was overwritten before it was ever used.
# For a genuinely random image use instead:  random_inx = np.random.choice(X_test.shape[0])
random_inx = 30
X_sample = X_test[random_inx]
plt.imshow(X_sample)
plt.show()
print(f"Label: {labels[y_test[random_inx]]}")

In [None]:
# A single image has no leading sample dimension yet.
X_sample.shape

In [None]:
# The model expects a batch of images, so add a leading sample dimension:
# (28, 28) -> (num_samples, 28, 28) with num_samples = 1

X_sample = np.reshape(X_sample, (1, 28, 28))
X_sample.shape

In [None]:
# Get the model predictions:
# one row per sample, holding the 10 softmax outputs (one per class)

model.predict(X_sample)

In [None]:
# The predicted class is the position of the largest softmax output
predictions = model.predict(X_sample)
predicted_class = np.argmax(predictions)
print(predicted_class)
print(f"Model prediction:{labels[predicted_class]}")

# 8) Model Fit with Validation Sets

Sometimes, datasets have already been packaged up for us with the training and test split:

In [None]:
# Recall:
# fashion_mnist_data = tf.keras.datasets.fashion_mnist
# (X_train, y_train), (X_test, y_test) = fashion_mnist_data.load_data()

In [None]:
# Recall: same architecture as before, rebuilt so that training starts from fresh weights

model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax'),
])

In [None]:
# Recall: the compile step is unchanged
# (the "sparse" loss and accuracy are the variants to use when the y labels are integers)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
    loss='sparse_categorical_crossentropy',
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

In [None]:
# Train while holding back part of the training data for validation

history = model.fit(
    X_train,
    y_train,
    epochs=25,
    batch_size=256,
    verbose=False,
    validation_split=0.2,  # 20 percent of the training data will be held back for validation
)

The model's performance is recorded on both the training and validation sets.

In [None]:
# The validation quantities are recorded as well, under keys prefixed with 'val_'.
print(history.history.keys())

In [None]:
# Training loss and validation loss on the same axes

for metric_name in ('loss', 'val_loss'):
    plt.plot(history.history[metric_name])
plt.title('Loss vs. epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Training', 'Validation'], loc='upper right')
plt.show()

From the graph, we can see that the model has vastly overfit the training data and so it underperforms on the validation data.

In [None]:
# Testing mode: loss and sparse categorical accuracy on the test set

test_loss, test_accuracy = model.evaluate(X_test, y_test)

Practice Question) See if you can reduce overfitting by, for example, altering the number of epochs that the model trains for or changing the layer structure of the model.

# 9) Reduce Overfit - Change the NN Model

In [None]:
# Build a smaller model: the hidden layer has 16 units instead of 64

model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(16, activation='relu'),
    Dense(10, activation='softmax'),
])

In [None]:
# Compile with the same settings as before
# (the "sparse" loss and accuracy are the variants to use when the y labels are integers)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
    loss='sparse_categorical_crossentropy',
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

In [None]:
# Train the smaller model, holding back 20 percent of the training data for validation

history = model.fit(
    X_train,
    y_train,
    epochs=25,
    batch_size=256,
    verbose=False,
    validation_split=0.2,
)

In [None]:
# Training loss and validation loss of the smaller model

for metric_name in ('loss', 'val_loss'):
    plt.plot(history.history[metric_name])
plt.title('Loss vs. epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Training', 'Validation'], loc='upper right')
plt.show()

# 10) Reduce Overfit - L2 Regularization

Model Regularization

In [None]:
# The unregularized model - this is the definition we should change!

model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax'),
])

In [None]:
# L2 regularizer on the weight matrix of both Dense layers.
# The weight matrix is sometimes called the kernel, hence the argument name kernel_regularizer.
# 0.001 is the penalty rate (or lambda).
model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.001)),
    Dense(10, activation='softmax', kernel_regularizer=regularizers.l2(0.001)),
])

In [None]:
# Compile - nothing changed here!
# (the "sparse" loss and accuracy are the variants to use when the y labels are integers)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
    loss='sparse_categorical_crossentropy',
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

In [None]:
# Train with a validation set - nothing changed here!

history = model.fit(
    X_train,
    y_train,
    epochs=25,
    batch_size=256,
    verbose=False,
    validation_split=0.2,  # 20 percent of the training data will be held back for validation
)

In [None]:
# Training loss and validation loss of the L2-regularized model

for metric_name in ('loss', 'val_loss'):
    plt.plot(history.history[metric_name])
plt.title('Loss vs. epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Training', 'Validation'], loc='upper right')
plt.show()

In [None]:
# Testing mode: loss and sparse categorical accuracy of the L2-regularized model on the test set

test_loss, test_accuracy = model.evaluate(X_test, y_test)

# 11) Reduce Overfit - Dropout Regularization

In [None]:
# Recall:

#model = Sequential([
#                   Flatten(input_shape = (28, 28)),
#                   Dense(64, activation = 'relu'),
#                   Dense(10, activation = 'softmax')
#])

In [None]:
# Dropout regularizer, placed after the hidden layer.
# The argument is the dropout rate, i.e. 1 - keep_probability.

model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])
                    

In [None]:
# Compile - nothing changed here!
# (the "sparse" loss and accuracy are the variants to use when the y labels are integers)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
    loss='sparse_categorical_crossentropy',
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

In [None]:
# Train with a validation set - nothing changed here!

history = model.fit(
    X_train,
    y_train,
    epochs=25,
    batch_size=256,
    verbose=False,
    validation_split=0.2,  # 20 percent of the training data will be held back for validation
)

In [None]:
# Training loss and validation loss of the dropout model

for metric_name in ('loss', 'val_loss'):
    plt.plot(history.history[metric_name])
plt.title('Loss vs. epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Training', 'Validation'], loc='upper right')
plt.show()

In [None]:
# Testing mode, no dropout: the Dropout layer is only active during training

test_loss, test_accuracy = model.evaluate(X_test, y_test)

# 12) Reduce Overfit - Early Stopping 

Another regularization approach is called early stopping. 
Early stopping is a technique that monitors the performance of the network for every epoch on a held out validation set during the training run, and terminates the training conditional on the validation performance.

In [None]:
# Back to the original model, without L2 or dropout regularization
model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax'),
])

In [None]:
# Compile - nothing changed here!
# (the "sparse" loss and accuracy are the variants to use when the y labels are integers)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
    loss='sparse_categorical_crossentropy',
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy()],
)

In [None]:
#set early stopping

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.01, patience=5) 

# by default, monitor='val_loss', which means that we use the validation loss as the performance measure to decide when to terminate the training.
# we could instead monitor the validation accuracy. The key must match the name of the compiled metric:
# this model is compiled with SparseCategoricalAccuracy, so the key is 'val_sparse_categorical_accuracy'
# (not 'val_accuracy').

# min_delta: Minimum change in the monitored quantity to qualify as an improvement, 
# i.e. an absolute change of less than min_delta, will count as no improvement.
# by default, min_delta=0.

#patience: Number of consecutive epochs with no improvement after which training will be stopped.
# by default, patience is set to zero which terminates training as soon as the performance measure gets worse from one epoch to the next.

In [None]:
# Train with a validation set and the early-stopping callback

history = model.fit(
    X_train,
    y_train,
    epochs=25,
    batch_size=256,
    verbose=2,
    validation_split=0.2,  # 20 percent of the training data will be held back for validation
    callbacks=[early_stopping],
)

In [None]:
# Training loss and validation loss of the run with early stopping

for metric_name in ('loss', 'val_loss'):
    plt.plot(history.history[metric_name])
plt.title('Loss vs. epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Training', 'Validation'], loc='upper right')
plt.show()

In [None]:
# Testing mode: loss and sparse categorical accuracy of the early-stopped model on the test set

test_loss, test_accuracy = model.evaluate(X_test, y_test)