In [None]:
import tensorflow as tf
print(tf.__version__)

In [None]:
""" Load the Fashion MNIST dataset """
# Handle to the Fashion MNIST dataset module bundled with Keras.
mnist = tf.keras.datasets.fashion_mnist
# load_data() is unpacked into two (images, labels) pairs: the first is used for training, the second for testing.
(training_images, training_labels), (test_images, test_labels) = mnist.load_data()

In [None]:
""" Explore what the data looks like and feel free to experiment! Try 0 first, then 42"""
import matplotlib.pyplot as plt

# Index of the training sample to inspect. Change it in this one place
# (e.g. 0 first, then 42) rather than editing every line below.
sample_index = 42

plt.imshow(training_images[sample_index])  # draw the image
print(training_labels[sample_index])       # the label stored for that image
print(training_images[sample_index])       # the pixel values behind the picture

In [None]:
""" Our images have pixels in the range of 0-255, but Neural Networks work best with normalized data - so let's rescale each pixel to the range 0-1 instead.

This is called `Normalizing` and is easily done by dividing the value of each pixel by 255

* In Python, you can do this against an ENTIRE array in a single line as shown below
"""
# Dividing the whole array by 255.0 rescales every pixel in one step (0-255 -> 0-1).
# The same scaling is applied to both the training and the test images.
training_images = training_images / 255.0
test_images = test_images / 255.0

In [None]:
""" Now I can define the model.

* Sequential: Defines a SEQUENCE of layers in the Neural Network
* Flatten: Remember earlier when our images were squares? Flatten just takes that square and turns it into a 1-dimensional set
* Dense: Add a layer of Neurons. Each layer of neurons needs an Activation Function to tell them what to do
* Relu: Effectively means "If X>0 return X, else return 0" - so it only passes values 0 or greater to the next layer in the network
* Softmax: Takes a set of values and converts them into probabilities that add up to 1, so the biggest one clearly stands out. For example, if the output of the last layer looks like

    [0.1, 0.1, 0.05, 0.1, 9.5, 0.1, 0.05, 0.05, 0.05]

    Softmax turns it into approximately [0, 0, 0, 0, 1, 0, 0, 0, 0], which saves you from having to fish for the biggest value - The goal is to save a lot of coding!
"""
# Start with an empty Sequential model and stack the layers one at a time.
model = tf.keras.models.Sequential()

# Input layer - unrolls each 28x28 image into a 1-dimensional set
model.add(tf.keras.layers.Flatten())

# Hidden (middle) layer - 128 neurons that try to work out the rules linking input to output
model.add(tf.keras.layers.Dense(128, activation=tf.nn.relu))

# Output layer - one neuron per category (10 in total); the highest probability wins
model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

In [None]:
""" Compile and Train the Model """
# Configure training: the Adam optimizer, sparse categorical cross-entropy as
# the loss, and accuracy as the metric to report.
model.compile(
    optimizer=tf.optimizers.Adam(),
    loss=tf.losses.SparseCategoricalCrossentropy(),
    # Keras expects a list (or tuple/dict) of metrics; a bare string is rejected
    # by newer Keras releases, and the later cells already pass a list.
    metrics=['accuracy'],
)
# Train on the training images and labels for 5 passes over the data.
model.fit(training_images, training_labels, epochs=5)

In [None]:
""" Test the Trained Model against images it hasn't seen yet.

We would expect some errors since we don't have 100% accuracy, but if the evaluation is WAY different, then we have a problem...
"""
# Score the model on the test split, which was never passed to fit().
model.evaluate(test_images, test_labels)

In [None]:
""" Exercise 1

For this first exercise run the below code: It creates a set of classifications for each of the test images, and then prints the first entry in the classifications. The output, after you run it, is a list of numbers. Why do you think this is, and what do those numbers represent?
"""
# Run the model over every test image and keep the resulting classifications.
classifications = model.predict(test_images)
# Show the classification produced for the first test image only.
print(classifications[0])

### My Answer

For each image, a probability is created for each category, where 1.0 is 100% probability.
Since there are 10 categories, there are 10 items in the list and each position corresponds to a different category aka label.

In this case, because we are using the `Softmax` activation in the output layer, the highest probability will stand out:

[0, 0, 0, 0, 0, 0, 0, 0, 0, 1]

In [None]:
 # Print the true label of the FIRST test image so it can be compared with the
 # position of the largest value in classifications[0] printed above.
 # (The labels are used as loaded - they are never normalized.)
 print(test_labels[0])

In [1]:
""" Exercise 2

Let's now look at the layers in your model. Experiment with different values for the dense layer with 512 neurons. What different results do you get for loss, training time, etc.? Why do you think that's the case?
"""
import tensorflow as tf

# Step 1 - fetch the dataset
mnist = tf.keras.datasets.mnist
(training_images, training_labels), (test_images, test_labels) = mnist.load_data()

# Step 2 - preprocessing: scale pixel values from 0-255 down to 0-1
training_images, test_images = training_images / 255.0, test_images / 255.0

# Step 3 - assemble the network one layer at a time
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

# Step 4 - choose the optimizer, the loss and the metric to report
model.compile(
    optimizer=tf.optimizers.Adam(),
    loss=tf.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy'],
)

# Step 5 - train (fit) the model
model.fit(training_images, training_labels, epochs=5)

# Step 6 - test (evaluate) the model
model.evaluate(test_images, test_labels)

# Step 7 - look at the first prediction next to its true label
classifications = model.predict(test_images)

print(classifications[0])
print(test_labels[0])

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
[1.1362819e-06 6.9358670e-08 1.5741627e-04 7.9357764e-04 3.1485832e-09
 3.5293901e-08 3.8833398e-10 9.9902177e-01 2.1075966e-05 4.9223440e-06]
7


### My answer

The training takes longer, but is more accurate. This is because the data has to go through more neurons to do more calculations in the network.

> This doesn't mean "more is better" - you can hit the law of diminishing returns very quickly!

In [None]:
""" Exercise 3

What would happen if we remove the Flatten() layer?

YOU GET AN ERROR ABOUT THE SHAPE OF THE DATA!

* This reinforces the rule of thumb that the first layer in your network should be the same shape as your data.
"""

import tensorflow as tf
print(tf.__version__)

# Load the MNIST dataset as (images, labels) pairs for training and testing.
mnist = tf.keras.datasets.mnist

(training_images, training_labels) ,  (test_images, test_labels) = mnist.load_data()

# Divide every pixel value by 255.
training_images = training_images/255.0
test_images = test_images/255.0

# NOTE: the Flatten layer is commented out ON PURPOSE. This cell exists to
# reproduce the shape error described in the exercise text, so do not "fix" it.
model = tf.keras.models.Sequential([#tf.keras.layers.Flatten(),
                                    tf.keras.layers.Dense(64, activation=tf.nn.relu),
                                    tf.keras.layers.Dense(10, activation=tf.nn.softmax)])

# No metrics are requested here; only the loss is configured.
model.compile(optimizer = 'adam',
              loss = 'sparse_categorical_crossentropy')

# Per the exercise text, running this cell ends in a shape error, so the
# statements after the failure are presumably never reached.
model.fit(training_images, training_labels, epochs=5)

model.evaluate(test_images, test_labels)

classifications = model.predict(test_images)

print(classifications[0])
print(test_labels[0])

In [None]:
""" Exercise 4

Consider the final (output) layers. Why are there 10 of them? What would happen if you had a different amount than 10?

YOU GET AN ERROR!

* Another rule of thumb - The number of neurons in the last layer should match the number of classes you are classifying for.
"""

In [None]:
""" Exercise 5

Consider the effects of additional layers in the network. What will happen if you add another layer between the one with 512 and the final layer with 10?

There isn't a significant impact - because this is relatively simple data. For far more complex data (including color images to be classified as flowers), extra layers are often necessary.
"""
import tensorflow as tf
print(tf.__version__)

# Load MNIST as (images, labels) pairs for training and testing.
mnist = tf.keras.datasets.mnist
(training_images, training_labels), (test_images, test_labels) = mnist.load_data()

# Rescale pixel values by dividing by 255.
training_images, test_images = training_images / 255.0, test_images / 255.0

# Same shape of network as before, but with an extra 256-neuron hidden layer
# between the 512-neuron layer and the 10-neuron output layer.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(256, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

# Only optimizer and loss are configured; no extra metrics are requested.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
)

# Train, then score on the test split.
model.fit(training_images, training_labels, epochs=5)
model.evaluate(test_images, test_labels)

# Compare the first prediction with its true label.
classifications = model.predict(test_images)
print(classifications[0])
print(test_labels[0])

In [None]:
""" Exercise 6

Consider the impact of training for more or fewer epochs. What would happen?

Try 15 epochs -- you'll probably get a model with a much better loss than the one with 5.
Try 30 epochs -- you might see the loss value stops decreasing, and sometimes increases.

* This is a side effect of something called 'overfitting'. There's no point in wasting your time training if you aren't improving your loss, right?
"""

In [None]:
""" Exercise 7

Before you trained, you normalized the data, going from values that were 0-255 to values that were 0-1. What would be the impact of removing that?

It was worse, but I believe it's because the computer has a much larger range of numbers to go through and calculate instead of small numbers between 0-1
"""

In [None]:
""" Exercise 8

Earlier when you trained for extra epochs you had an issue where your loss might change. It might have taken a bit of time for you to wait for the training to do that, and you might have thought 'wouldn't it be nice if I could stop the training when I reach a desired value?' -- i.e. 95% accuracy might be enough for you, and if you reach that after 3 epochs, why sit around waiting for it to finish a lot more epochs....So how would you fix that? Like any other program...you have callbacks! Let's see them in action...
"""
import tensorflow as tf
print(tf.__version__)

class myCallback(tf.keras.callbacks.Callback):
    """Stop training early once the loss reported for an epoch falls below 0.4."""

    def on_epoch_end(self, epoch, logs=None):
        """Hook that training calls at the end of every epoch.

        Args:
            epoch: Epoch number passed in by Keras (not used here).
            logs: Dict of values reported for the epoch; may be None.
        """
        # Use None as the default so no mutable dict is shared between calls.
        loss = (logs or {}).get('loss')
        # 'loss' may be missing; comparing None with a float would raise TypeError.
        if loss is not None and loss < 0.4:
            # The message now describes the condition actually tested (loss, not accuracy).
            print("\nLoss fell below 0.4 so cancelling training!")
            self.model.stop_training = True

# One instance of the early-stopping callback class defined above.
callbacks = myCallback()

# Fashion MNIST, unpacked into training and test (images, labels) pairs.
mnist = tf.keras.datasets.fashion_mnist
(training_images, training_labels), (test_images, test_labels) = mnist.load_data()

# Divide every pixel value by 255.
training_images, test_images = training_images / 255.0, test_images / 255.0

# Flatten -> 512-neuron hidden layer -> 10-neuron output layer, added one by one.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
)

# Up to 5 epochs are requested; the callback may set stop_training and end the run sooner.
model.fit(
    training_images,
    training_labels,
    epochs=5,
    callbacks=[callbacks],
)
