# MNIST Dataset using TensorFlow
### Setup

# Run this notebook in Google Colab!

## If you want to set this up on your local machine (MacBook) instead, run the following commands in your terminal. Proceed at your own risk!
- conda install -c apple tensorflow-deps
- python -m pip install tensorflow-macos
- pip install tensorflow-datasets
- NOTE: Do NOT install `tensorflow-metal` on an M1 Mac! As of Jan 17 2023, `python -m pip install tensorflow-metal` does not work for M1 Mac GPU acceleration, so the model will be trained on the CPU instead. Again: do NOT install `tensorflow-metal` on an M1 Mac!

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Check whether a GPU is available. If none is listed, we can change the runtime
# type in the "Runtime" tab at the top of the Colab window.
tf.config.list_physical_devices()

In [None]:
# Shell out to nvidia-smi for details about the attached GPU
# (presumably only available on a runtime with an NVIDIA GPU — verify locally).
!nvidia-smi

In [None]:
# Load the MNIST dataset, already split into train/test features (X) and targets (y)
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

In [None]:
X_train.shape

In [None]:
# For the most part you will be working with tensors instead of arrays/matrices.
# Let's take a look at what a tensor is by making one.

# Start with a single number: `scaler` holds a scalar tensor built from the value 1
scaler = tf.constant(1)

In [None]:
scaler.shape

In [None]:
scaler.ndim

In [None]:
# A tensor built from a flat list of three numbers
vector = tf.constant([1,2,3])

In [None]:
vector.shape

In [None]:
vector.ndim

In [None]:
# A tensor built from a nested list: 3 rows of 3 numbers each
matrix = tf.constant([[1,2,3],
                     [4,5,6],
                     [7,8,9]])

In [None]:
matrix.shape

In [None]:
matrix.ndim

In [None]:
# A tensor of shape (2, 3, 4) filled with ones
tensor = tf.ones(shape=(2,3,4))

In [None]:
tensor

In [None]:
tensor.shape

In [None]:
tensor.ndim

In [None]:
type(X_train)

In [None]:
# It is good practice to convert data over to tensors, as this gives us extra functionality
# the same way a NumPy array adds extra functionality to a list.
# Only do this with your features, not your target.
X_train = tf.constant(X_train)

X_test = tf.constant(X_test)

In [None]:
X_train.ndim

In [None]:
# Peek at the data:
# each index is one observation (image) represented as a multi-dimensional tensor
X_train[0]

In [None]:
# Indexing once more gives a single row of pixels from that image
# (a tensor rather than a NumPy array, because X_train was converted with tf.constant)
X_train[0][0]

In [None]:
y_train[0]

In [None]:
# How our data is split: report the shape of every feature/target array
for split_label, split_data in (
    ("Train Feature Matrix:", X_train),
    ("Test Feature Matrix:", X_test),
    ("Train Target Matrix:", y_train),
    ("Test Target Matrix:", y_test),
):
    print(split_label, split_data.shape)

In [None]:
# A look at the first 100 training images, laid out on a 10x10 grid
fig, ax = plt.subplots(10, 10, figsize=(10, 10))
# ax.flat walks the grid row by row, so image k lands in the same cell as before
for k, axis in enumerate(ax.flat):
    axis.imshow(X_train[k], aspect='auto', cmap='gray')
    # Tick marks carry no information for image thumbnails and only clutter the grid
    axis.axis('off')
plt.show()

In [None]:
# Defining our model
# Sequential = feed-forward network: each layer's output feeds the next layer
model = tf.keras.Sequential([

    # each input is one image of 28 rows * 28 columns
    tf.keras.Input(shape=(28, 28)),

    # scale the raw pixel values (0-255) down to 0-1 so the sigmoid units
    # in the hidden layers are not saturated from the very first step
    tf.keras.layers.Rescaling(1.0 / 255),

    # flatten each 28 row * 28 column image into one vector of 28*28 = 784 values
    tf.keras.layers.Flatten(),

    # dense (hidden) layer 1
    tf.keras.layers.Dense(256, activation='sigmoid'),

    # dense (hidden) layer 2
    tf.keras.layers.Dense(128, activation='sigmoid'),

    # output layer: softmax turns the 10 scores into class probabilities that sum to 1,
    # which is what the sparse_categorical_crossentropy loss expects
    tf.keras.layers.Dense(10, activation='softmax'),
])

In [None]:
# Print the layer-by-layer architecture and parameter counts
# (a bare `model` only shows an opaque object repr)
model.summary()

In [None]:
# We need to "compile" our model by specifying:
#   - our optimizer (how it learns),
#   - our loss (how to calculate our cost function),
#   - and what our metric is
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# We train our model like any other, with X_train and y_train.
# epochs is how many full passes we make over the training data
# batch_size is how many observations go into each gradient (weight) update
# validation_split holds out a fraction of the training data so we can see, epoch by epoch,
# how well the model does on data it was not trained on
# The return value of fit is saved to a variable for later use
history = model.fit(X_train, y_train, epochs=100, 
          batch_size=2000, 
          validation_split=0.2)

In [None]:
# Final model score against our test data.
# With the model compiled as above, results holds the loss followed by the accuracy.
results = model.evaluate(X_test,  y_test, verbose = 0)
print('test loss, test acc:', results)

In [None]:
# predict works on batches, so give the single image a leading batch axis first
predictions = model.predict(tf.expand_dims(X_train[0], axis=0))

In [None]:
predictions

In [None]:
# np.argmax gives the index of the largest score in our predictions array,
# i.e. the digit the model predicted
np.argmax(predictions)

In [None]:
# The actual label for that same image
y_train[0]

In [None]:
# Put the values recorded during training into a DataFrame for easy inspection and plotting
history_df = pd.DataFrame(history.history)

In [None]:
history_df.head()

In [None]:
# It looks like our model is reaching the point of diminishing returns with epochs. Let's take a look.
# Training and validation accuracy are plotted together so any gap between them is visible.
history_df[['accuracy', 'val_accuracy']].plot()
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.show()

In [None]:
# Same view for the loss: training and validation curves side by side
history_df[['loss', 'val_loss']].plot()
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.show()

In [None]:
# How can we stop early if we don't need to continue training?
# Defining our model
# Sequential = feed-forward network: each layer's output feeds the next layer
model2 = tf.keras.Sequential([

    # each input is one image of 28 rows * 28 columns
    tf.keras.Input(shape=(28, 28)),

    # scale the raw pixel values (0-255) down to 0-1 so the sigmoid units
    # in the hidden layers are not saturated from the very first step
    tf.keras.layers.Rescaling(1.0 / 255),

    # flatten each 28 row * 28 column image into one vector of 28*28 = 784 values
    tf.keras.layers.Flatten(),

    # dense (hidden) layer 1
    tf.keras.layers.Dense(256, activation='sigmoid'),

    # dense (hidden) layer 2
    tf.keras.layers.Dense(128, activation='sigmoid'),

    # output layer: softmax turns the 10 scores into class probabilities that sum to 1,
    # which is what the sparse_categorical_crossentropy loss expects
    tf.keras.layers.Dense(10, activation='softmax'),
])

model2.compile(optimizer='adam',
               loss='sparse_categorical_crossentropy',
               metrics=['accuracy'])

# Watch the validation loss rather than the training loss: training loss keeps creeping
# down long after the model has stopped improving on unseen data.
# patience=3 tolerates 3 epochs without improvement before stopping, and
# restore_best_weights=True rolls the model back to its best epoch.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                            patience=3,
                                            restore_best_weights=True)

history2 = model2.fit(X_train, y_train, epochs=100,
                      batch_size=2000,
                      validation_split=0.2,
                      callbacks=[callback])

In [None]:
# Stopped early! Let's replot.
history_df2 = pd.DataFrame(history2.history)

# One figure per training curve, each with its own y-axis label
for column, y_label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    history_df2[column].plot()
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.show()

In [None]:
# Once we are happy we can save our model for later
model2.save('bestmodel')

In [None]:
# We can also load it up to use for predictions
new_model = tf.keras.models.load_model('bestmodel')

In [None]:
# Confirm the reloaded model has the expected layers
# (a bare `new_model` only shows an opaque object repr)
new_model.summary()

In [None]:
new_model.predict(tf.expand_dims(X_train[0], axis=0))

In [None]:
y_train[0]