In [None]:
import ipympl
import IPython
import ipywidgets as pywidgets
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
%matplotlib widget
import numpy as np
import pandas
import random
import seaborn as sb
import sklearn
from sklearn import decomposition
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
import sys
import tensorflow
from tensorflow import keras
from tensorflow.keras import backend, Model
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential, load_model
import time

In [None]:
# Use the MNIST dataset that comes included with Keras
(data_training, labels_training), (data_testing, labels_testing) = mnist.load_data()

# Load the trained model from disk
model = load_model('../Data/digit_recognizer_model.hdf5')

# Load the model's training history. The .npy file stores a pickled Python
# object (presumably the dict of per-epoch metrics indexed later as
# history['val_acc'], history['loss'], ...), and .item() unwraps it from the
# 0-d array that np.load returns.
# allow_pickle is a boolean flag, so pass True rather than the string 'TRUE'.
# NOTE(review): unpickling can execute arbitrary code - only load a history
# file that comes from a trusted source.
history = np.load('../Data/model_history.npy',
                  allow_pickle = True).item()

In [None]:
def result_eval_display(data_testing, labels_testing, data_training, labels_training, result_id):
    """Evaluate the global ``model`` on the test set and return one result as a percentage.

    Parameters
    ----------
    data_testing : array of test images; reshaped here to (N, 28, 28, 1).
    labels_testing : integer class labels for ``data_testing``; one-hot encoded here.
    data_training, labels_training : not used by the evaluation. They are kept
        in the signature only so existing callers continue to work.
    result_id : 0 selects ``evaluation[0]`` (used by callers as the loss),
        any other value selects ``evaluation[1]`` (used by callers as the accuracy).

    Returns
    -------
    The selected entry of ``model.evaluate(...)`` multiplied by 100.
    """

    # Reshape each image into 28x28 pixels with a single channel, convert to
    # float and scale the pixel values from 0-255 down to the 0-1 range
    images = data_testing.reshape(data_testing.shape[0], 28, 28, 1).astype('float32') / 255

    # One-hot encode the labels so they can be compared with the model output
    one_hot_labels = keras.utils.to_categorical(labels_testing)

    # Use the built-in evaluate function
    evaluation = model.evaluate(images,
                                one_hot_labels,
                                verbose = 1)

    # Return the loss (index 0) or accuracy (index 1) as a percentage
    return evaluation[0 if result_id == 0 else 1] * 100

In [None]:
def image_click(data_testing, index):
    """Predict the digit shown in ``data_testing[index]`` with the global ``model``.

    Parameters
    ----------
    data_testing : array of 28x28 images.
    index : position of the image to classify.

    Returns
    -------
    (digit, probability) : the index of the highest model output and that
        highest output value itself.
    """

    # Reshape the data so that it will be processed correctly (batch of one image)
    image = data_testing[index].reshape(1, 28, 28, 1)

    # Raw integer pixels are in the 0-255 range; scale them to 0-1 the same way
    # result_eval_display does before evaluating the model. Data that is
    # already floating point is passed through unchanged.
    if np.issubdtype(image.dtype, np.integer):
        image = image.astype('float32') / 255

    # Use the predict() function to determine the number in the image
    result = model.predict(image)

    # np.max reduces over the whole (1, classes) output; the builtin max()
    # would iterate over rows and hand back the entire probability row instead
    return np.argmax(result), np.max(result)

In [None]:
class figure_template(object):
    """Ties a figure showing one image to click-to-predict behavior and a "Clear" button.

    Clicking the figure sets the axes title to the digit predicted by
    ``image_click``; clicking ``button_clear`` resets the title to an empty string.
    """

    def __init__(self, fig, index, data_testing):
        """Store the figure, image index and dataset, then wire up both callbacks.

        Parameters
        ----------
        fig : pyplot figure that displays the image.
        index : index of the displayed image inside ``data_testing``.
        data_testing : dataset the image was taken from.
        """

        # Standard pyplot figure
        self.fig = fig

        # Index of the image in the MNIST dataset
        self.index = index

        # MNIST dataset
        self.data_testing = data_testing

        # Axes of the supplied figure (not whatever pyplot considers current)
        self.axis_current = fig.gca()

        # Uses the function to predict an image when the image is clicked
        self.cid = self.fig.canvas.mpl_connect('button_press_event',
                                               self.image_clicked)

        # Clear button used to clear the prediction.
        # Button has no 'fontsize' trait, so that keyword is not passed.
        self.button_clear = pywidgets.Button(description = "Clear")

        # Color of the clear button
        self.button_clear.style.button_color = "lightgray"

        # Uses the function to clear the prediction when the button is clicked
        self.button_clear.on_click(self.button_clear_clicked)

    # Predicts the number in the image
    def image_clicked(self, event):
        """Show the predicted digit for this figure's image as the axes title."""
        # Use the dataset stored on the instance rather than a module global
        prediction = image_click(self.data_testing, self.index)[0]
        self.axis_current.set_title(prediction,
                                    fontsize = 24)

    # Clears the prediction
    def button_clear_clicked(self, event):
        """Reset the axes title to an empty string."""
        self.axis_current.set_title("",
                                    fontsize = 24)

In [None]:
def image_create():
    """Build a VBox holding a "Clear" button and a figure of one random test image.

    The image is drawn from the module-level ``data_testing``. The figure is
    created inside an Output widget and wrapped in a ``figure_template`` so it
    reacts to clicks.

    Returns
    -------
    A ``VBox`` whose children are the template's clear button and the Output widget.
    """

    # random.sample with a sample size of 1 yields a one-element list; unpack it
    (picked,) = random.sample(range(len(data_testing)), 1)

    # Standard output method for widgets: create the figure inside the Output context
    figure_output = pywidgets.Output()
    with figure_output:
        fig = plt.figure()

    # Draw the image with the black-and-white 'binary' colormap
    plt.imshow(data_testing[picked],
               cmap = plt.get_cmap('binary'),
               picker = True)

    # Attach the click / clear behavior to the new figure
    template = figure_template(fig, picked, data_testing)

    # Vertical box with the clear button and the new image
    return pywidgets.VBox([template.button_clear, figure_output])

In [None]:
def image_new(event, vbox):
    """Replace the contents of ``vbox`` with a freshly created image box.

    Parameters
    ----------
    event : not used by this function.
    vbox : container whose ``children`` are overwritten with the children of
        a new ``image_create()`` box.
    """

    # Close every open pyplot figure before creating the replacement.
    # NOTE(review): 'all' also closes figures created by other cells.
    plt.close('all')

    # Build a new image box and move its children into the existing container
    vbox.children = image_create().children

In [None]:
# Convenience wrapper so callers only have to pass the result index
def result_calculate(result_id):
    """Run ``result_eval_display`` on the module-level MNIST arrays.

    Parameters
    ----------
    result_id : forwarded unchanged to ``result_eval_display``
        (callers use 0 for loss and 1 for accuracy).

    Returns
    -------
    Whatever ``result_eval_display`` returns for that index.
    """
    return result_eval_display(data_testing,
                               labels_testing,
                               data_training,
                               labels_training,
                               result_id)

In [None]:
def accuracy_calculate(event, button, vbox):
    """Evaluate the model's accuracy and show the outcome in ``vbox``.

    Parameters
    ----------
    event : not used by this function.
    button : the button that triggers the calculation; it stays the first
        child of ``vbox``.
    vbox : container that ends up holding the button followed by two labels
        (images tested, correct predictions).
    """

    # Create the labels for the vbox
    label_total_tested = pywidgets.Label(value = '')
    label_total_correct = pywidgets.Label(value = '')

    # Show the button plus the two (still empty) labels. The children are
    # replaced rather than extended so repeated clicks do not stack up
    # additional label pairs.
    vbox.children = (button,
                     label_total_tested,
                     label_total_correct)

    # Calculate the accuracy result (a percentage)
    result = result_calculate(1)

    # Number of images tested
    total_tested = len(data_testing)

    # Number of correct predictions: accuracy percentage -> fraction -> count
    # 99.33 to 0.9933 to 9933
    # Rounded instead of truncated so floating point error cannot drop the
    # count by one (e.g. 9932.999... becoming 9932)
    total_correct = int(round(total_tested * result / 100))

    # Convert total_tested and total_correct to strings to set the labels
    label_total_tested.value = "Images Tested: " + str(total_tested)
    label_total_correct.value = "Correct Predictions: " + str(total_correct)

# "Calculate Accuracy" button: cyan, 50px high, 20% wide
button_accuracy = pywidgets.Button(description = "Calculate Accuracy")
button_accuracy.layout.width = '20%'
button_accuracy.layout.height = '50px'
button_accuracy.style.button_color = "cyan"

# Vertical box that starts out with just the button;
# accuracy_calculate puts its two result labels into this box
vbox_accuracy = pywidgets.VBox([button_accuracy])

# Run the accuracy calculation whenever the button is clicked
button_accuracy.on_click(
    lambda _clicked: accuracy_calculate('button_press_event',
                                        button_accuracy,
                                        vbox_accuracy))

# Show the vertical box
display(vbox_accuracy)

In [None]:
def loss_calculate(event, button, vbox):
    """Evaluate the model's loss and show the outcome in ``vbox``.

    Parameters
    ----------
    event : not used by this function.
    button : the button that triggers the calculation; it stays the first
        child of ``vbox``.
    vbox : container that ends up holding the button followed by two labels
        (images tested, incorrect predictions).
    """

    # Create the labels for the vbox
    label_total_tested = pywidgets.Label(value = '')
    label_total_loss = pywidgets.Label(value = '')

    # Show the button plus the two (still empty) labels. The children are
    # replaced rather than extended so repeated clicks do not stack up
    # additional label pairs.
    vbox.children = (button,
                     label_total_tested,
                     label_total_loss)

    # Calculate the loss result (loss value multiplied by 100)
    result = result_calculate(0)

    # Number of images tested
    total_tested = len(data_testing)

    # Loss "percentage" -> fraction -> count, rounded down
    # 2.49 to 0.0249 to 248
    # NOTE(review): the loss plotted elsewhere in this notebook is labelled
    # cross entropy, which is not an error rate - so this figure is presumably
    # not the true number of misclassified images. Consider deriving that
    # count from the accuracy instead; confirm the intended meaning.
    total_loss = int(total_tested * (result / 100))

    # Convert total_tested and total_loss to strings to set the labels
    label_total_tested.value = "Images Tested: " + str(total_tested)
    label_total_loss.value = "Incorrect Predictions: " + str(total_loss)
    
# "Calculate Loss" button: cyan, 50px high, 20% wide
button_loss = pywidgets.Button(description = "Calculate Loss")
button_loss.layout.width = '20%'
button_loss.layout.height = '50px'
button_loss.style.button_color = "cyan"

# Vertical box that starts out with just the button;
# loss_calculate puts its two result labels into this box
vbox_loss = pywidgets.VBox([button_loss])

# Run the loss calculation whenever the button is clicked
button_loss.on_click(
    lambda _clicked: loss_calculate('button_press_event',
                                    button_loss,
                                    vbox_loss))

# Show the vertical box
display(vbox_loss)

In [None]:
def plot_accuracy():
    """Plot the validation accuracy stored in ``history`` against the epoch.

    Returns
    -------
    The Output widget inside whose context the figure was created.
    """

    # Standard output method for widgets: create the figure inside the Output context
    chart_output = pywidgets.Output()
    with chart_output:
        fig = plt.figure()

    # Work on the new figure's axes directly
    axes = fig.gca()

    # Title of the line chart
    axes.set_title("Digit Recognition - Accuracy")

    # Validation accuracy line
    axes.plot(history['val_acc'], color = 'tab:green')

    # Names of the x-axis and y-axis
    axes.set_xlabel("Epoch")
    axes.set_ylabel("Accuracy")

    return chart_output

# Show the accuracy chart
display(plot_accuracy())

In [None]:
def plot_accuracy_comparison():
    """Plot training and validation accuracy from ``history`` on one chart.

    Returns
    -------
    The Output widget inside whose context the figure was created.
    """

    # Standard output method for widgets: create the figure inside the Output context
    chart_output = pywidgets.Output()
    with chart_output:
        fig = plt.figure()

    # Work on the new figure's axes directly
    axes = fig.gca()

    # Title of the line chart
    axes.set_title("Digit Recognition - Accuracy Comparison")

    # Training accuracy line
    axes.plot(history['acc'],
              color = 'tab:blue',
              label = "Accuracy - Training")

    # Validation accuracy line
    axes.plot(history['val_acc'],
              color = 'tab:green',
              label = "Accuracy - Validation")

    # Names of the x-axis and y-axis
    axes.set_xlabel("Epoch")
    axes.set_ylabel("Accuracy")

    # Legend built from the line labels above
    axes.legend()

    return chart_output

# Show the accuracy comparison chart
display(plot_accuracy_comparison())

In [None]:
def plot_loss():
    """Plot the validation loss stored in ``history`` against the epoch.

    Returns
    -------
    The Output widget inside whose context the figure was created.
    """

    # Standard output method for widgets: create the figure inside the Output context
    chart_output = pywidgets.Output()
    with chart_output:
        fig = plt.figure()

    # Work on the new figure's axes directly
    axes = fig.gca()

    # Title of the line chart
    axes.set_title("Digit Recognition - Cross Entropy")

    # Validation loss line
    axes.plot(history['val_loss'], color = 'tab:green')

    # Names of the x-axis and y-axis
    axes.set_xlabel("Epoch")
    axes.set_ylabel("Loss")

    return chart_output

# Show the loss chart
display(plot_loss())

In [None]:
def plot_loss_comparison():
    """Plot training and validation loss from ``history`` on one chart.

    Returns
    -------
    The Output widget inside whose context the figure was created.
    """

    # Standard output method for widgets: create the figure inside the Output context
    chart_output = pywidgets.Output()
    with chart_output:
        fig = plt.figure()

    # Work on the new figure's axes directly
    axes = fig.gca()

    # Title of the line chart
    axes.set_title("Digit Recognition - Cross Entropy Comparison")

    # Training loss line
    axes.plot(history['loss'],
              color = 'tab:blue',
              label = "Loss - Training")

    # Validation loss line
    axes.plot(history['val_loss'],
              color = 'tab:green',
              label = "Loss - Validation")

    # Names of the x-axis and y-axis
    axes.set_xlabel("Epoch")
    axes.set_ylabel("Loss")

    # Legend built from the line labels above
    axes.legend()

    return chart_output

# Show the loss comparison chart
display(plot_loss_comparison())

In [None]:
def plot_digits():
    """Draw a bar chart of how often each digit 0-9 occurs in ``labels_testing``.

    Returns
    -------
    The Output widget inside whose context the figure was created.
    """

    # Tick labels for the x-axis: '0' through '9'
    axis_x = [str(digit) for digit in range(10)]

    # Count the occurrences of every digit in one vectorized pass;
    # minlength keeps ten bars even if a digit never appears
    amount_digit = np.bincount(labels_testing, minlength = 10)

    # Standard output method for widgets: create the figure inside the Output context
    output = pywidgets.Output()
    with output:
        fig = plt.figure()

    # Define the properties of the bar chart on the new figure's axes
    axes = fig.gca()
    axes.set_title("Number of Digits in Test Data")
    axes.set_xlabel("Digit")
    axes.set_ylabel("Amount")
    axes.set_yticks(np.arange(0, 1500, 200))
    axes.bar(axis_x, amount_digit)

    return output

# Show the digit bar chart
display(plot_digits())

In [None]:
# Read the MNIST testing data from its CSV file
data_testing_read = pandas.read_csv("../Data/mnist_test.csv")

# Keep the 'label' column on its own ...
label = data_testing_read['label']

# ... and everything except the 'label' column as the feature table
drop = data_testing_read.drop(columns = 'label')

# PCA reducer configured to keep two components
decomp = decomposition.PCA(n_components = 2)

# Cast the features to int, then standardize them with StandardScaler
scale = StandardScaler().fit_transform(drop.astype(int))

# Project the standardized features onto the two components and
# append the labels as a third column
fit_data = np.column_stack((decomp.fit_transform(scale), label))

# Put the projection and labels into a three-column table
data_frame = pandas.DataFrame(data = fit_data,
                              columns = ("First Principal",
                                         "Second Principal",
                                         "Legend"))

# A Label widget is shown above the plot to serve as its title
label_pca = pywidgets.Label("Principal Component Analysis",
                            layout = pywidgets.Layout(display = "flex",
                                                      justify_content = "center",
                                                      width = "45%",
                                                      height = "36px",
                                                      border = "solid 5px"))

# Display the label for the PCA scatter plot
display(label_pca)

# Scatter the two components with Seaborn, one hue per 'Legend' value
(sb.FacetGrid(data_frame, hue = 'Legend', height = 10)
   .map(plt.scatter, 'First Principal', 'Second Principal')
   .add_legend())

In [None]:
# Instruction label: clicking the image asks the trained model for a prediction
label_image = pywidgets.Label("Click the Image for a Prediction",
                              layout = pywidgets.Layout(display = "flex",
                                                        justify_content = "center",
                                                        width = "21%",
                                                        height = "36px",
                                                        border = "solid 5px"))

# Box with a clear button and an image drawn from the MNIST dataset
image_create_new = image_create()

# "Get New Image" button: light blue, 40px high, 20% wide
button_image_new = pywidgets.Button(description = "Get New Image")
button_image_new.style.button_color = "lightblue"
button_image_new.layout.width = '20%'
button_image_new.layout.height = '40px'

# Clicking the button swaps the contents of the image box for a new image
button_image_new.on_click(
    lambda _clicked: image_new('button_press_event',
                               image_create_new))

# Vertical box with instruction label, new image button, [clear button, and image from MNIST]
vbox_image = pywidgets.VBox([label_image,
                             button_image_new,
                             image_create_new])

# Show the vertical box
display(vbox_image)