In [None]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import keras
import tensorflow as tf
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import Accuracy
from tensorflow.keras import layers
from sklearn import metrics
import keras_tuner
import seaborn as sns
import struct
import time
import pickle
import itertools
# Seed every random number generator the notebook relies on. NumPy's seed
# alone does not cover Python's `random` module or the Keras/TensorFlow
# backend, which drive weight initialisation, dropout and batch shuffling.
np.random.seed(0)
keras.utils.set_random_seed(0)

In [None]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix', cmap=plt.cm.Blues):
    """
    Print and plot a confusion matrix on the current matplotlib figure.

    Parameters
    ----------
    cm : numpy.ndarray
        Square 2-D matrix; rows are drawn as "True label" and columns as
        "Predicted label".
    classes : sequence
        Tick labels, one per row/column of ``cm``.
    normalize : bool, default False
        When True every row is divided by its row sum before printing and
        plotting. Rows whose sum is zero are left as zeros instead of
        producing NaN values.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap passed to ``plt.imshow``.
    """
    if normalize:
        row_totals = cm.sum(axis=1, keepdims=True)
        # Divide only where the row total is non-zero: a class with no true
        # samples would otherwise yield NaN cells and a NaN colour threshold.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)
    fmt = '.2f' if normalize else 'd'
    # Cells darker than half of the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")
    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
def read_idx(filename):
    """Load an IDX file (the container format used by MNIST) as a NumPy array.

    The file starts with a 4-byte header: two zero bytes, a one-byte element
    type code and a one-byte dimension count. It is followed by one
    big-endian unsigned 32-bit size per dimension and then the raw data.

    Parameters
    ----------
    filename : str or path-like
        Path of the IDX file to read.

    Returns
    -------
    numpy.ndarray
        Array with the shape stored in the header. Unsigned-byte files are
        returned as ``uint8``; other type codes use the matching big-endian
        dtype.

    Raises
    ------
    ValueError
        If the leading magic bytes are not zero or the type code is unknown.
    """
    # IDX element type codes mapped to (big-endian) NumPy dtypes.
    idx_dtypes = {
        0x08: np.dtype(np.uint8),
        0x09: np.dtype(np.int8),
        0x0B: np.dtype('>i2'),
        0x0C: np.dtype('>i4'),
        0x0D: np.dtype('>f4'),
        0x0E: np.dtype('>f8'),
    }
    with open(filename, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        if zero != 0 or data_type not in idx_dtypes:
            raise ValueError(
                "{!r} is not a valid IDX file (magic={}, type code=0x{:02X})".format(
                    filename, zero, data_type))
        # One big-endian uint32 per dimension.
        shape = tuple(struct.unpack('>I', f.read(4))[0] for _ in range(dims))
        return np.frombuffer(f.read(), dtype=idx_dtypes[data_type]).reshape(shape)

In [None]:
def build_model(hp):
    """Build and compile the dense classifier searched by KerasTuner.

    Architecture: Flatten -> Dense(128, relu) -> two tunable Dense layers ->
    optional Dropout(0.25) -> Dense(10, softmax).

    Hyperparameters registered on ``hp``:
      * ``units_1`` / ``activation_1``: width (32..512, step 16) and
        activation ("relu" or "tanh") of the first tunable layer.
      * ``units_2`` / ``activation_2``: width (96..512, step 16) and
        activation ("relu" or "tanh") of the second tunable layer.
      * ``dropout``: whether the Dropout layer is inserted.
      * ``lr``: Adam learning rate, log-sampled in [1e-4, 1e-2].

    Each tunable layer uses its own hyperparameter names. KerasTuner
    identifies hyperparameters by name, so registering "units" and
    "activation" twice makes both layers share a single value and the second
    layer's range is never applied.

    Args:
        hp: hyperparameter container exposing ``Int``, ``Choice``,
            ``Boolean`` and ``Float`` (e.g. ``keras_tuner.HyperParameters``).

    Returns:
        The compiled model, using categorical cross-entropy loss (one-hot
        labels) and the accuracy metric.
    """
    model = keras.Sequential()
    model.add(layers.Flatten())
    # No `input_shape` here: it is only honoured on the first layer of a
    # Sequential model, so on this second layer it had no effect.
    model.add(layers.Dense(units=128, activation='relu'))

    # (layer index, minimum width) for the two tunable hidden layers.
    for index, min_units in ((1, 32), (2, 96)):
        model.add(
            layers.Dense(
                # Tune number of units.
                units=hp.Int("units_{}".format(index),
                             min_value=min_units, max_value=512, step=16),
                # Tune the activation function to use.
                activation=hp.Choice("activation_{}".format(index),
                                     ["relu", "tanh"]),
            )
        )

    # Tune whether to use dropout.
    if hp.Boolean("dropout"):
        model.add(layers.Dropout(rate=0.25))
    model.add(layers.Dense(10, activation="softmax"))
    # Define the optimizer learning rate as a hyperparameter.
    learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

In [None]:
# Training split: read the image file, scale the pixel values by 1/255 and
# flatten every 28x28 image into a 784-long row; labels are read unchanged.
raw_train = read_idx("train-images-idx3-ubyte")
train_data = (raw_train / 255.0).reshape((60000, 28*28))
train_label = read_idx("train-labels-idx1-ubyte")

# Test split: same processing as the training split.
raw_test = read_idx("t10k-images-idx3-ubyte")
test_data = (raw_test / 255.0).reshape((10000, 28*28))
test_label = read_idx("t10k-labels-idx1-ubyte")

In [None]:
num_classes = 10  # number of classes, here is 10 (0,1,...,9)
# `train_label` is overwritten with its one-hot encoding, so only encode
# while it is still a 1-D vector of class indices. Without this guard,
# re-running the cell would encode the already-encoded labels a second time.
if np.ndim(train_label) == 1:
    train_label = keras.utils.to_categorical(train_label, num_classes)
# The test labels keep their integer form in `test_label`; the one-hot copy
# lives in `test_label_cat`.
test_label_cat = keras.utils.to_categorical(test_label, num_classes)

In [None]:
# Smoke test: call build_model once with a fresh HyperParameters container
# to confirm the model builds and compiles before starting the search.
build_model(keras_tuner.HyperParameters())

In [None]:
# Random search over the hyperparameters declared in build_model.
tuner = keras_tuner.RandomSearch(
    hypermodel=build_model,
    objective="val_accuracy",
    max_trials=10,            # number of hyperparameter combinations to try
    executions_per_trial=4,   # models trained per combination
    seed=0,                   # fixed seed so the sampled trials are repeatable
    overwrite=True,           # discard results of earlier runs in this project
    directory="NN_Hyper_Test_DIR",
    project_name="NN_Hyper_Test",
)
tuner.search_space_summary()

In [None]:
# Select hyperparameters on a slice held out from the training data rather
# than on the test set. The test set is used later to report the final
# accuracy, so tuning on it would make that number optimistically biased.
# `validation_split` is forwarded to `model.fit`, which takes the last 10%
# of the training arrays as validation data.
tuner.search(train_data, train_label, batch_size=4096, epochs=16, validation_split=0.1)

In [None]:
# Get the top 2 models.
# Only the first entry is used below, as the best model; the second one is
# retrieved but not referenced in this cell.
models = tuner.get_best_models(num_models=2)
best_model = models[0]
# Build the model.
# Needed for `Sequential` without specified `input_shape`.
# The batch dimension is left open (None); each sample has 784 features.
best_model.build(input_shape=(None, 784))
best_model.summary()

In [None]:
# Print the tuner's summary of the completed search trials.
tuner.results_summary()

In [None]:
# Get the top 5 hyperparameter sets; only the first one is used below.
best_hps = tuner.get_best_hyperparameters(5)
# Build a fresh model with the best hp.
model = build_model(best_hps[0])
X_Data = train_data
Y_Data = train_label
epochs = 128
batchsize = 4096  # same batch size as the hyperparameter search (was 4095)
# Time only the fit call so the figure matches its "Train Time" label.
# perf_counter is a monotonic clock intended for measuring intervals.
start = time.perf_counter()
model.fit(x=X_Data, y=Y_Data, batch_size = batchsize, epochs = epochs)
end = time.perf_counter()
print("NN Train Time: ", end - start,"s")

In [None]:
# Persist the trained model to disk with pickle.
# NOTE(review): whether a Keras model can be pickled depends on the
# installed Keras version; the library's own `model.save(...)` is the
# documented way to store a model. Confirm this works in the target
# environment.
pkl_filename = "pickle_nn_hyper_model.pkl"
with open(pkl_filename, 'wb') as file:
    pickle.dump(model, file)

In [None]:
# Reload the model from the pickle file; this replaces the in-memory `model`.
# SECURITY NOTE(review): pickle.load can execute arbitrary code contained in
# the file. Only load pickle files that were produced by this notebook or
# come from another trusted source.
pkl_filename = "pickle_nn_hyper_model.pkl"
with open(pkl_filename, 'rb') as file:
    model = pickle.load(file)

In [None]:
# Evaluate on the test set with one-hot labels, matching the
# categorical cross-entropy loss the model was compiled with.
start = time.perf_counter()
test_loss, test_acc = model.evaluate(test_data, test_label_cat)
# Stop the clock before printing so only the evaluate call is measured.
end = time.perf_counter()
print("Test Loss: {}, Test Accuracy: {}".format(test_loss, test_acc))
# Labelled "Evaluate" so it is not confused with the predict timing that a
# later cell prints as "NN Predict Time".
print("NN Evaluate Time: ", end - start,"s")

In [None]:
# Time the forward pass over the test set plus the argmax decoding.
start = time.time()
x_test = test_data
y_true = test_label
# One row of class scores per test sample.
y_pred = model.predict(x_test)
# Index of the highest score in each row is the predicted class.
y_pred_classes = y_pred.argmax(axis=1)
end = time.time()
print("NN Predict Time: ", end - start,"s")

In [None]:
# Confusion matrix of integer true labels against predicted classes,
# plotted with the digits "0".."9" as tick labels.
cm = metrics.confusion_matrix(y_true, y_pred_classes)
plot_confusion_matrix(cm, [str(digit) for digit in range(10)])

In [None]:
# Boolean mask that is True wherever the predicted class differs from the
# true label.
errors = np.not_equal(y_pred_classes, y_true)

# Keep only the misclassified samples in each array.
x_test_errors = x_test[errors]
y_true_errors = y_true[errors]
y_pred_errors = y_pred[errors]
y_pred_classes_errors = y_pred_classes[errors]

In [None]:
# Score the model gave to its (wrong) predicted class, per misclassified sample.
y_pred_errors_probability = np.max(y_pred_errors, axis=1)
# Score the model gave to the true class of each misclassified sample.
# Row-wise indexing picks one value per row directly, instead of building an
# (n_errors x n_errors) matrix with np.take and reading only its diagonal.
true_probability_errors = y_pred_errors[np.arange(len(y_true_errors)), y_true_errors]
# Large values mean the model was confidently wrong.
diff_errors_pred_true = y_pred_errors_probability - true_probability_errors

# Get list of indices of sorted differences
sorted_idx_diff_errors = np.argsort(diff_errors_pred_true)
top_idx_diff_errors = sorted_idx_diff_errors[-20:] # 20 last ones (largest differences)

In [None]:
# Show Top Errors
num = len(top_idx_diff_errors)
if num == 0:
    # plt.subplots cannot create a grid with zero columns.
    print("No misclassified samples to display.")
else:
    # squeeze=False keeps the axes in a 2-D array even when num == 1, so
    # indexing below works for any number of samples.
    f, ax = plt.subplots(1, num, figsize=(100,30), squeeze=False)
    ax = ax[0]

    for i in range(0, num):
        idx = top_idx_diff_errors[i]
        # Restore the flattened 784-vector to a 28x28 image for display.
        sample = x_test_errors[idx].reshape(28,28)
        y_t = y_true_errors[idx]
        y_p = y_pred_classes_errors[idx]
        ax[i].imshow(sample, cmap='gray')
        ax[i].set_title("Predicted label :{}\nTrue label: {}".format(y_p, y_t), fontsize=22)