In [None]:
%matplotlib inline

# Varianza contra Sesgo

Basado en https://colab.research.google.com/github/AviatorMoser/keras-mnist-tutorial/blob/master/MNIST%20in%20Keras.ipynb

Autores: Daniel Moser, Xavier Snelgrove, Yash Katariya

Importemos los paquetes necesarios

In [None]:
import tensorflow as tf
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up front.
# Looping over the device list (rather than indexing element 0) means an empty
# list on a CPU-only machine is simply skipped, and every visible GPU receives
# the same memory-growth setting.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)

import numpy as np                   # advanced math library
import matplotlib.pyplot as plt      # MATLAB like plotting routines
import random                        # for generating random numbers

from keras.datasets import mnist     # MNIST dataset is included in Keras
from keras.models import Sequential  # Model type to be used

from keras.layers.core import Dense, Dropout, Activation # Types of layers to be used in our model
from keras.utils import np_utils                         # NumPy related tools

# Metrics tools
from keras.callbacks import TensorBoard
from sklearn.metrics import confusion_matrix, classification_report, precision_recall_fscore_support

Carguemos el set de datos MNIST original con 70000 imágenes de tamaño 28x28

In [None]:
# mnist.load_data() yields two (images, labels) pairs: the training split
# followed by the test split.
train_split, test_split = mnist.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# Print the array shape of every split so the sizes can be checked by eye.
for split_name, split_array in (("X_train", X_train), ("y_train", y_train),
                                ("X_test", X_test), ("y_test", y_test)):
    print(split_name + " shape", split_array.shape)

In [None]:
plt.rcParams['figure.figsize'] = (7,7) # Make the figures a bit bigger

# Draw a 3 x 3 grid of randomly chosen training images, each titled with its label.
for i in range(9):
    plt.subplot(3,3,i+1)
    # randrange() excludes its upper bound, so `num` is always a valid index.
    # (randint() includes both ends and could yield len(X_train), which is out of range.)
    # `num` keeps the last sampled index after the loop finishes.
    num = random.randrange(len(X_train))
    plt.imshow(X_train[num], cmap='gray', interpolation='none')
    plt.title("Class {}".format(y_train[num]))

plt.tight_layout()

In [None]:
# just a little function for pretty printing a matrix
def matprint(mat, fmt="g"):
    """Print a 2-D array as a table with right-padded, aligned columns.

    Args:
        mat: array-like object exposing ``.T`` and row iteration (a 2-D NumPy
            array in this notebook).
        fmt: format-spec suffix applied to every element (default ``"g"``).

    Each cell is padded to the widest formatted entry of its column and is
    followed by two spaces; every row ends with a newline.
    """
    cell_spec = "{:" + fmt + "}"

    # Widest formatted entry of each column decides that column's width.
    widths = []
    for column in mat.T:
        widths.append(max(len(cell_spec.format(value)) for value in column))

    for row in mat:
        for width, value in zip(widths, row):
            padded_spec = "{:" + str(width) + fmt + "}"
            print(padded_spec.format(value), end="  ")
        print("")

# Pretty-print the pixel matrix of one training image. `num` is the last index
# sampled by the plotting loop in the preceding cell, so that cell must run first.
matprint(X_train[num])

In [None]:
# For each split: reshape the 28 x 28 matrices into 784-length vectors,
# convert the integer pixels to 32-bit floats, then divide every value by 255.
X_train = X_train.reshape(60000, 784).astype('float32') / 255   # 60,000 training vectors
X_test = X_test.reshape(10000, 784).astype('float32') / 255     # 10,000 test vectors

print("Training matrix shape", X_train.shape)
print("Testing matrix shape", X_test.shape)

In [None]:
# One-hot encode the integer class labels of both splits.

nb_classes = 10  # number of unique digits

Y_train, Y_test = (
    np_utils.to_categorical(labels, nb_classes) for labels in (y_train, y_test)
)

In [None]:
# A Sequential model is a plain linear stack of layers; here the layers are
# handed to the constructor in order instead of being added one at a time.

model = Sequential([
    # FIRST HIDDEN LAYER
    # 512 nodes (artificial neurons); each one receives every element of the
    # input vector and applies its own weights and bias.
    Dense(512, input_shape=(784,)),  # (784,) is a one-element tuple: a 784-length vector
    # The activation is a non-linear function applied to the layer output.
    # ReLU sets negative values to zero and leaves positive values unchanged.
    Activation('relu'),
    # Dropout zeroes a random selection of outputs, which helps protect the
    # model from memorizing ("overfitting") the training data.
    Dropout(0.2),

    # SECOND HIDDEN LAYER
    # Same structure as the first one, but its 512 nodes are fed by the 512
    # outputs of the previous layer rather than by the 784 input pixels.
    Dense(512),
    Activation('relu'),
    Dropout(0.2),

    # OUTPUT LAYER
    # 10 neurons, fully connected to the previous 512-node layer: one per
    # class. Softmax is applied to the 10 outputs.
    Dense(10),
    Activation('softmax'),
])

# Summarize the built model

model.summary()

## Compiling the model

Keras is built on top of TensorFlow. Both packages allow you to define a computation graph in Python, which then compiles and runs efficiently on the CPU or GPU without the overhead of the Python interpreter.

When compiling a model, Keras asks you to specify your loss function and your optimizer. The loss function we'll use here is called categorical cross-entropy, and is a loss function well-suited to comparing two probability distributions.

Our predictions are probability distributions across the ten different digits (e.g. "we're 80% confident this image is a 3, 10% sure it's an 8, 5% it's a 2, etc."), and the target is a probability distribution with 100% for the correct category, and 0 for everything else. The cross-entropy is a measure of how different your predicted distribution is from the target distribution. More detail at [Wikipedia](https://en.wikipedia.org/wiki/Cross_entropy).

The optimizer helps determine how quickly the model learns through gradient descent. The rate at which it descends the gradient is called the learning rate.

In [None]:
# Configure training: Adam as the optimizer, categorical cross-entropy as the
# loss, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# TensorBoard callback, created with its default arguments; it is handed to
# model.fit() through `callbacks` in the training cell.
tcb = TensorBoard()

In [None]:
# Train on at most the first 10,000 training samples; the test split is passed
# as validation data and the TensorBoard callback records the run.

m = min(10000, X_train.shape[0])
history_callback = model.fit(
    X_train[:m], Y_train[:m],
    epochs=5,
    batch_size=128,
    verbose=0,
    validation_data=(X_test, Y_test),
    callbacks=[tcb],
)

In [None]:
# Evaluate on the test split; the first two entries of `score` are reported
# as the test score and the test accuracy, in that order.
score = model.evaluate(X_test, Y_test)
for caption, value in zip(('Test score:', 'Test accuracy:'), score):
    print(caption, value)

In [None]:
# Pull the recorded curves out of the history object returned by model.fit().
history = history_callback.history
acc, loss = history['accuracy'], history['loss']
val_acc, val_loss = history['val_accuracy'], history['val_loss']

# Two stacked panels: accuracy on top, loss below, training vs. validation in each.
fig, (ax1, ax2) = plt.subplots(2, figsize=(8, 6))
for curve, legend_text in ((acc, "Training Accuracy"), (val_acc, "Validation Accuracy")):
    ax1.plot(curve, label=legend_text)
for curve, legend_text in ((loss, "Training Loss"), (val_loss, "Validation Loss")):
    ax2.plot(curve, label=legend_text)

ax2.set_xlabel('epochs')
for ax in (ax1, ax2):
    ax.legend()
plt.show()

# Confusion matrix

In [None]:
# Run the trained model on the test inputs.
pY_test = model.predict(X_test)

# confusion_matrix() is given one class label per sample. The model output has
# one score per class, so the predicted label is the index of the largest
# score in each row. The ground truth uses the integer labels in y_test
# directly (the one-hot alternative would be Y_test.argmax(axis=1)).
predictions = pY_test.argmax(axis=1)
groundTruth = y_test

cm = confusion_matrix(groundTruth, predictions)
print(cm)

In [None]:
# Build the classification report for the test predictions and print it.
rp = classification_report(groundTruth, predictions)
print(rp)


In [None]:
# Precision / recall / F-score / support computed with "weighted" averaging.
# The following cell reads stats[0] as precision and stats[1] as recall.
stats = precision_recall_fscore_support(groundTruth, predictions,average="weighted")

In [None]:
# Report the first two entries of `stats` as precision and recall.
print("Precision={0}, Recall={1}".format(*stats[:2]))