<a href="https://colab.research.google.com/github/CristianCosci/LAB_MachineLearning_course/blob/main/Keras_MLP_EMNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Build, train and test an MLP classification model for the EMNIST Letters dataset. This dataset is very similar to the one used in the code above, but it has 26 classes instead of 10.

You can install and load the EMNIST dataset with the following lines (https://libraries.io/pypi/emnist):

In [6]:
import numpy as np
from tensorflow import keras
from keras import layers
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt

In [None]:
!pip install emnist

In [None]:
# Display what list_datasets() returns - presumably the names of the datasets the
# `emnist` package can load; the 'letters' one is the dataset loaded further down.
from emnist import list_datasets
list_datasets()

In [4]:
# Load the training samples of the 'letters' dataset.
# NOTE: despite its singular name, `image` is the array that is later split into
# X_train / X_test, i.e. it holds all the images; `labels` holds the matching labels.
from emnist import extract_training_samples
image, labels = extract_training_samples('letters')

In [None]:
# Hold out 33% of the samples for testing; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    image,
    labels,
    test_size=0.33,
    random_state=42,
)

# Quick sanity check of the training features and labels: container type, then shape
for split in (X_train, y_train):
    print(type(split))
    print(split.shape)

In [None]:
# Distinct label values present in the training split, i.e. the classes
unique_labels = np.unique(y_train)
print(unique_labels)

In [None]:
import string

# One display name per class: the lowercase ASCII letters in alphabetical order
class_names = [letter for letter in string.ascii_lowercase]
print(class_names)
print(len(class_names))

In [None]:
# Plot the first 25 training images in a 5 x 5 grid.
# The loop variable is deliberately NOT called `image`: that name already holds the
# full dataset array loaded earlier, and overwriting it would break the train/test
# split cell if it is ever re-run after this one.
plt.figure(figsize=(10, 10))
for i, sample in enumerate(X_train[0:25]):
  plt.subplot(5, 5, i + 1)
  # Hide ticks and grid so that only the glyph is visible
  plt.xticks([])
  plt.yticks([])
  plt.grid(False)
  plt.imshow(sample, cmap='Greys')

plt.show()

In [None]:
# Flatten every 28 x 28 image (2D) into a single row of 784 features:
# the dense layers of an MLP only accept 1D feature vectors.
feature_vector_length = 28 * 28  # X_train[0].shape[0] * X_train[0].shape[1]
X_train = X_train.reshape(len(X_train), feature_vector_length)
X_test = X_test.reshape(len(X_test), feature_vector_length)

# Shapes after flattening: train set, test set, one single record
for shape in (X_train.shape, X_test.shape, X_train[0].shape):
    print(shape)

In [None]:
print("max pixel values : ", np.max(X_train))
print("min pixel values: ", np.min(X_train))


def _scale_to_unit_interval(pixels):
    """Return `pixels` as float32 scaled into [0, 1].

    The division by 255 is applied only while the maximum is still above 1,
    so re-running this cell does not shrink data that was already normalized.
    """
    pixels = pixels.astype("float32")
    if pixels.max() > 1.0:
        pixels = pixels / 255  # 255 == np.max(X_train) == np.max(X_test) on the raw data
    return pixels


# Normalization in [0,1]
X_train = _scale_to_unit_interval(X_train)
X_test = _scale_to_unit_interval(X_test)

# Value range of the whole training set after normalization
print("max pixel values : ", np.max(X_train))
print("min pixel values: ", np.min(X_train))

# Value range of the first training record after normalization
print("max pixel values : ", np.max(X_train[0]))
print("min pixel values: ", np.min(X_train[0]))
# print(X_train[0])

In [None]:
# Number of distinct labels found in the training split
num_classes = np.unique(y_train).size
print(num_classes)

In [None]:
# MLP: 784 inputs -> 512 -> 256 -> softmax, with dropout after each hidden layer.
# The output layer has num_classes + 1 units because the raw labels are used directly
# as class indices by the sparse loss; presumably they are 1-based (the notebook later
# maps predictions with class_names[y_pred-1]), so index 0 stays unused - TODO confirm.
model = keras.Sequential([
    layers.Dense(512, activation="relu", input_shape=(784,)),
    layers.Dropout(0.2),
    layers.Dense(256, activation="relu"),
    layers.Dropout(0.3),
    layers.Dense(num_classes + 1, activation="softmax"),
])

model.summary()

In [None]:
batch_size, epochs = 128, 15

# Sparse categorical cross-entropy: the targets are integer labels, not one-hot vectors
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# 10% of the training data is set aside for validation; `history` keeps the per-epoch metrics
history = model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
)

In [None]:
# Evaluate the trained model on the held-out test split
score = model.evaluate(X_test, y_test)
print(model.metrics_names)

# score[0] is printed as the loss and score[1] as the accuracy
test_loss, test_accuracy = score[0], score[1]
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)

In [None]:
# Predict the class of a single test record
n = 20
class_scores = model.predict(X_test[n:n+1])  # slicing keeps the leading batch dimension
print(class_scores)

# The predicted class is the index of the highest score
y_pred = class_scores.argmax()
print('Predicted class: ', y_pred)

In [None]:
# Check if the record has been classified correctly:
# show the image, then compare the predicted letter with the actual one.
X_test_vis = X_test[n].reshape(28, 28)
print(X_test_vis.shape)
plt.imshow(X_test_vis, cmap = plt.cm.binary)

# Labels are mapped to letters with a -1 offset (label 1 -> 'a', presumably up to 26 -> 'z')
print(class_names[y_pred-1])

# Without the ground truth the prediction cannot actually be checked, so print it too
true_label = int(y_test[n])
print('Actual class: ', true_label, class_names[true_label-1])
print('Correctly classified: ', int(y_pred) == true_label)

In [None]:
# Training vs. validation curves, one figure per metric.
# Each entry: (training key, validation key, figure title, y-axis label)
curves = (
    ("accuracy", "val_accuracy", 'Model accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss', 'Loss'),
)

for train_key, val_key, title, y_label in curves:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Val'], loc='upper left')
    plt.show()

In [86]:
# Intentionally empty: the leftover `y_pred = y_pred.argmax()` that lived here only
# overwrote the single prediction made earlier. y_pred is recomputed for the whole
# test set in the confusion-matrix cell, so nothing depends on this statement.

In [105]:
# Confusion Matrix
from sklearn.metrics import confusion_matrix

# Predicted label of every test record = index of its highest softmax score
y_pred = model.predict(X_test)
y_pred = y_pred.argmax(axis=1)

# Pin the label set to 1..len(class_names) so that row/column k-1 always corresponds to
# class_names[k-1]. Without `labels`, the matrix shape depends on which labels happen
# to occur (e.g. a single prediction of the unused class 0 would add a row and column
# and misalign the letter tick labels of the heatmap).
letter_labels = np.arange(1, len(class_names) + 1)
matrix = confusion_matrix(y_test, y_pred, labels=letter_labels)

In [None]:
import seaborn as sns
from matplotlib import rcParams

# Large figure so that all the annotated cells stay readable
sns.set(rc={'figure.figsize':(25,15)})

# fmt='d' prints the integer counts in full; the default annotation format would
# abbreviate large counts in scientific notation (e.g. 1.1e+03).
ax = sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues')

ax.set_title('Seaborn Confusion Matrix with labels\n\n')
ax.set_xlabel('\nPredicted Letters Category')
ax.set_ylabel('Actual Letters Category ')

## Tick labels - List must be in alphabetical order
ax.xaxis.set_ticklabels(class_names)
ax.yaxis.set_ticklabels(class_names)

## Display the visualization of the Confusion Matrix.
plt.show()

In [None]:
from sklearn.metrics import (
    classification_report,
    precision_recall_fscore_support,
    precision_score,
    recall_score,
)

# average=None: no averaging is requested, so each metric comes back as an array
per_class_metrics = precision_recall_fscore_support(y_test, y_pred, average=None)
precision, recall, fscore, support = per_class_metrics

print(f'Precision : {precision}')
print(f'Recall    : {recall}')
print(f'F-score   : {fscore}')
print(f'Support   : {support}')