In [47]:
import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from keras.callbacks import EarlyStopping
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam
from keras.utils import to_categorical
from keras import backend as K
from keras.preprocessing.image import ImageDataGenerator
from scipy.io import loadmat

In [48]:
# Load the EMNIST dataset of handwritten letters and digits.
# The .mat file is read into nested struct arrays; every struct field has to
# be unwrapped with [0, 0] before the array stored inside it is reachable.
emnist = loadmat('emnist-balanced.mat')
train_split = emnist['dataset']['train'][0, 0]
test_split = emnist['dataset']['test'][0, 0]
X_train, y_train = train_split['images'][0, 0], train_split['labels'][0, 0]
X_test, y_test = test_split['images'][0, 0], test_split['labels'][0, 0]

In [49]:
# Preprocess the data
# Images: reshape each sample to 28x28 with a single channel and scale the
# pixel values by 1/255.
# NOTE(review): the EMNIST .mat files reportedly store pixels column-major, so
# this plain reshape may yield transposed glyphs. Train and test are treated
# identically here, but confirm by plotting a sample before feeding the model
# images from another source.
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32') / 255
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32') / 255
# Labels: one-hot encode with an explicit width. Without num_classes the width
# is inferred from the largest label present in each array separately, so
# train and test could disagree with each other or with the model's 47-unit
# softmax output layer.
y_train = to_categorical(y_train, num_classes=47)
y_test = to_categorical(y_test, num_classes=47)

In [50]:
# Define the CNN architecture: two 3x3 convolutions (32 then 64 filters), each
# followed by batch normalization, then max-pooling and dropout, and finally a
# 128-unit dense layer feeding a 47-way softmax output.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)),
    BatchNormalization(),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    BatchNormalization(),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(47, activation='softmax'),
])

In [51]:
# Compile the model: categorical cross-entropy loss (targets are one-hot),
# Adam with its default settings, and accuracy reported as a metric.
model.compile(
    optimizer=Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [52]:
# Augment the training data with random image transformations
# (rotation, zoom, and horizontal/vertical shifts).
datagen = ImageDataGenerator(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
# datagen.fit(X_train) is intentionally not called: per the Keras docs it only
# computes dataset statistics for featurewise_center,
# featurewise_std_normalization and zca_whitening, none of which is enabled
# here, so the call would be wasted work on the full training array.

In [53]:
# Train the model on augmented batches produced by datagen.flow.
# Model.fit accepts generators/iterators directly; Model.fit_generator is
# deprecated and has been removed from recent Keras/TensorFlow releases.
batch_size = 128
epochs = 1
# NOTE(review): with epochs = 1 and patience=2 this callback can never stop
# training early; it only becomes meaningful once epochs is raised.
# NOTE(review): validation_data below is the test split, so any early-stopping
# decision would be tuned on the same data used for the final evaluation.
# Consider holding out part of the training set instead.
early_stopping = EarlyStopping(monitor='val_loss', patience=2)
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=batch_size),
    epochs=epochs,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping],
)

  history = model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size), epochs=epochs, validation_data=(X_test, y_test), callbacks=[early_stopping])




In [54]:
# Evaluate the model on the testing set.
# evaluate() returns the loss followed by the compiled metrics (accuracy here).
score = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print('Test loss:', test_loss)
print('Test accuracy:', test_accuracy)

Test loss: 0.520604133605957
Test accuracy: 0.8288297653198242


In [55]:
# save the model to disk
import pickle
filename = 'finalized_model.sav'
# Use a context manager so the file handle is flushed and closed even if
# pickling raises; the bare open(...) previously leaked the handle.
# NOTE(review): whether a Keras model pickles cleanly depends on the installed
# Keras/TensorFlow version; model.save(...) / keras.models.load_model(...) is
# the documented persistence route. Consider switching save and load together.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

In [63]:
# load the model from disk
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load model files that this notebook (or another trusted source) produced.
# The context manager closes the file handle, which the bare open(...) leaked.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
# predict() yields one score per class for each sample; argmax over axis 1
# turns both the predictions and the one-hot targets into class indices.
y_pred = loaded_model.predict(X_test)
y_pred = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)



In [64]:
from sklearn.metrics import (
    accuracy_score,
    precision_recall_fscore_support,
)

# Recompute accuracy from the class indices predicted by the reloaded model,
# as a cross-check against the value reported by model.evaluate().
accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
print('Test accuracy:', accuracy)

Test accuracy: 0.8288297872340425
