## Import libraries

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

from tensorflow import keras

## Dataset path

In [None]:
# Directory that holds the EMNIST "balanced" CSV files.
data_path = "../data/emnist/"

# Both splits sit side by side in that directory.
train_data_path = f"{data_path}emnist-balanced-train.csv"
test_data_path = f"{data_path}emnist-balanced-test.csv"

## Loading train and test datasets

In [None]:
# The CSV files are read without a header row, so columns get integer labels.
train_df, test_df = (
    pd.read_csv(csv_path, header=None)
    for csv_path in (train_data_path, test_data_path)
)

In [None]:
# Report the number of rows and columns of the training frame.
print(
    'The train dataset contains {} observation and {} characterisitics '
    '(which one is dependant variable, and rest are independant variables)'
    .format(*train_df.shape)
)

In [None]:
# Report the number of rows and columns of the test frame.
print(
    'The test dataset contains {} observation and {} characterisitics'
    .format(*test_df.shape)
)

In [None]:
# Peek at five rows; the fixed seed keeps the preview reproducible.
train_df.sample(n=5, random_state=31)

- The first column in the dataframe represents the class label (see mappings.txt for class label definitions)

In [None]:
# Lookup string: the character at index i is the symbol for class label i.
class_mapping = (
    '0123456789'                  # digits
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'  # upper-case letters
    'abdefghnqrt'                 # the lower-case letters that have their own class
)

### Plotting images from dataset

In [None]:
def plot_image(df, row, title=None):
    """Display the image stored in one row of ``df`` exactly as it is laid out.

    Args:
        df: DataFrame whose first column is the class label and whose
            remaining 784 columns are the pixel values of a 28x28 image.
        row: Positional index of the row to draw.
        title: Text for the plot title. When omitted, the title shows the
            character that ``class_mapping`` assigns to the row's label.
    """
    values = df.values
    pixels = values[row, 1:].reshape([28, 28])
    plt.imshow(pixels, cmap="Greys_r")
    heading = title if title is not None else f'Label = {class_mapping[values[row, 0]]}'
    plt.title(heading)
    plt.show()

In [None]:
from random import randrange

# Pick one training row at random and draw it as stored in the CSV.
num = randrange(train_df.shape[0])
plot_image(train_df, num)

### The images are stored transposed (rows and columns swapped)

In [None]:
def plot_flipped_image(df, row, title=None):
    """Display the image stored in one row of ``df`` with its axes swapped.

    The row's pixel values are reshaped to 28x28 and transposed before
    drawing.

    Args:
        df: DataFrame whose first column is the class label and whose
            remaining 784 columns are the pixel values of a 28x28 image.
        row: Positional index of the row to draw.
        title: Text for the plot title. When omitted, the title shows the
            character that ``class_mapping`` assigns to the row's label.
    """
    values = df.values
    upright = values[row, 1:].reshape([28, 28]).T
    plt.imshow(upright, cmap="Greys_r")
    heading = title if title is not None else f'Label = {class_mapping[values[row, 0]]}'
    plt.title(heading)
    plt.show()

In [None]:
# Draw the same randomly chosen row again, this time transposed.
plot_flipped_image(df=train_df, row=num)

## Split X and y

In [None]:
# Column 0 holds the class label; every other column is a pixel value.
train_y = train_df.iloc[:, 0].values
train_X = train_df.iloc[:, 1:].values

test_y = test_df.iloc[:, 0].values
test_X = test_df.iloc[:, 1:].values

In [None]:
# Shapes of the four arrays, in the order train X/y, test X/y.
print(*(arr.shape for arr in (train_X, train_y, test_X, test_y)))

## Preprocessing

### Rotating images

In [None]:
# Side length of the square images: every column except the label is a pixel.
img_size = int(np.sqrt(train_df.shape[1] - 1))


def rotate(image):
    """Turn a flat pixel vector into a square image with its two axes swapped.

    Args:
        image: Array holding ``img_size * img_size`` pixel values.

    Returns:
        The ``(img_size, img_size)`` transpose of the reshaped input.
    """
    square = image.reshape([img_size, img_size])
    return square.T

In [None]:
# Run `rotate` on every row (axis 1 carries the flat pixel vector of one image).
train_X = np.apply_along_axis(func1d=rotate, axis=1, arr=train_X)

print(train_X.shape, train_X.dtype)

In [None]:
# Run `rotate` on every row (axis 1 carries the flat pixel vector of one image).
test_X = np.apply_along_axis(func1d=rotate, axis=1, arr=test_X)

print(test_X.shape, test_X.dtype)

### Normalisation

In [None]:
# Convert to float64 and divide by 255 so the pixel values are rescaled
# before training.
train_X = train_X.astype('float64') / 255.0
test_X = test_X.astype('float64') / 255.0

In [None]:
# Draw the first nine training images in a 3x3 grid, titled with their class.
for position in range(1, 10):
    sample_idx = position - 1
    plt.subplot(3, 3, position)
    plt.imshow(train_X[sample_idx], cmap=plt.get_cmap('gray'))
    plt.title(class_mapping[train_y[sample_idx]])



### One-hot encoding targets

In [None]:
# Number of distinct labels that occur in the training set's label column.
num_classes = train_df[0].unique().size

# Replace the integer labels by one-hot rows of width `num_classes`.
train_y = keras.utils.to_categorical(train_y, num_classes=num_classes)
test_y = keras.utils.to_categorical(test_y, num_classes=num_classes)

### Reshaping images for CNN

In [None]:
# Append a trailing channel axis of size 1: (N, img_size, img_size, 1).
train_X = np.reshape(train_X, (-1, img_size, img_size, 1))  # 112800 (28x28x1) images (0-1 range)
test_X = np.reshape(test_X, (-1, img_size, img_size, 1))  # 18800 (28x28x1) images (0-1 range)
print(train_X.shape, test_X.shape)

## Model building

In [None]:
from keras import layers, models

# Three convolution stages (the first two followed by max-pooling and
# dropout), then a dense classifier head with one softmax output per class.
model = models.Sequential([
    layers.Conv2D(32, kernel_size=5, padding='same', activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(rate=0.4),
    layers.Conv2D(64, kernel_size=5, activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(rate=0.4),
    layers.Conv2D(128, kernel_size=3, activation='relu'),
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(rate=0.4),
    layers.Dense(num_classes, activation='softmax'),
])

# The targets are one-hot encoded, hence categorical cross-entropy.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

In [None]:
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

model_path = "../models/"
# File name template for checkpoints; the placeholders are left for Keras to fill.
filepath = model_path + "model-{epoch:02d}-{val_accuracy:.4f}.h5"

# Write a checkpoint only when validation accuracy improves.
MCP = ModelCheckpoint(
    filepath,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Stop after 3 epochs without a validation-accuracy gain and restore the best weights.
ES = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    min_delta=0,
    patience=3,
    restore_best_weights=True,
    verbose=1,
)

# Multiply the learning rate by 0.2 (not below 1e-4) when validation loss stalls for 3 epochs.
RLP = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=0.0001,
)

In [None]:
# NOTE(review): the test split doubles as validation data here, so the
# checkpoint and early-stopping decisions are made on the test set.
history = model.fit(
    x=train_X,
    y=train_y,
    epochs=30,
    validation_data=(test_X, test_y),
    callbacks=[MCP, ES, RLP],
)

In [None]:
import seaborn as sns

# Number of epochs recorded in the training history.
q = len(history.history['accuracy'])

# Assigning to `plt.figsize` only sets an unused attribute on the pyplot
# module; the size has to be passed when the figure is created.
plt.figure(figsize=(10, 10))
sns.lineplot(x = range(1,1+q),y = history.history['accuracy'], label='Accuracy')
sns.lineplot(x = range(1,1+q),y = history.history['val_accuracy'], label='Val_Accuracy')
plt.xlabel('epochs')
plt.ylabel('Accuracy')

In [None]:
def plot_metric(history, metric):
    """Plot the training and validation curves of one metric over the epochs.

    Args:
        history: Object whose ``history`` dict holds one list per metric,
            stored under ``metric`` and ``'val_' + metric``.
        metric: Name of the metric to plot, e.g. ``'loss'``.
    """
    val_name = 'val_' + metric
    train_curve = history.history[metric]
    val_curve = history.history[val_name]
    # Epochs are numbered from 1 on the x axis.
    epoch_axis = range(1, len(train_curve) + 1)

    for curve in (train_curve, val_curve):
        plt.plot(epoch_axis, curve)

    plt.title('Training and validation ' + metric)
    plt.xlabel("Epochs")
    plt.ylabel(metric)
    plt.legend(["train_" + metric, val_name])
    plt.show()

In [None]:
# Training versus validation loss per epoch.
plot_metric(history=history, metric='loss')

## Predictions

### Predict using test set

In [None]:
def run_prediction(row):
    """Classify one test image and plot it with the predicted and true label.

    Args:
        row: Positional index into ``test_X`` / ``test_df``.
    """
    # The model expects a batch, so wrap the single image as (1, 28, 28, 1).
    batch = test_X[row].reshape(1, 28, 28, 1)
    scores = model.predict(batch)
    predicted_char = class_mapping[np.argmax(scores)]
    true_char = class_mapping[test_df.values[row, 0]]
    plot_flipped_image(test_df, row, title=f"Prediction {predicted_char} - Label(origin) {true_char}")

In [None]:
import random
import time

# Show predictions for nine randomly chosen test rows.
for _ in range(1,10):
    # randint(0, len(test_df)) includes its upper bound and could return an
    # index one past the last row; randrange excludes it.
    idx = random.randrange(len(test_df))
    run_prediction(idx)

### Predict using custom images

In [None]:
from PIL import Image
from numpy import asarray
import os

def pred(path):
    """Classify a custom image file and plot it with the prediction.

    The image is converted to 8-bit greyscale, resized to 28x28 when it has
    a different size, scaled by 1/255 and passed to ``model``. The first
    character of the file name is shown as the expected label.

    Args:
        path: Path of the image file to classify.
    """
    # The context manager closes the file handle once the pixels are read.
    with Image.open(path) as image:
        grey = image.convert('L')
    # Images of any other size are resized; reshaping them to 28x28 would fail.
    if grey.size != (28, 28):
        grey = grey.resize((28, 28))
    # convert image to a 28x28 numpy array (matrix)
    img = asarray(grey)
    # add the batch and channel axes and normalize
    d = img.reshape(1, 28, 28, 1) / 255.0
    # run prediction
    result = np.argmax(model.predict(d))
    prediction = class_mapping[result]
    # First character of the file name; empty string if the name is empty.
    label = os.path.basename(path)[:1]
    plt.imshow(img, cmap="Greys_r")
    plt.title(f'Prediction {prediction} - Label(origin) {label}')
    plt.show()


In [None]:
# Build the path portably: the former literal '..\data\images' relied on the
# invalid escape sequences '\d' and '\i' and only resolved on Windows.
directory = os.path.join('..', 'data', 'images')


# Run a prediction for every regular file in the directory.
for filename in os.listdir(directory):
    f = os.path.join(directory, filename)

    if os.path.isfile(f):
        pred(f)