## **Loading Necessary Libraries and Drive Mounting**

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from google.colab import drive
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
# Mount Google Drive into the Colab filesystem at /content/drive; the
# preprocessed arrays are written under this mount point at the end of the notebook.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install emnist

Collecting emnist
  Downloading emnist-0.0-py3-none-any.whl (7.3 kB)
Installing collected packages: emnist
Successfully installed emnist-0.0


## Extracting the EMNIST ByMerge dataset and checking its shape

In [None]:
# Imported here rather than in the top import cell because the package is
# only installed by the preceding pip cell.
from emnist import extract_training_samples

# Load the training split of the EMNIST 'bymerge' dataset.
images, labels = extract_training_samples('bymerge')

# astype() already returns a freshly allocated float32 array, so wrapping it
# in np.array(...) only duplicated the whole image tensor a second time.
n_images = images.astype('float32')
n_labels = np.array(labels)

Downloading emnist.zip: 536MB [00:06, 92.3MB/s]


In [None]:
# Imported here rather than in the top import cell because the package is
# only installed by the earlier pip cell.
from emnist import extract_test_samples

# Load the test split of the EMNIST 'bymerge' dataset.
testimages, testlabels = extract_test_samples('bymerge')

# astype() already returns a freshly allocated float32 array, so the extra
# np.array(...) wrapper only produced a redundant second copy.
testimages = testimages.astype('float32')
testlabels = np.array(testlabels)

In [None]:
# Show the array shape of the training images, then of the test images,
# one per line.
print(n_images.shape, testimages.shape, sep="\n")

(697932, 28, 28)
(116323, 28, 28)


# ***Preprocessing***

In [None]:
#Data Centering
def center_function(x, mean=None):
    """Return ``x`` shifted by a scalar mean.

    Parameters
    ----------
    x : array exposing ``mean()`` and supporting subtraction.
    mean : scalar, optional
        Value to subtract. When omitted, ``x.mean()`` is used, which matches
        the original one-argument behaviour.

    Returns
    -------
    A new array equal to ``x - mean``.
    """
    if mean is None:
        mean = x.mean()
    return x - mean


# The centring statistic is estimated on the training split only and then
# reused for the test split, so both splits receive the same transformation
# and no test-set information enters the preprocessing.
train_pixel_mean = n_images.mean()

n_images = center_function(n_images, train_pixel_mean)
testimages = center_function(testimages, train_pixel_mean)


In [None]:
#Normalisation
# Divide both splits by 255.0 in one pass; the results are new arrays bound
# to train_images / test_images, leaving n_images / testimages untouched.
train_images, test_images = (
    pixels / 255.0 for pixels in (n_images, testimages)
)

In [None]:
#Reshaping training and test images to feed into the model
# Each split is reshaped to (samples, height, width, 1), i.e. a trailing
# axis of length one is appended to the 28x28 images.

# --- training split ---
train_images_height, train_images_width = 28, 28
train_images_number = len(train_images)
train_images_size = train_images_width * train_images_height
train_images = np.reshape(
    train_images,
    (train_images_number, train_images_height, train_images_width, 1),
)

# --- test split ---
test_images_height, test_images_width = 28, 28
test_images_number = len(test_images)
test_images_size = test_images_width * test_images_height
test_images = np.reshape(
    test_images,
    (test_images_number, test_images_height, test_images_width, 1),
)

In [None]:
#Converting the labels to categorical and checking the training and test shape
final_train_x = train_images
print("Shape of train data: ", final_train_x.shape)

final_test_x = test_images
# This line previously reported the test array under the "train data" label.
print("Shape of test data: ", final_test_x.shape)
print()


# One-hot encode the integer class labels into 62 integer-typed columns.
# NOTE(review): EMNIST 'bymerge' is documented as a 47-class split (62 is the
# 'byclass' count), so columns 47-61 are presumably always zero. The value is
# left at 62 because the saved label shape is consumed downstream - confirm
# before changing.
final_train_y = to_categorical(n_labels, num_classes = 62, dtype='int')
print("New shape of train labels: ", final_train_y.shape)

final_test_y = to_categorical(testlabels, num_classes = 62, dtype='int')
print("New shape of test labels: ", final_test_y.shape)

Shape of train data:  (697932, 28, 28, 1)
Shape of train data:  (116323, 28, 28, 1)

New shape of train labels:  (697932, 62)
New shape of test labels:  (116323, 62)


### **Saving the preprocessed dataset**

In [None]:
# Persist each preprocessed array as <name>.npy inside the Drive DATASET folder.
DATASET_DIR = Path('/content/drive/My Drive/DATASET')

# np.save does not create missing directories. Only the leaf folder is
# created here (no parents=True), so an unmounted Drive still raises instead
# of silently writing to the local VM disk.
DATASET_DIR.mkdir(exist_ok=True)

for array_name, array in (
    ('final_train_x', final_train_x),
    ('final_train_y', final_train_y),
    ('final_test_x', final_test_x),
    ('final_test_y', final_test_y),
):
    np.save(DATASET_DIR / f'{array_name}.npy', array)