In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to the figures drawn after this point.
sns.set_theme()

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.utils import np_utils

In [None]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix

In [None]:
# Ignore warning messages.
# NOTE(review): with no category or module filter this silences every warning
# for the rest of the session, including deprecation notices from the
# libraries imported above.
import warnings
warnings.filterwarnings('ignore') 

In [None]:
# Dataset source:
# https://www.kaggle.com/datasets/sachinpatel21/az-handwritten-alphabets-in-csv-format
# Every value is cast to float32 and the column named '0' becomes 'label'.
dataset = (
    pd.read_csv("./A_Z Handwritten Data.csv")
    .astype('float32')
    .rename(columns={'0': 'label'})
)

# Separate the class label column from the remaining (image data) columns.
y = dataset['label']
X = dataset.drop(columns='label')

In [None]:
# Visualize a few random samples of the raw data.

# Draw only as many rows as are displayed: shuffling the whole frame would
# copy every image just to show four of them.
row, col = 2, 2
X_shuffle = shuffle(X, n_samples=row * col)

plt.rcParams["axes.grid"] = False
# plt.subplots takes (nrows, ncols) in that order.
fig, axes = plt.subplots(row, col, figsize=(3, 2.5), frameon=False)
for i, ax in enumerate(axes.flat):
    # Each row is reshaped into a 28x28 image before display.
    ax.imshow(
        X_shuffle.iloc[i].values.reshape(28, 28),
        interpolation='nearest', cmap='Greys')
plt.show()

In [None]:

# Map numeric class ids (0-25) to the letters A-Z.
label_mapper = dict(enumerate('ABCDEFGHIJKLMNOPQRSTUVWXYZ'))

# Build the letter column from `y` (the numeric labels extracted when the data
# was loaded) instead of from dataset['label'] itself: re-running this cell
# would otherwise try to map already-mapped letters and fill the column with NaN.
dataset['label'] = y.map(label_mapper)

# Number of samples per letter, shown as a horizontal bar chart.
label_size = dataset.groupby('label').size()
label_size.plot.barh(figsize=(5,5))

plt.title("Character class counts")
plt.show()

In [None]:
# Split data and labels into train and test partitions.
# A fixed random_state makes the split reproducible, so later cells that look
# at a hard-coded test index see the same sample on every run; stratify=y keeps
# the class proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, stratify=y
)

# Scale the data with a MinMaxScaler fitted on the training partition only;
# the same fitted transform is then applied to the test partition.
# NOTE: despite its name, `standard_scaler` is a MinMaxScaler, not a StandardScaler.
standard_scaler = MinMaxScaler()
X_train = standard_scaler.fit_transform(X_train)
X_test = standard_scaler.transform(X_test)

In [None]:
# Visualize a few random samples of the scaled training data.

# Draw only as many rows as are displayed instead of shuffling the whole array.
row, col = 2, 2
X_shuffle = shuffle(X_train, n_samples=row * col)

plt.rcParams["axes.grid"] = False
# plt.subplots takes (nrows, ncols) in that order.
fig, axes = plt.subplots(row, col, figsize=(5, 4), frameon=False)
for i, ax in enumerate(axes.flat):
    ax.imshow(
        X_shuffle[i].reshape(28, 28),
        interpolation='nearest', cmap='Greys')
    # Hide ticks and frame on the axes that actually shows the image; a single
    # plt.axis('off') issued before the subplots exist never reaches them.
    ax.axis('off')
plt.show()

In [None]:
# Reshape every flat 784-value row (784 = 28*28) into a 28x28 image with a
# single channel.
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32')

# One-hot encode the labels. The width is fixed explicitly from the full label
# set: left to infer it, to_categorical would size each partition from its own
# largest label, which could differ between train and test and from the number
# of distinct labels in `y`.
num_classes = len(y.unique())
y_train = np_utils.to_categorical(y_train, num_classes=num_classes)
y_test = np_utils.to_categorical(y_test, num_classes=num_classes)

In [None]:
# Define the model: one convolution + pooling stage, dropout, then a dense
# classifier with one softmax output per distinct label in `y`.
num_outputs = len(y.unique())

model = Sequential([
    Conv2D(32, (5, 5), input_shape=(28, 28, 1), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.3),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(num_outputs, activation='softmax'),
])

In [None]:
# Compile the model, then print its layer summary.
compile_options = {
    'loss': 'categorical_crossentropy',
    'optimizer': 'adam',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)
model.summary()

In [None]:
# Train the model; the test partition is passed as the validation data.
fit_options = dict(epochs=5, batch_size=200, verbose=2)
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    **fit_options
)

In [None]:
# Evaluate on the test partition and report the second entry of the result
# (presumably the accuracy, since the model is compiled with metrics=['accuracy']).
scores = model.evaluate(X_test, y_test, verbose=0)
accuracy = scores[1]
print("CNN model Score: ", accuracy)

In [None]:
# Plot the recorded training loss and validation loss, one point per epoch.
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])

plt.title('Model loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Test'], loc='upper right')
plt.show()

In [None]:
# Run the model on the test images; later cells take the argmax of each row of
# `pred` to obtain the predicted class.
pred = model.predict(X_test)

In [None]:
# Display one test image (index 3029) as a 28x28 greyscale picture.
plt.rcParams["axes.grid"] = False
plt.figure(figsize=(3, 2.5), frameon=False)

sample_test = X_test[3029].reshape(28, 28)
plt.imshow(sample_test, interpolation='nearest', cmap='Greys')
plt.show()

In [None]:
# Letter predicted for test image 3029: the position of the largest value in
# its prediction row, looked up in label_mapper.
label_mapper[pred[3029].argmax()]

In [None]:
# Confusion matrix of true (rows) vs. predicted (columns) classes on the test
# partition, labelled with letters instead of numeric class ids.
class_ids = sorted(label_mapper)
class_names = [label_mapper[class_id] for class_id in class_ids]

# Passing `labels` pins the matrix to one row/column per known class, even if
# a class is absent from both the true and the predicted labels.
cm = confusion_matrix(
    y_test.argmax(axis=1),
    pred.argmax(axis=1),
    labels=class_ids
)
df_cm = pd.DataFrame(cm, index=class_names, columns=class_names)

plt.figure(figsize = (10,7))
sns.set_theme(font_scale=0.7)
# fmt='d' prints the integer counts in full; the default annotation format
# abbreviates large counts to two significant digits / scientific notation.
ax = sns.heatmap(df_cm, annot=True, fmt='d')
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
plt.show()

In [None]:
# Save the model to 'char_recog.h5' (path is relative to the current working directory).
model.save('char_recog.h5')