# Import all required libraries

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from keras.utils.np_utils import to_categorical
from keras.preprocessing import image
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import tensorflow as tf
import os
import cv2
%matplotlib inline

# Set all required variables

In [None]:
# --- Dataset locations -------------------------------------------------
# Both folders contain one subfolder per class ("A" to "Z", "nothing" and
# "space"); every subfolder holds that class's images in jpg format.
TRAIN_DATA_PATH = "datasets/train_data"
TEST_DATA_PATH = "datasets/test_data"

# --- Dataset / image dimensions ----------------------------------------
NUM_OF_LETTERS = 28  # "A" to "Z" + "nothing" + "space"
IMAGE_SIZE = 50  # We'll be working with 50 * 50 pixel images
NUM_OF_CHANNELS = 1  # Grayscale
NUM_OF_TRAIN_IMAGES = 3030 * 28  # 84840
NUM_OF_TEST_IMAGES = 28
NUM_OF_DENSE_LAYER_NODES = IMAGE_SIZE ** 2 // 2

# --- Class labels ------------------------------------------------------
# A label's position in this list is the integer class id used for training.
LABELS = list(map(chr, range(ord("A"), ord("Z") + 1))) + ["nothing", "space"]

# Creating training and test data from images 

In [None]:
def create_data(DATA_PATH):
    """Load every image under DATA_PATH as a resized grayscale array.

    DATA_PATH must contain one subfolder per entry of LABELS. Each image
    is read with OpenCV, converted to grayscale and resized to
    IMAGE_SIZE x IMAGE_SIZE.

    Keyword arguments:
    DATA_PATH -- Path of train/ test data

    Returns a list of [image_array, label_index] pairs, where label_index
    is the position of the subfolder's name in LABELS. Files that cannot
    be read or converted are skipped with a printed message instead of
    being dropped silently.
    """
    data = []
    for label_index, label in enumerate(LABELS):
        path = os.path.join(DATA_PATH, label)
        for img in os.listdir(path):
            img_path = os.path.join(path, img)
            img_array = cv2.imread(img_path)
            # cv2.imread signals failure by returning None, not by raising.
            if img_array is None:
                print("Skipping unreadable image: {}".format(img_path))
                continue
            try:
                gray_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
                new_img_array = cv2.resize(gray_array, (IMAGE_SIZE, IMAGE_SIZE))
            except cv2.error as e:
                # Keep the best-effort behaviour, but report what was dropped.
                print("Skipping image {}: {}".format(img_path, e))
                continue
            data.append([new_img_array, label_index])
    return data

# Read the test images first, then the training images.
test_data = create_data(DATA_PATH=TEST_DATA_PATH)
training_data = create_data(DATA_PATH=TRAIN_DATA_PATH)

# Making data sets

In [None]:
def make_datasets(data):
    """Split [img_array, label] pairs into two parallel lists.

    Keyword arguments:
    data -- iterable of [img_array, label_name] pairs

    Returns (x, y): x holds the image arrays and y the matching labels,
    both in the order the pairs were supplied.
    """
    # Materialise once so single-pass iterables are consumed only one time.
    pairs = [(features, label) for features, label in data]
    x = [features for features, _ in pairs]
    y = [label for _, label in pairs]
    return x, y

# Separate images (X) from labels (y) for the training and test sets.
X_train, y_train = make_datasets(data=training_data)
X_test, y_test = make_datasets(data=test_data)

# Convert datasets to numpy arrays

In [None]:
# Turn the Python lists into NumPy arrays, one assignment per dataset.
X_train = np.array(X_train)
y_train = np.array(y_train)
X_test = np.array(X_test)
y_test = np.array(y_test)

# Reshaping X datasets to (number of images, height, width, channels)

In [None]:
# Add the explicit channel axis expected by Conv2D. Using -1 lets NumPy infer
# the number of images from the data, so the reshape still works when
# create_data skipped unreadable files or the dataset size differs from the
# NUM_OF_TRAIN_IMAGES / NUM_OF_TEST_IMAGES constants.
X_train = X_train.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS)
X_test = X_test.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS)

# Normalizing X arrays to the range [0, 1]

In [None]:
# Cast to float32 and divide by 255 so every pixel value lies in [0, 1].
X_train = X_train.astype("float32") / 255.0
X_test = X_test.astype("float32") / 255.0

# Converting y labels to one-hot encoded vectors

In [None]:
# One-hot encode the integer class ids into NUM_OF_LETTERS-wide vectors.
y_train = to_categorical(y_train, num_classes=NUM_OF_LETTERS)
y_test = to_categorical(y_test, num_classes=NUM_OF_LETTERS)

# Creating CNN Model

In [None]:
# Two convolutional stages (each: two 3x3 convolutions, 2x2 max-pooling and
# dropout) followed by a dense classifier with one softmax output per label.
model = Sequential([
    # Stage 1: IMAGE_SIZE filters per convolution.
    Conv2D(
        IMAGE_SIZE,
        (3, 3),
        padding="same",
        input_shape=(IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS),
        activation="relu",
    ),
    Conv2D(IMAGE_SIZE, (3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2: twice as many filters.
    Conv2D(2 * IMAGE_SIZE, (3, 3), padding="same", activation="relu"),
    Conv2D(2 * IMAGE_SIZE, (3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head.
    Flatten(),
    Dense(NUM_OF_DENSE_LAYER_NODES, activation="relu"),
    Dropout(0.5),
    Dense(NUM_OF_LETTERS, activation="softmax"),
])

# Compiling CNN Model

In [None]:
# Categorical cross-entropy matches the one-hot labels and softmax output.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Display the summary of the compiled model.
model.summary()

# Training Model

In [None]:
# Train for 3 epochs in shuffled mini-batches of 32 images.
# NOTE(review): the test set is also used as validation data here, so the
# later test score is not measured on unseen data -- confirm this is intended.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=3,
    batch_size=32,
    shuffle=True,
)

# Saving Model and datasets for future use

In [None]:
#Saving Model
from keras.models import load_model
# Persist the trained network to an HDF5 file.
model.save('my_model.h5')

# Persist the preprocessed datasets as .npy files.
np.save(file="X_train.npy", arr=X_train)
np.save(file="y_train.npy", arr=y_train)
np.save(file="X_test.npy", arr=X_test)
np.save(file="y_test.npy", arr=y_test)

# Calculating Test Score

In [None]:
# Evaluate the trained model on the test set; the bare expression below
# makes the notebook display the result.
score = model.evaluate(x=X_test, y=y_test)
score

# Recognizing equivalent alphabet from input image

In [None]:
IMG_PATH = "tesy5.jpg"


def which_letter(IMG_PATH):
    """Predict which label in LABELS the image at IMG_PATH shows.

    The image is converted to grayscale, resized to IMAGE_SIZE x IMAGE_SIZE
    and scaled to [0, 1] -- the same preprocessing applied to the training
    data -- before being passed to the trained model. The resized image is
    also drawn with matplotlib.

    Keyword arguments:
    IMG_PATH -- Path of the image to classify

    Returns the entry of LABELS with the highest predicted probability.
    Raises OSError if the image cannot be read.
    """
    img_array = cv2.imread(IMG_PATH)
    # cv2.imread returns None instead of raising when the file is unreadable.
    if img_array is None:
        raise OSError("Could not read image: {}".format(IMG_PATH))

    gray_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
    new_img_array = cv2.resize(gray_array, (IMAGE_SIZE, IMAGE_SIZE))
    # Single-channel image: use a gray colormap rather than the default one.
    plt.imshow(new_img_array, cmap="gray")

    # The model was trained on float32 pixels in [0, 1]; feeding raw 0-255
    # values would not match the training distribution.
    model_input = new_img_array.astype("float32") / 255.0
    # Batch of one image with an explicit channel axis.
    model_input = model_input.reshape(1, IMAGE_SIZE, IMAGE_SIZE, NUM_OF_CHANNELS)

    results = model.predict(model_input)
    most_likely_class_index = int(np.argmax(results[0]))
    return LABELS[most_likely_class_index]


print(which_letter(IMG_PATH))