In [None]:
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from emnist import extract_training_samples, extract_test_samples

In [None]:
# Fetch the EMNIST "letters" split through the `emnist` package. Each extract_*
# call is unpacked into an (images, labels) pair.
# NOTE(review): X_train / y_train / X_test / y_test are reassigned by later cells
# in this notebook, so when the notebook is run top to bottom this cell does not
# decide what the model is trained on.
X_train, y_train = extract_training_samples('letters')
X_test, y_test = extract_test_samples('letters')

# Rescale both image arrays with Keras' normalize helper along axis 1.
X_test = tf.keras.utils.normalize(X_test, axis=1)
X_train = tf.keras.utils.normalize(X_train, axis=1)

In [None]:
# Load the MNIST digit dataset bundled with Keras into the shared train/test names.
# NOTE(review): these four arrays are reassigned again by the CSV preprocessing
# cell further down, so on a top-to-bottom run this data is not used for training.
mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Normalize both image arrays along axis 1 (Keras' normalize helper; the original
# note describes this as "making length = 1").
X_train, X_test = (
    tf.keras.utils.normalize(images, axis=1) for images in (X_train, X_test)
)

In [None]:
# Read the EMNIST "letters" train and test CSV files (looked up relative to the
# current working directory) into two DataFrames.
# NOTE(review): a later cell reads a column named 'label', so these files are
# assumed to carry a header row — confirm against the actual CSVs.
emnist_train, emnist_test = map(
    pd.read_csv,
    ('emnist-letters-train.csv', 'emnist-letters-test.csv'),
)

In [None]:
# Preprocess the data
# Every column after the first is treated as pixel data and scaled by 1/255.
X_train = emnist_train.iloc[:, 1:].values.astype('float32') / 255.0  # Normalize pixel values
X_test = emnist_test.iloc[:, 1:].values.astype('float32') / 255.0

# Make the class labels zero-based. The model below ends in a 26-unit softmax
# trained with sparse_categorical_crossentropy, which only accepts class ids
# 0..25. EMNIST "letters" is documented as labelled 1..26, so feeding the raw
# labels puts class 26 out of range. Shifting by the smallest training label is a
# no-op if the file is already zero-based.
label_offset = int(emnist_train['label'].min())
y_train = emnist_train['label'].values - label_offset
y_test = emnist_test['label'].values - label_offset

# Hold out 20% of the training rows as a validation set (fixed seed so the split
# is repeatable). The test arrays above are left untouched.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [None]:
# Build the fully connected classifier in one go by handing Sequential the
# complete, ordered list of layers.
model = tf.keras.models.Sequential([
    # Input: flatten each sample; the per-sample shape is taken from X_train
    # (everything after the batch axis).
    tf.keras.layers.Flatten(input_shape=X_train.shape[1:]),
    # First pair of 128-unit ReLU hidden layers.
    tf.keras.layers.Dense(units=128, activation="relu"),
    tf.keras.layers.Dense(units=128, activation="relu"),
    # Dropout with rate 0.5 between the two hidden pairs to limit overfitting.
    tf.keras.layers.Dropout(0.5),
    # Second pair of 128-unit ReLU hidden layers.
    tf.keras.layers.Dense(units=128, activation="relu"),
    tf.keras.layers.Dense(units=128, activation="relu"),
    # Output: softmax over 26 classes (one per letter, not digits).
    tf.keras.layers.Dense(units=26, activation="softmax"),
])

In [None]:
# Configure training: Adam optimizer, cross-entropy over integer class labels,
# and accuracy reported as the tracked metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Training the model
# Three passes over the training split. The validation split produced by the
# preprocessing cell (X_val, y_val) is passed in so Keras reports validation loss
# and accuracy after every epoch; without it that held-out data was never used.
model.fit(X_train, y_train, epochs=3, validation_data=(X_val, y_val))

In [None]:
# Score the trained model on the test arrays. evaluate() is unpacked into the
# loss followed by the single compiled metric (accuracy).
# Note: despite the `val_` prefix these hold metrics computed on X_test / y_test.
val_loss, val_acc = model.evaluate(x=X_test, y=y_test)
# Print loss, then accuracy, one value per line.
print(val_loss, val_acc, sep="\n")

In [None]:
# Persist the trained model to disk under the path below.
# NOTE(review): newer Keras releases may require a `.keras` or `.h5` suffix on the
# save path — confirm against the installed version before changing it, since
# whatever loads this path would need the same update.
model.save(filepath='handwrittenOCR.model')

In [None]:
#REPLACED WITH GUI
# Load custom images and predict them
# Walks digits/digit1.png, digits/digit2.png, ... and stops at the first number
# whose file does not exist. A failure on one image is reported and skipped.
image_number = 1
while os.path.isfile('digits/digit{}.png'.format(image_number)):
    image_path = 'digits/digit{}.png'.format(image_number)
    try:
        raw = cv2.imread(image_path)
        if raw is None:
            # cv2.imread signals an unreadable/undecodable file by returning None.
            raise ValueError("cv2.imread could not decode {}".format(image_path))
        # Keep the first colour channel only, add a batch axis, and invert the
        # 8-bit intensities.
        img = np.invert(np.array([raw[:, :, 0]]))
        # Feed the network what it was trained on: float32 pixels scaled by 1/255
        # and laid out in the model's own per-sample input shape. Passing the raw
        # (1, H, W) uint8 array would not match a model built on flattened rows.
        batch = (img.astype('float32') / 255.0).reshape(
            (1,) + tuple(model.input_shape[1:])
        )
        prediction = model.predict(batch)
        # argmax is the predicted class index; how it maps to a character depends
        # on how the labels were encoded at training time.
        print("The number is probably a {}".format(np.argmax(prediction)))
        plt.imshow(img[0], cmap=plt.cm.binary)
        plt.show()
    except Exception as exc:
        # Best-effort: report and move on, but no longer hide the reason and no
        # longer intercept KeyboardInterrupt / SystemExit.
        print("Error reading image! Proceeding with next image...")
        print("  ({}: {})".format(type(exc).__name__, exc))
    finally:
        # Always advance, whether the image succeeded or failed.
        image_number += 1