In [None]:
import os
import cv2
import glob
import random
import string
import win32gui
import idx2numpy
import numpy as np
import tkinter as tk
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt


from glob import glob
from tkinter import *
from pathlib import Path
from PIL import Image, ImageDraw, ImageOps, ImageGrab
from concurrent.futures import ThreadPoolExecutor

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint

from keras.utils import np_utils
from keras.models import load_model

In [None]:
# Before running this notebook, download the EMNIST dataset from
# https://www.kaggle.com/datasets/crawford/emnist
# and extract it into the `emnist` folder, so that the raw IDX files
# are available under emnist/emnist_source_files/

In [None]:
##### Step 1: dataset extraction #####

def save_image(image, label, save_dir, index):
    """Write one sample as ``save_dir/<label>/image_<index>.png``.

    The array is wrapped in a PIL image, rotated by -90 degrees and mirrored
    left-to-right before being written. The label sub-directory is created
    on demand.
    """
    with Image.fromarray(image) as source:
        # Same two-step orientation change as before: rotate, then mirror
        oriented = source.rotate(-90)
        oriented = oriented.transpose(Image.Transpose.FLIP_LEFT_RIGHT)
        # One folder per label, file name derived from the sample index
        destination = save_dir / str(label) / f"image_{index}.png"
        destination.parent.mkdir(parents=True, exist_ok=True)
        oriented.save(destination)

def save_images(folder):
    """Export every image of one EMNIST "byclass" split to PNG files.

    The split's IDX image and label files are read from
    ``emnist/emnist_source_files/`` and every sample is written through
    ``save_image`` to ``images/<folder>/<label>/image_<index>.png``.

    Args:
        folder: Split name used both in the IDX file names and in the output
            directory (the notebook calls this with 'train' and 'test').

    Raises:
        ValueError: If the image and label files do not contain the same
            number of entries.
        Exception: The first error raised by any ``save_image`` call is
            re-raised here instead of being silently dropped.
    """
    source_dir = "emnist/emnist_source_files"
    # Load images from the EMNIST dataset
    images = idx2numpy.convert_from_file(f"{source_dir}/emnist-byclass-{folder}-images-idx3-ubyte")
    # Load labels corresponding to the images
    labels = idx2numpy.convert_from_file(f"{source_dir}/emnist-byclass-{folder}-labels-idx1-ubyte")
    if len(images) != len(labels):
        raise ValueError(
            f"EMNIST '{folder}' split is inconsistent: "
            f"{len(images)} images but {len(labels)} labels"
        )
    # Define the save directory for the images
    save_dir = Path(f"images/{folder}")

    def save_one(i):
        save_image(images[i], labels[i], save_dir, i)

    # Save images concurrently; the files are independent of each other
    with ThreadPoolExecutor() as executor:
        # Consuming the results re-raises any exception from a worker thread.
        # Futures that are submitted and then discarded hide such failures.
        for _ in executor.map(save_one, range(images.shape[0])):
            pass

# Export both EMNIST splits to PNG files: training first, then testing
for split_name in ('train', 'test'):
    save_images(split_name)

In [None]:
##### Step 2: data augmentation #####

# Class folder names used by this notebook: the labels 10..35 as strings
categories = list(map(str, range(10, 36)))

def create_data(train_test, aug, samples_per_class=10000):
    """Build a shuffled, normalised dataset from the exported PNGs and save it.

    Images are read from ``images/<train_test>/<category>/*.png`` for every
    entry of the module-level ``categories`` list, resized to 28x28, scaled
    to [0, 1] and paired with one-hot labels (the class index is the position
    of the category in ``categories``). The result is written with
    ``np.savez`` to ``dataset_<train_test>.npz`` (or
    ``dataset_<train_test>_aug.npz`` when ``aug`` is true) under the keys
    ``X`` and ``Y``.

    Args:
        train_test: Split name, i.e. the sub-directory of ``images/`` to read.
        aug: When true and ``train_test == 'train'``, every class that has
            fewer than ``samples_per_class`` images is topped up to that
            number with augmented copies of its own images.
        samples_per_class: Maximum number of images loaded per class, and the
            target class size for augmentation.

    Raises:
        ValueError: If an image file cannot be decoded.
    """
    num_classes = len(categories)
    base_path = f"images/{train_test}"

    # Load images and integer class ids for each category
    images, class_ids = [], []
    for class_num, category in enumerate(categories):
        img_paths = glob(os.path.join(base_path, category, '*.png'))[:samples_per_class]
        for img_path in img_paths:
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            # cv2.imread signals failure by returning None instead of raising
            if img is None:
                raise ValueError(f"Could not read image: {img_path}")
            images.append(cv2.resize(img, (28, 28)))
        class_ids.extend([class_num] * len(img_paths))

    # Convert data to numpy arrays and normalize pixel values
    data_X = np.array(images).reshape(-1, 28, 28, 1) / 255.0
    class_ids = np.asarray(class_ids, dtype=int)
    # Fix the one-hot width explicitly so it does not depend on which
    # classes happen to be present on disk
    data_Y = np_utils.to_categorical(class_ids, num_classes=num_classes)

    # Augment the data if specified and it's the training set
    if train_test == 'train' and aug:
        # Horizontal flipping is disabled: a mirrored letter is not a valid
        # sample of its class. No datagen.fit() call is needed because no
        # feature-wise statistic (centering / std normalisation / ZCA) is used.
        datagen = ImageDataGenerator(rotation_range=10,
                                     width_shift_range=0.1,
                                     height_shift_range=0.1,
                                     zoom_range=0.1,
                                     horizontal_flip=False)

        # Collect the augmented chunks and concatenate once at the end
        extra_X, extra_Y = [], []
        for class_num in range(num_classes):
            class_mask = class_ids == class_num
            num_samples = int(class_mask.sum())
            # Nothing to augment from (empty class) or nothing to add (full class)
            if num_samples == 0 or num_samples >= samples_per_class:
                continue

            X_class, Y_class = data_X[class_mask], data_Y[class_mask]
            # flow() yields batches forever; stop once the class is full
            for X_aug_batch, Y_aug_batch in datagen.flow(X_class, Y_class, batch_size=len(X_class)):
                aug_size = min(len(X_aug_batch), samples_per_class - num_samples)
                extra_X.append(np.asarray(X_aug_batch[:aug_size]).reshape(-1, 28, 28, 1))
                extra_Y.append(np.asarray(Y_aug_batch[:aug_size]))
                num_samples += aug_size
                if num_samples >= samples_per_class:
                    break

        if extra_X:
            data_X = np.concatenate([data_X, *extra_X], axis=0)
            data_Y = np.concatenate([data_Y, *extra_Y], axis=0)

    # Shuffle images and labels together through one shared index permutation
    order = list(range(len(data_X)))
    random.shuffle(order)
    data_X, data_Y = data_X[order], data_Y[order]

    # Save the data to a numpy file
    np.savez(f'dataset_{train_test}{"_aug" if aug else ""}', X=data_X, Y=data_Y)

# Create the three datasets concurrently using ThreadPoolExecutor
dataset_jobs = [
    ('train', True),   # Augmented training set
    ('train', False),  # Non-augmented training set
    ('test', False),   # Non-augmented test set
]
with ThreadPoolExecutor() as executor:
    dataset_futures = [executor.submit(create_data, split, aug) for split, aug in dataset_jobs]
    # result() re-raises any exception from a worker; without it a failed
    # dataset build would go unnoticed until a later cell cannot find the file
    for dataset_future in dataset_futures:
        dataset_future.result()


In [None]:
# Folder that holds one sub-directory of exported training images per label
base_path = "images/train"

# Number of directory entries per class folder, for labels 10..35 in order
num_images = [
    len(os.listdir(os.path.join(base_path, str(class_num))))
    for class_num in range(10, 36)
]

# Bar chart of the class sizes, one bar per category and labelled with its name
bar_positions = range(len(categories))
plt.bar(bar_positions, num_images)
plt.xticks(bar_positions, categories)
plt.title('Number of images per class')
plt.show()

In [None]:
# Read back the augmented training set written by create_data
data = np.load('dataset_train_aug.npz')
# One-hot rows -> integer class indices
labels = data['Y'].argmax(axis=1)

# How many samples carry each class index
counts = [(labels == class_index).sum() for class_index in range(len(categories))]

# Bar chart of the per-class sample counts after augmentation
plt.bar(categories, counts)
plt.title('Number of images per class')
plt.show()

In [None]:
##### Step 3: the model #####

# Load the training and testing datasets from the numpy files.
# The context manager closes the .npz archive once the arrays are in memory.
with np.load('dataset_train.npz') as file:
    train_images, train_labels = file['X'], file['Y']

with np.load('dataset_test.npz') as file:
    test_images, test_labels = file['X'], file['Y']

# Keep only the weights with the lowest validation loss. The file name must
# match the one the evaluation and GUI cells load ("best_model.h5").
checkpoint_callback = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True, mode='min')

# Define the CNN model: three conv blocks followed by a dense classifier
# with one softmax output per letter class
model = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(26, activation='softmax')
])

# Compile the model (labels are one-hot, hence categorical cross-entropy)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model and store the training history.
# Note: the test set doubles as validation data here, so the checkpoint is
# selected on test loss.
history = model.fit(train_images, train_labels, validation_data=(test_images, test_labels), epochs=30, batch_size=32, callbacks=[checkpoint_callback])

# Plot accuracy and loss over the epochs, training curve vs. test curve
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric])
    plt.plot(history.history[f'val_{metric}'])
    plt.title(f'model {metric}')
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()


In [None]:
# Load the best model from the saved file
model = load_model("best_model.h5")

# Load the testing dataset from the numpy file (archive closed after reading)
with np.load('dataset_test.npz') as file:
    test_images, test_labels = file['X'], file['Y']

# Predict the probabilities for each class using the model
y_probs = model.predict(test_images)

# Determine the class with the highest probability as the predicted class
y_pred = y_probs.argmax(axis=1)

# Convert test labels from one-hot encoded format to integers
test_labels_int = test_labels.argmax(axis=1)

# Confusion matrix: rows are the true classes, columns the predicted classes
cm = tf.math.confusion_matrix(labels=test_labels_int, predictions=y_pred)

# Plot the confusion matrix using Seaborn's heatmap
plt.figure(figsize=(10,7))
sns.heatmap(cm, annot=True, fmt='d')
plt.xlabel('Predicted class')
plt.ylabel('True class')

# Save before showing: once plt.show() returns the figure has been released
# and savefig would write an empty image
plt.savefig('plot.png')
plt.show()

In [None]:
##### Step 4: splitting words into characters, and a graphical interface for testing our own handwriting #####

# Restore the saved model; predict_digit reads this module-level `model`
model = load_model(filepath="best_model.h5")

def predict_digit(img):
    """Classify one PIL image with the module-level ``model``.

    The image is resized to 28x28, converted to grayscale, scaled to [0, 1]
    and fed to the model as a single-sample batch.

    Returns:
        A tuple ``(letter, score)``: the uppercase ASCII letter at the index
        of the highest model output, and that highest output value.
    """
    # Resize first, then drop the colour channels (same order as before)
    small_gray = img.resize((28,28)).convert('L')
    # Shape the pixels as a batch of one 28x28x1 sample and normalize
    batch = np.array(small_gray).reshape(1,28,28,1) / 255.0
    # Scores of the single sample in the batch
    res = model.predict([batch])[0]
    best_index = np.argmax(res)
    return string.ascii_uppercase[best_index], max(res)

# Define the main application class
class App(tk.Tk):
    """Tk window for drawing letters and recognising them with the model.

    The window holds a white 900x300 drawing canvas, a label that shows the
    recognition result, a "Recognise" button and a "Clear" button. Dragging
    with the left mouse button paints black dots on the canvas.
    """

    def __init__(self):
        """Create the widgets, lay them out in a grid and bind drawing."""
        tk.Tk.__init__(self)
        # Last pointer position seen while drawing
        self.x = self.y = 0
        # Creating elements
        self.canvas = tk.Canvas(self, width=900, height=300, bg = "white", cursor="cross")
        self.label = tk.Label(self, text="Thinking..", font=("Helvetica", 32))
        self.classify_btn = tk.Button(self, text = "Recognise", command = self.classify_handwriting)
        self.button_clear = tk.Button(self, text = "Clear", command = self.clear_all)
        # Grid structure (tk.W is used explicitly rather than relying on the
        # wildcard-imported W)
        self.canvas.grid(row=0, column=0, pady=2, sticky=tk.W)
        self.label.grid(row=0, column=1,pady=2, padx=2)
        self.classify_btn.grid(row=1, column=1, pady=2, padx=2)
        self.button_clear.grid(row=1, column=0, pady=2)
        # Paint while the left mouse button is held down and moved
        self.canvas.bind("<B1-Motion>", self.draw_lines)

    def clear_all(self):
        """Erase everything drawn on the canvas and reset the result label."""
        self.canvas.delete("all")
        self.label.configure(text="Thinking..")

    def group_components_by_line(self, stats, y_tolerance=20):
        """Group connected-component rows into text lines.

        Args:
            stats: Sequence of component rows whose first two entries are the
                left x and top y of the bounding box (the layout produced by
                ``cv2.connectedComponentsWithStats``). Row 0 is skipped as
                the background component.
            y_tolerance: Largest difference in top y between two consecutive
                components (in top-to-bottom order) for them to be kept on
                the same line.

        Returns:
            A list of lines in top-to-bottom order; each line is a list of
            component rows sorted from left to right.
        """
        # Order the foreground components top-to-bottom, ties left-to-right
        stats_sorted = sorted(stats[1:], key=lambda stat: (stat[1], stat[0]))

        grouped_stats = []
        current_line = []

        for stat in stats_sorted:
            # A vertical jump larger than the tolerance, measured against the
            # previously added component, closes the current line
            if current_line and abs(stat[1] - current_line[-1][1]) > y_tolerance:
                grouped_stats.append(sorted(current_line, key=lambda s: s[0]))
                current_line = []
            current_line.append(stat)

        # Flush the last line, if any
        if current_line:
            grouped_stats.append(sorted(current_line, key=lambda s: s[0]))

        return grouped_stats

    def classify_handwriting(self):
        """Recognise the letters drawn on the canvas and show them in the label.

        The canvas area is captured from the screen, split into connected
        components, grouped into lines, and every component is classified
        with ``predict_digit``. The label then lists each letter with its
        score, followed by the recognised word(s), one line per text line.
        """
        HWND = self.canvas.winfo_id() # Get the handle of the canvas
        rect = win32gui.GetWindowRect(HWND) # Get the screen coordinates of the canvas
        # Force RGB so that the colour conversion and the inversion below
        # always receive a three-channel image
        im = ImageGrab.grab(rect).convert('RGB')

        # Preprocess the image. PIL delivers pixels in RGB order, so the RGB
        # conversion code is the matching one.
        gray = cv2.cvtColor(np.array(im), cv2.COLOR_RGB2GRAY)
        # Inverted Otsu threshold: dark strokes become foreground (255)
        _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        _, _, stats, _ = cv2.connectedComponentsWithStats(thresh)

        # Group connected components by line
        lines = self.group_components_by_line(stats)

        # Iterate over each line and perform character recognition
        margin_size = 50
        border_color = (255, 255, 255)
        predictions = []

        for line in lines:
            line_predictions = []
            for x, y, w, h, _ in line:
                # Add a white border around the crop so the character does not touch the image edges
                image_with_border = ImageOps.expand(im.crop((x, y, x+w, y+h)), border=(margin_size, margin_size, margin_size, margin_size), fill=border_color)
                # Invert the colours and resize to 28x28 pixels
                image_with_border = ImageOps.invert(image_with_border).resize((28,28))
                # Predict the character and its score
                digit, acc = predict_digit(image_with_border)
                line_predictions.append((digit, acc))
            predictions.append(line_predictions)

        # Display the predictions in a formatted text
        text, word = '', ''
        for prediction in predictions:
            for pred in prediction:
                text += f"Letter: {pred[0]}, Accuracy: {pred[1]:.3f} \n"
                word += pred[0]
            text += '\n'
            word += '\n'
        text += f"Word:\n{word}"

        self.label.configure(text=text)

    def draw_lines(self, event):
        """Paint a filled black dot of radius 8 at the pointer position."""
        r = 8
        self.x = event.x
        self.y = event.y
        self.canvas.create_oval(self.x - r, self.y - r, self.x + r, self.y + r, fill='black')

# Build the window and hand control to its Tk event loop until it is closed
app = App()
app.mainloop()