Imports

In [46]:
import cv2
import pandas as pd
import numpy as np
import os
import random
import matplotlib.pyplot as plt

In [47]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset

Dataset Path Initializations

In [48]:
# Get the dataset paths
train_path = "Datasets/Train"
test_path = "Datasets/Test"

Declare GPU

In [49]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Image Preprocessing Function

In [50]:
def image_preprocess(new_img, fixed_size=32):
    # Change the image to grayscale
    new_img = cv2.cvtColor(new_img, cv2.COLOR_RGB2GRAY)
    new_img = cv2.bitwise_not(new_img)

    # Apply MedianBlur, Histogram Equalization, and Thresholding
    new_img = cv2.medianBlur(new_img, 5)
    new_img = cv2.equalizeHist(new_img)
    _, new_img = cv2.threshold(new_img, 210, 255, cv2.THRESH_BINARY)

    # Resize the image to fixed size
    new_img = cv2.resize(new_img, (fixed_size, fixed_size))

    # Normalize pixel values to range [0, 1]
    new_img = new_img / 255.0

    # Return the image
    return new_img

Obtain The Dataset For Training

In [51]:
import cupy as cp

In [52]:
# Initialize variables for storing images and its labels
image_list = []
label_list = []

In [53]:
labels = os.listdir(train_path)

# Get the image, preprocess it, and put it in the list
for idx, name in enumerate(labels):
    full_path = train_path + '/' + name

    for img_name in os.listdir(full_path):
        # Getting Image
        img_path = full_path + '/' + img_name
        new_img = cv2.imread(img_path)
        new_img = cv2.cvtColor(new_img, cv2.COLOR_BGR2RGB)
        
        # Preprocess Image
        processed_img = image_preprocess(new_img)
        
        # Insert to the array
        image_list.append(processed_img)
        label_list.append(name)

Obtain The Dataset For Testing

In [54]:
image_test = []
label_test = []

In [55]:
labels2 = os.listdir(test_path)

# Get the image, preprocess it, and put it in the list
for idx, name in enumerate(labels2):
    full_path = test_path + '/' + name

    for img_name in os.listdir(full_path):
        # Getting Image
        img_path = full_path + '/' + img_name
        new_img = cv2.imread(img_path)
        new_img = cv2.cvtColor(new_img, cv2.COLOR_BGR2RGB)
        
        # Preprocess the image
        processed_img = image_preprocess(new_img)
        
        # Insert to the array
        image_test.append(processed_img)
        label_test.append(name)

Shuffle The Order For The Training Dataset

In [56]:
# Combine the dataset first
combined_dataset = list(zip(image_list, label_list))

# Shuffle the order
random.shuffle(combined_dataset)

# Unzip the data
image_list, label_list = zip(*combined_dataset)

# Put it back
image_list = list(image_list)
label_list = list(label_list)

Convert Categorical Data to Numerical Data

In [57]:
# Change label to numeric
label_order = [str(i) for i in range(10)] + [chr(i) for i in range(ord('A'), ord('Z') + 1)]
label_to_index = {label: idx for idx, label in enumerate(label_order)}
numeric_labels = [label_to_index[label] for label in label_list]
numeric_labels_test = [label_to_index[label] for label in label_test]
print("Label to Index Mapping:", label_to_index)

Label to Index Mapping: {'0': 0, '1': 1, '2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, '9': 9, 'A': 10, 'B': 11, 'C': 12, 'D': 13, 'E': 14, 'F': 15, 'G': 16, 'H': 17, 'I': 18, 'J': 19, 'K': 20, 'L': 21, 'M': 22, 'N': 23, 'O': 24, 'P': 25, 'Q': 26, 'R': 27, 'S': 28, 'T': 29, 'U': 30, 'V': 31, 'W': 32, 'X': 33, 'Y': 34, 'Z': 35}


MDRNN Model

In [58]:
# Custom Dataset
class HandwrittenDigitDataset(Dataset):
    def __init__(self, images, labels):
        self.images = torch.tensor(images, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        return self.images[idx], self.labels[idx]

In [59]:
class MDRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(MDRNN, self).__init__()
        self.rnn_x = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.rnn_y = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        print(f"Input shape: {x.shape}")
        batch_size, height, width = x.size()

        # Process rows
        x_rows = x.transpose(1, 2)
        _, (h_x, _) = self.rnn_x(x_rows)

        # Process columns
        x_cols = x
        _, (h_y, _) = self.rnn_y(x_cols)

        # Concatenate the final hidden states from both RNNs
        h_x = h_x.squeeze(0)
        h_y = h_y.squeeze(0)
        features = torch.cat((h_x, h_y), dim=1)

        # Debug features shape
        print(f"Features shape: {features.shape}")
        output = self.fc(features)

        # Return the output
        return output

Initialize Hyperparameters

In [60]:
# Parameters
input_size = 32  # Fixed descriptor size
hidden_size = 128
num_classes = len(set(label_list))  # Number of unique labels
epochs = 10
batch_size = 32
learning_rate = 0.001

Getting The Datasets Ready

In [61]:
#X_train, X_test, y_train, y_test = train_test_split(image_list, numeric_labels, test_size=0.2, random_state=42)
X_train = image_list
y_train = numeric_labels

X_test = image_test
y_test = numeric_labels_test

# Create DataLoader
train_dataset = HandwrittenDigitDataset(X_train, y_train)
test_dataset = HandwrittenDigitDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

  self.images = torch.tensor(images, dtype=torch.float32)


Model Training

In [62]:
# Initialize Model
model = MDRNN(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [63]:
# Training Loop
for epoch in range(epochs):
    model.to(device)
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        # Move data and labels to the same device as the model
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch [{epoch + 1}/{epochs}], Loss: {running_loss / len(train_loader):.4f}")


Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape:

Model Evaluation

In [64]:
# Evaluation
model.eval()
y_pred = []
y_true = []
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        y_pred.extend(predicted.cpu().numpy())
        y_true.extend(labels.cpu().numpy())

accuracy = accuracy_score(y_true, y_pred)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape: torch.Size([32, 32, 32])
Features shape: torch.Size([32, 256])
Input shape:

Save The Model

In [65]:
# Save the model
torch.save(model.state_dict(), "handwritten_digit_mdrnn.pth")

Load The Saved Model

In [66]:
model2 = MDRNN(input_size=input_size, hidden_size=hidden_size, num_classes=num_classes)
model2.load_state_dict(torch.load("handwritten_digit_mdrnn.pth"))

  model2.load_state_dict(torch.load("handwritten_digit_mdrnn.pth"))


<All keys matched successfully>

In [67]:
# Put Model2 to GPU
model2.to(device)

MDRNN(
  (rnn_x): LSTM(32, 128, batch_first=True)
  (rnn_y): LSTM(32, 128, batch_first=True)
  (fc): Linear(in_features=256, out_features=36, bias=True)
)

Preprocessing Function For Uploaded Image To Be Detected

In [68]:
def preprocess_single_image(image, fixed_size=32):
    new_img = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Noise removal using Gaussian Blur
    new_img = cv2.GaussianBlur(new_img, (5, 5), 0)

    # Thresholding to binarize the image
    _, new_img = cv2.threshold(new_img, 254, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Morphological closing to remove small noise
    kernel = np.ones((3, 3), np.uint8)
    new_img = cv2.morphologyEx(new_img, cv2.MORPH_CLOSE, kernel, iterations=2)

    # Resize and bolden the character
    new_img = cv2.resize(new_img, (512, 512), interpolation=cv2.INTER_CUBIC)

    # Final smoothing
    new_img = cv2.GaussianBlur(new_img, (3, 3), 0)

    # Bolden the digit further using dilation
    kernel_thick = np.ones((10, 10), np.uint8)
    new_img = cv2.dilate(new_img, kernel_thick, iterations=2)
    
    _, new_img = cv2.threshold(new_img, 252, 255, cv2.THRESH_BINARY)

    # Resize for model and Normalize pixel values
    new_img = cv2.resize(new_img, (fixed_size, fixed_size))
    new_img = new_img / 255.0
    
    # Convert to tensor and add batch dimension
    new_img = torch.tensor(new_img, dtype=torch.float32).unsqueeze(0)
    return new_img

In [69]:
# Code For Debug Purposes

#image_path = "Datasets/Test2/Kennard_4.png"
#image_debug = cv2.imread(image_path)
#image_debug = cv2.cvtColor(image_debug, cv2.COLOR_BGR2RGB)
#image_debug = preprocess_single_image(image_debug)

#cv2.imshow('Black and White Image', image_debug)
#cv2.waitKey(0)

error: OpenCV(4.10.0) :-1: error: (-5:Bad argument) in function 'imshow'
> Overload resolution failed:
>  - mat is not a numpy array, neither a scalar
>  - Expected Ptr<cv::cuda::GpuMat> for argument 'mat'
>  - Expected Ptr<cv::UMat> for argument 'mat'


Function To Predict The Uploaded Image

In [70]:
def predict_single_image(model, image_tensor, device, label_mapping):
    # Ensure the model is in evaluation mode
    model.eval()
    
    # Move image tensor to the same device as the model
    image_tensor = image_tensor.to(device)
    
    # No gradient calculation needed
    with torch.no_grad():
        output = model(image_tensor)
        _, predicted = torch.max(output, 1)
        
    # Map the predicted index to the corresponding label
    predicted_label = label_mapping[predicted.item()]
    return predicted_label

Predict The Uploaded Image

In [76]:
# Path to the image to be predicted
image_path = "Datasets/Test2/Evan_W.jpg"
image_prediction = cv2.imread(image_path)
image_prediction = cv2.cvtColor(image_prediction, cv2.COLOR_BGR2RGB)

# Preprocess the image
image_tensor = preprocess_single_image(image_prediction)

# Map indices back to labels
index_to_label = {idx: label for label, idx in label_to_index.items()}

# Predict the label
predicted_label = predict_single_image(model2, image_tensor, device, index_to_label)
print(f"Predicted Label: {predicted_label}")

Input shape: torch.Size([1, 32, 32])
Features shape: torch.Size([1, 256])
Predicted Label: U


Gradio Interface

In [77]:
import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm


In [78]:
def recognize_handwriting(image):
    # Preprocess the image
    image_required = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_tensor = preprocess_single_image(image_required)

    # Map indices back to labels
    index_to_label = {idx: label for label, idx in label_to_index.items()}

    # Predict the label
    predicted_label = predict_single_image(model, image_tensor, device, index_to_label)
    
    return f"Character recognized: {predicted_label}"

In [79]:
interface = gr.Interface(fn=recognize_handwriting,
                     inputs=gr.Image(),
                     outputs="text",
                     title="Handwriting Recognition",
                     description="Upload an image of handwritten characters")

# This will open the interface in your default web browser.
interface.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




Input shape: torch.Size([1, 32, 32])
Features shape: torch.Size([1, 256])
Input shape: torch.Size([1, 32, 32])
Features shape: torch.Size([1, 256])
Input shape: torch.Size([1, 32, 32])
Features shape: torch.Size([1, 256])


ERROR:    Exception in ASGI application
Traceback (most recent call last):
  File "c:\Users\evanl\AppData\Local\Programs\Python\Python311\Lib\site-packages\uvicorn\protocols\http\h11_impl.py", line 403, in run_asgi
    result = await app(  # type: ignore[func-returns-value]
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\evanl\AppData\Local\Programs\Python\Python311\Lib\site-packages\uvicorn\middleware\proxy_headers.py", line 60, in __call__
    return await self.app(scope, receive, send)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\evanl\AppData\Local\Programs\Python\Python311\Lib\site-packages\fastapi\applications.py", line 1054, in __call__
    await super().__call__(scope, receive, send)
  File "c:\Users\evanl\AppData\Local\Programs\Python\Python311\Lib\site-packages\starlette\applications.py", line 113, in __call__
    await self.middleware_stack(scope, receive, send)
  File "c:\Users\evanl\AppData\Local\Programs\Python\Python311\Li

Input shape: torch.Size([1, 32, 32])
Features shape: torch.Size([1, 256])
Input shape: torch.Size([1, 32, 32])
Features shape: torch.Size([1, 256])
