## OCR (Optical Character Recognition) is a computer vision technique for recognizing printed or handwritten characters in images. This notebook uses it to identify handwritten digits (0–9) with a k-nearest-neighbours classifier.

In [34]:
import numpy as np 
import cv2 

# Read the image
# NOTE: this is an absolute, machine-specific path; point IMAGE_PATH at your
# own copy of the digit sheet before running the notebook elsewhere.
IMAGE_PATH = r"C:\Users\swath\Downloads\swathy\digits1.png"
image = cv2.imread(IMAGE_PATH)

# cv2.imread does not raise on a missing/unreadable file; it returns None.
# Raise instead of calling exit(): inside a notebook exit() shuts the kernel
# down, whereas an exception stops execution and leaves a readable traceback.
if image is None:
    raise FileNotFoundError(
        f"Unable to load image. Check the file path: {IMAGE_PATH}"
    )

# Gray scale conversion
gray_img = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Layout of the digit sheet: a GRID_ROWS x GRID_COLS grid of square cells,
# CELL_SIZE pixels on a side, holding NUM_CLASSES digit classes.
CELL_SIZE = 20
GRID_ROWS = 50
GRID_COLS = 100
NUM_CLASSES = 10
TRAIN_COLS = GRID_COLS // 2  # left half -> training, right half -> testing

# Fail early with a clear message when the sheet has the wrong size. Without
# this check, an image whose sides merely divide evenly would be cut into
# cells of the wrong size and reshaped into meaningless 400-pixel samples.
expected_shape = (GRID_ROWS * CELL_SIZE, GRID_COLS * CELL_SIZE)
if gray_img.shape != expected_shape:
    raise ValueError(
        f"Expected a {expected_shape[1]}x{expected_shape[0]} pixel digit sheet, "
        f"got {gray_img.shape[1]}x{gray_img.shape[0]}."
    )

# Divide the image into 5000 small images of size 20x20
divisions = [np.hsplit(row, GRID_COLS) for row in np.vsplit(gray_img, GRID_ROWS)]

# Convert into Numpy array of size (50, 100, 20, 20)
NP_array = np.array(divisions)

# Preparing train_data and test_data
# Size will be (2500, 400) where 400 = 20x20; each row is one flattened cell.
pixels_per_cell = CELL_SIZE * CELL_SIZE
train_data = NP_array[:, :TRAIN_COLS].reshape(-1, pixels_per_cell).astype(np.float32)
test_data = NP_array[:, TRAIN_COLS:].reshape(-1, pixels_per_cell).astype(np.float32)

# Create 10 different labels for each type of digit.
# The samples are laid out row by row, so this labelling assumes each digit
# class fills a contiguous band of grid rows (250 samples per digit per half).
k = np.arange(NUM_CLASSES)
train_labels = np.repeat(k, train_data.shape[0] // NUM_CLASSES)[:, np.newaxis]
test_labels = np.repeat(k, test_data.shape[0] // NUM_CLASSES)[:, np.newaxis]

# Initiate kNN classifier
knn = cv2.ml.KNearest_create()

# Perform training of data (ROW_SAMPLE: every row of train_data is one sample).
# train() reports success as a boolean, so surface a failure explicitly.
if not knn.train(train_data, cv2.ml.ROW_SAMPLE, train_labels):
    raise RuntimeError("kNN training failed.")

# Obtain the output from the classifier by specifying the number of neighbors
ret, output, neighbours, distance = knn.findNearest(test_data, k=3)

# Check that the classifier produced one prediction per test sample.
# `ret` is a float and is never None, so the prediction array is validated
# instead. An exception is raised rather than calling exit(), which would
# shut down the notebook kernel.
if output is None or output.shape[0] != test_data.shape[0]:
    raise RuntimeError("The kNN classifier did not return a valid result.")

# Score the classifier: a prediction is a hit when it equals the
# ground-truth label stored at the same row of test_labels.
matched = np.equal(output, test_labels)
correct_OP = int(matched.sum())

# Express the hit count as a percentage of all predictions made
total_predictions = output.size
accuracy = (correct_OP * 100.0) / total_predictions

# Report the result
print(f"Accuracy: {accuracy}%")


Accuracy: 91.64%
