###### --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
###### OCR using KNN
###### --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [1]:
import os
from dotenv import load_dotenv

In [2]:
from PIL import Image
import numpy as np

In [3]:
# Load variables from a .env file into the process environment (python-dotenv)
# so the os.getenv lookups in this notebook can find them.
load_dotenv()

True

In [4]:
# Dataset and output locations are taken from environment variables.
# os.getenv returns None for any variable that is not set.
DATA_DIR = os.getenv('DATA_DIR')
TEST_DATA = os.getenv('TEST_DATA')
TEST_LABELS = os.getenv('TEST_LABELS')
TRAIN_DATA = os.getenv('TRAIN_DATA')
TRAIN_LABELS = os.getenv('TRAIN_LABELS')
OUTPUT_DATA = os.getenv('OUTPUT_DATA')

In [5]:
# Display the resolved training-images path as a quick sanity check.
TRAIN_DATA

'C:\\Users\\HP\\Anaconda Projects\\Datasets\\Data\\Mnist Digit\\train-images.idx3-ubyte'

In [6]:
# DATA_DIR = r'C:\Users\HP\Anaconda Projects\Datasets\Data\Mnist Digit'
# TEST_DATA = DATA_DIR + r'\t10k-images.idx3-ubyte'
# TEST_LABELS = DATA_DIR + r'\t10k-labels.idx1-ubyte'
# TRAIN_DATA = DATA_DIR + r'\train-images.idx3-ubyte'
# TRAIN_LABELS = DATA_DIR + r'\train-labels.idx1-ubyte'

# OUTPUT_DATA = r'C:\Users\HP\Anaconda Projects\Pojects\OCR\Output'

In [7]:
def read_image(path):
    """Open the image at ``path`` and return it as a NumPy array.

    The image is converted to Pillow mode ``'L'`` before being passed to
    ``np.asarray``.
    """
    grayscale = Image.open(path).convert('L')
    return np.asarray(grayscale)

In [8]:
def write_image(image,path):
    """Save ``image`` to ``path`` as a mode-``'L'`` Pillow image.

    ``image`` is first converted with ``np.array`` and then wrapped with
    ``Image.fromarray``.
    """
    pixels = np.array(image)
    Image.fromarray(pixels, 'L').save(path)

In [9]:
def int_to_bytes(int_data, length=2):
    """Encode a non-negative integer as big-endian bytes.

    Args:
        int_data: The integer to encode.
        length: Number of bytes in the result. Defaults to 2, which is the
            width this helper always used before the parameter existed.

    Returns:
        A ``bytes`` object of exactly ``length`` bytes, most significant
        byte first.

    Raises:
        OverflowError: If ``int_data`` is negative or does not fit in
            ``length`` bytes.
    """
    return int_data.to_bytes(length, 'big')

In [10]:
def bytes_to_int(byte_data):
    """Interpret ``byte_data`` as an unsigned big-endian integer.

    An empty byte string decodes to ``0``.
    """
    return int.from_bytes(byte_data, byteorder='big')

In [11]:
def read_images(file_name, n_max_images = None):
    """Read images from an IDX3-style image file (e.g. the MNIST image sets).

    The file starts with four 4-byte big-endian integers: a magic number,
    the image count, the row count and the column count. The pixel bytes
    follow, one byte per pixel, image by image and row by row.

    Args:
        file_name: Path of the file to read.
        n_max_images: Upper bound on the number of images to return. ``None``
            means "read every image the header announces". The value is
            clamped to the header count, so asking for more images than the
            file holds no longer yields images made of empty bytes.

    Returns:
        A list of images. Each image is a list of rows and each row is a
        list of length-1 ``bytes`` objects, one per pixel.
    """
    with open(file_name,'rb') as f:
        f.read(4)  # magic number: present in the header but not validated
        n_images = int.from_bytes(f.read(4), 'big')
        n_rows = int.from_bytes(f.read(4), 'big')
        n_columns = int.from_bytes(f.read(4), 'big')

        # Compare against None explicitly so that 0 means "no images"
        # instead of being mistaken for "no limit".
        if n_max_images is not None:
            n_images = min(n_images, n_max_images)

        print('images : ',n_images)
        print('rows : ',n_rows)
        print('columns : ',n_columns)

        images = []
        for _ in range(n_images):
            image = []
            for _ in range(n_rows):
                # One read per row instead of one per pixel.
                row_bytes = f.read(n_columns)
                # Slicing (not indexing) keeps every pixel a length-1 bytes
                # object, which is what the distance code feeds to
                # bytes_to_int. Past the end of a truncated file the slice
                # is b'', matching what f.read(1) would have returned.
                image.append([row_bytes[i:i + 1] for i in range(n_columns)])
            images.append(image)
        return images

In [12]:
def read_labels(file_name, n_max_labels = None):
    """Read labels from an IDX1-style label file (e.g. the MNIST label sets).

    The file starts with two 4-byte big-endian integers: a magic number and
    the label count. The labels follow, one unsigned byte each.

    Args:
        file_name: Path of the file to read.
        n_max_labels: Upper bound on the number of labels to return. ``None``
            means "read every label the header announces". The value is
            clamped to the header count.

    Returns:
        A list of integer labels. If the file is shorter than its header
        claims, only the labels actually present are returned; missing
        entries are not padded with ``0``.
    """
    with open(file_name, 'rb') as f:
        f.read(4)  # magic number: present in the header but not validated
        n_labels = int.from_bytes(f.read(4), 'big')

        # Compare against None explicitly so that 0 means "no labels"
        # instead of being mistaken for "no limit".
        if n_max_labels is not None:
            n_labels = min(n_labels, n_max_labels)

        # A negative size would make read() return the rest of the file,
        # so clamp it to zero. Iterating bytes yields ints directly.
        return list(f.read(max(n_labels, 0)))
            

In [13]:
# How many training and test samples to load from the dataset files.
train_samples = 1000
test_samples = 20

In [14]:
# Load at most train_samples images from the training image file.
X_train = read_images(TRAIN_DATA, train_samples)

images :  1000
rows :  28
columns :  28


In [15]:
# Load at most test_samples images from the test image file.
X_test = read_images(TEST_DATA,test_samples)

# Labels for the training images loaded above.
y_train = read_labels(TRAIN_LABELS,train_samples)

# Labels for the test images; used to score the predictions.
y_test = read_labels(TEST_LABELS,test_samples)

images :  20
rows :  28
columns :  28


In [16]:
def flatten_list(sample):
    """Concatenate the rows of ``sample`` into a single flat list."""
    flat = []
    for row in sample:
        flat.extend(row)
    return flat

def extract_features(X):
    """Return one flattened feature list per sample in ``X``.

    Each sample is passed through ``flatten_list``; the order of the
    samples is preserved.
    """
    return list(map(flatten_list, X))

In [17]:
# Keep references to the un-flattened image lists: the next cell rebinds
# X_train / X_test to flattened features, and temp is used later to write
# the matched training images to disk.
temp = X_train
temp1 = X_test

In [18]:
# Flatten every image into one feature list. Always start from the
# un-flattened copies (temp / temp1) so that re-running this cell does not
# flatten already-flattened data.
X_train = extract_features(temp)
X_test = extract_features(temp1)

In [19]:
def eucledian(train_image, test_image):
    """Return the Euclidean distance between two flattened images.

    Both arguments are sequences of pixels; each pixel is decoded with
    ``bytes_to_int`` before the difference is taken. The sequences are
    paired with ``zip``, so any extra pixels in the longer one are ignored.
    """
    squared_total = 0
    for train_pixel, test_pixel in zip(train_image, test_image):
        delta = bytes_to_int(train_pixel) - bytes_to_int(test_pixel)
        squared_total += delta * delta
    return squared_total ** 0.5

In [20]:
def get_training_distances(X_train, test_image):
    """Return the distance from ``test_image`` to every image in ``X_train``.

    The result has one entry per training image, in the same order as
    ``X_train``, computed with ``eucledian``.
    """
    distances = []
    for train_image in X_train:
        distances.append(eucledian(train_image, test_image))
    return distances

In [21]:
def knn(X_train, X_test, y_train, k = 3):
    """Classify every test image by majority vote among its k nearest neighbours.

    Args:
        X_train: Flattened training images.
        X_test: Flattened test images.
        y_train: Labels for ``X_train``, index-aligned with it.
        k: Number of nearest training images that take part in the vote.

    Returns:
        A ``(y_pred, indexes)`` tuple. ``y_pred`` holds the predicted label
        for each test image. ``indexes`` holds, for each test image, the
        training index of the closest neighbour that carries the predicted
        label.
    """
    y_pred = []
    indexes = []
    for test_image in X_test:
        distances = get_training_distances(X_train, test_image)
        # Training indices ordered nearest-first. sorted() is stable, so
        # equally distant images keep their original index order.
        nearest_first = sorted(range(len(distances)), key=distances.__getitem__)
        neighbour_labels = [y_train[position] for position in nearest_first[:k]]
        # Most frequent label wins; on a tie max() keeps the label that
        # appears first, i.e. the one belonging to the nearer neighbour.
        winner = max(neighbour_labels, key=neighbour_labels.count)
        y_pred.append(winner)
        # neighbour_labels is index-aligned with nearest_first, so the first
        # position of the winning label maps back to a training index.
        indexes.append(nearest_first[neighbour_labels.index(winner)])
    return y_pred, indexes

In [22]:
# Classify the test images with the 10 nearest training images voting, then
# display the predictions and the matched training indices.
y_pred, indexes = knn(X_train, X_test, y_train, k = 10)
y_pred, indexes

([7, 2, 1, 0, 4, 1, 4, 9, 4, 9, 0, 6, 9, 0, 1, 5, 9, 7, 3, 4],
 [773,
  360,
  466,
  451,
  914,
  466,
  272,
  744,
  237,
  167,
  524,
  488,
  116,
  210,
  678,
  316,
  334,
  522,
  30,
  92])

In [23]:
# Print the matched training indices on a single line.
print(indexes)

[773, 360, 466, 451, 914, 466, 272, 744, 237, 167, 524, 488, 116, 210, 678, 316, 334, 522, 30, 92]


In [24]:
# Save the matched training image for every test sample, using the
# un-flattened images kept in temp.
# NOTE(review): the file name is the label alone, so matches that share a
# label are written to the same path and overwrite each other — confirm
# that this is intended.
for index in indexes:
    write_image((temp[index]),f'{OUTPUT_DATA}/{y_train[index]}.png')

In [25]:
# Fraction of test samples whose predicted label equals the true label,
# printed as a percentage.
correct = sum(1 for predicted, actual in zip(y_pred, y_test) if predicted == actual)
accuracy = correct / len(y_test)
print(accuracy * 100)

95.0


### -------------------------------------------------------------------------------------------------------------------------------------------------------------------