# Optical character recognition
This project demonstrates how to predict a character from a 5x5 matrix, represented as a vector of n=25, using a purely mathematical approach. Only the characters J, D and N will be used for recognition. Inner products, correlation matrices and dot products are at the center of this project.
By Jaden van Rijswijk.

In [73]:
import numpy as np
import random
from sklearn import metrics
import math

In [None]:
def visualise_character(character) -> None:
    """Print a 25-element character vector as a 5x5 block picture.

    The vector is read row by row. A cell whose value is strictly greater
    than 0.5 is drawn as a filled block; any other cell is drawn as a space.
    One line of text is printed per row.
    """
    for row_start in range(0, 25, 5):
        for position in range(row_start, row_start + 5):
            glyph = "█" if character[position] > 0.5 else " "
            print(glyph, end="")
        # Terminate the current row.
        print()
        
def visualise_probability_matrix(character) -> None:
    """Print a 25-element vector as a 5x5 table of numbers.

    The vector is read row by row. Each cell is formatted with two decimal
    places and followed by a single space; one line is printed per row.
    """
    for row_start in range(0, 25, 5):
        for position in range(row_start, row_start + 5):
            cell = f"{character[position]:.2f} "
            print(cell, end="")
        # Terminate the current row.
        print()
        
def make_blurred(character, blur_level=0.2, min_value=0) -> list[float]:
    """Return a copy of *character* whose bright cells are randomly dimmed.

    Every value strictly greater than 0.5 is replaced by a draw from
    ``random.uniform(max(min_value, value - blur_level), 1)``. Values of 0.5
    or less are copied through unchanged. The input is not modified.

    Args:
        character: Iterable of 25 numeric cell values.
        blur_level: How far below its current value a bright cell may fall.
        min_value: Floor for the lower bound of the random draw.

    Returns:
        A new 25-element list; re-drawn cells are floats.

    Raises:
        AssertionError: If the result does not contain exactly 25 values.
    """
    blurred_character = [
        random.uniform(max(min_value, value - blur_level), 1)
        if value > 0.5
        else value
        for value in character
    ]

    # Sanity check: the visualisation helpers index a flattened 5x5 grid.
    assert len(blurred_character) == 25
    return blurred_character

def make_noisy(character, noise_level=0.2, max_value=1) -> list[float]:
    """Return a copy of *character* whose dark cells are randomly brightened.

    Every value strictly less than 0.5 is replaced by a draw from
    ``random.uniform(0, min(value + noise_level, max_value))``. Values of 0.5
    or more are copied through unchanged. The input is not modified.

    Args:
        character: Iterable of 25 numeric cell values.
        noise_level: How far above its current value a dark cell may rise.
        max_value: Ceiling for the upper bound of the random draw.

    Returns:
        A new 25-element list; re-drawn cells are floats.

    Raises:
        AssertionError: If the result does not contain exactly 25 values.
    """
    noisy_character = [
        random.uniform(0, min(value + noise_level, max_value))
        if value < 0.5
        else value
        for value in character
    ]

    # Sanity check: the visualisation helpers index a flattened 5x5 grid.
    assert len(noisy_character) == 25
    return noisy_character

In [None]:
# Perturbation strengths passed to make_noisy / make_blurred below.
noise_level = 0.3
blur_level = 0.3

# Reference glyphs: 5x5 grids flattened row by row (1 = filled, 0 = empty).
j_perfect = [
    1, 1, 1, 1, 1,
    0, 0, 0, 0, 1,
    0, 0, 0, 0, 1,
    1, 0, 0, 0, 1,
    0, 1, 1, 1, 1,
]
d_perfect = [
    1, 1, 1, 1, 0,
    1, 0, 0, 0, 1,
    1, 0, 0, 0, 1,
    1, 0, 0, 0, 1,
    1, 1, 1, 1, 0,
]
n_perfect = [
    1, 0, 0, 0, 1,
    1, 1, 0, 0, 1,
    1, 0, 1, 0, 1,
    1, 0, 0, 1, 1,
    1, 0, 0, 0, 1,
]

# Every template must describe a full 5x5 grid.
assert all(len(template) == 25 for template in (j_perfect, d_perfect, n_perfect))

# For each letter: a blurred copy, a noisy copy, and a copy with noise added
# on top of the blurred one.
j_blurred = make_blurred(j_perfect, blur_level)
j_noisy = make_noisy(j_perfect, noise_level)
j_noisy_blurred = make_noisy(j_blurred, noise_level)

d_blurred = make_blurred(d_perfect, blur_level)
d_noisy = make_noisy(d_perfect, noise_level)
d_noisy_blurred = make_noisy(d_blurred, noise_level)

n_blurred = make_blurred(n_perfect, blur_level)
n_noisy = make_noisy(n_perfect, noise_level)
n_noisy_blurred = make_noisy(n_blurred, noise_level)

# All variants grouped by the letter they represent.
all_characters = dict(
    j=[j_perfect, j_blurred, j_noisy, j_noisy_blurred],
    d=[d_perfect, d_blurred, d_noisy, d_noisy_blurred],
    n=[n_perfect, n_blurred, n_noisy, n_noisy_blurred],
)

In [69]:
# Both helpers print their grid themselves and return None, so they are called
# directly; wrapping them in print() emits a stray "None" line after each grid.
visualise_character(j_perfect)
visualise_probability_matrix(j_noisy_blurred)

█████
    █
    █
█   █
 ████
None
0.76 0.94 0.98 0.75 0.79 
0.10 0.10 0.02 0.08 0.80 
0.29 0.09 0.07 0.18 0.78 
0.72 0.20 0.15 0.15 0.87 
0.20 0.84 0.75 0.87 0.85 
None


In [None]:
def magnitude(character):
    """Return the Euclidean length of the vector *character*.

    Computed as the square root of the sum of the squared components.
    """
    squared_total = sum(component ** 2 for component in character)
    return math.sqrt(squared_total)

def inner_product(x, y):
    """Return the sum of the element-wise products of *x* and *y*.

    Elements are paired with ``zip``, so pairing stops at the end of the
    shorter input.
    """
    products = (left * right for left, right in zip(x, y))
    return sum(products)

def cosine_similarity(x, y):
    """Return the cosine of the angle between the vectors *x* and *y*.

    This is their inner product divided by the product of their magnitudes.
    """
    numerator = inner_product(x, y)
    denominator = magnitude(x) * magnitude(y)
    return numerator / denominator

def find_similarities(character, character_set):
    """Return the cosine similarity of *character* to each member of *character_set*.

    The result is a list in the same order as *character_set*.
    """
    return [
        cosine_similarity(character, candidate)
        for candidate in character_set
    ]

ValueError: continuous is not supported