### Character Recognition Using a Single-Layer Perceptron Network

In [44]:
import numpy as np
import os
import random
import math

In [45]:
# alpha: Learning Rate
# theta: Threshold
def linear_perceptron_network(inputs, targets, alpha = 1, theta = 0, max_iterations = 5000):
    """Train a single-layer perceptron with one output neuron per target component.

    For every sample the net input ``weights.dot(x) + bias`` of each neuron is
    mapped to 1 when it is above ``theta``, to -1 when it is below ``-theta``
    and to 0 otherwise.  Each neuron whose output differs from its target is
    corrected with ``weights[j] += alpha * x * target[j]`` and
    ``bias[j] += alpha * target[j]``.  Training stops after the first epoch in
    which no neuron needed a correction, or after ``max_iterations`` epochs.

    Parameters
    ----------
    inputs : sequence of equal-length 1-D vectors
        Training samples; ``len(inputs[0])`` fixes the number of features.
    targets : sequence of equal-length 1-D vectors
        Desired neuron outputs, aligned with ``inputs``; ``len(targets[0])``
        fixes the number of neurons.
    alpha : number
        Learning rate.
    theta : number
        Threshold delimiting the "undecided" band ``[-theta, theta]``.
    max_iterations : int
        Upper bound on the number of epochs.

    Returns
    -------
    tuple
        ``(trained, weights, bias, epoch)`` where ``trained`` tells whether an
        error-free epoch was reached, ``weights`` has shape
        ``(neurons, features)``, ``bias`` has shape ``(neurons,)`` and
        ``epoch`` is the number of epochs that were run.
    """
    n = len(inputs[0])  # features count
    m = len(targets[0]) # neurons count
    epsilon = 1e-6      # tolerance when comparing an output with its target
    weights = np.zeros([m, n])
    bias = np.zeros(m)
    trained = False
    epoch = 0
    while epoch < max_iterations and not trained:
        trained = True
        for sample, target in zip(inputs, targets):
            sample = np.asarray(sample)
            target = np.asarray(target)
            net = weights.dot(sample) + bias
            # Classify from the raw net input in a single pass.  Thresholding
            # the vector in place would re-read the ±1 values just written and
            # turn them into 0 whenever theta >= 1.
            output = np.where(net > theta, 1, np.where(net < -theta, -1, 0))

            wrong = np.abs(output - target) > epsilon
            if wrong.any():
                # One row per misclassified neuron: (alpha * x) * t_j.
                weights[wrong] += (alpha * sample) * target[wrong][:, np.newaxis]
                bias[wrong] += alpha * target[wrong]
                trained = False

        epoch += 1

    return (trained, weights, bias, epoch)

In [46]:
# Size of the character grid each letter is drawn on: w columns by h rows.
w, h = 7, 9
# Index of the output neuron that should be active for each letter.
letter_mapping = {letter: neuron for neuron, letter in enumerate('abcdejk')}

def read_file(file):
    """Load a letter drawing from a text file into an ``(h, w)`` array.

    Every ``'*'`` character becomes 1 and every other character becomes -1.
    Grid positions the file does not cover (short or missing lines) keep the
    initial value 0.

    Parameters
    ----------
    file : path of the text file to read.

    Returns
    -------
    numpy.ndarray of shape ``(h, w)``.

    Raises
    ------
    IndexError
        If the file has more than ``h`` lines or a line longer than ``w``.
    """
    result = np.zeros((h, w))
    # Close the handle deterministically instead of leaving it to the garbage collector.
    with open(file) as handle:
        lines = [line.rstrip('\n') for line in handle]
    for y, line in enumerate(lines):
        for x, ch in enumerate(line):
            result[y, x] = 1 if ch == '*' else -1
    return result

In [47]:
def train_folder(dir_path = './training_data/'):
    """Train the perceptron on every ``.txt`` sample found in ``dir_path``.

    The first character of each file name is the letter the sample shows.
    Each sample is read with ``read_file`` and flattened to ``w * h`` values;
    its target vector is -1 everywhere except for a 1 at the neuron that
    ``letter_mapping`` assigns to that letter.

    Parameters
    ----------
    dir_path : folder containing the training samples.

    Returns
    -------
    tuple
        Whatever ``linear_perceptron_network`` returns for the collected
        samples (called with a learning rate of 1).

    Raises
    ------
    KeyError
        If a file name starts with a character missing from ``letter_mapping``.
    """
    inputs = []
    targets = []
    # os.listdir gives names in arbitrary order; the perceptron result depends
    # on presentation order, so sort to make training reproducible.
    for file in sorted(os.listdir(dir_path)):
        if not file.endswith('.txt'):
            continue
        ch = file[0]
        matrix = read_file(os.path.join(dir_path, file))
        inputs.append(matrix.reshape(w * h)) # change to one dimension

        # target array represents activation of neurons (1: active, -1: inactive)
        # all neurons should be inactive(-1) except the neuron that represents the current input character
        target = np.full(len(letter_mapping), -1)
        target[letter_mapping[ch]] = 1
        targets.append(target)

    return linear_perceptron_network(inputs, targets, 1)

# Train once on ./training_data/. `weights` and `bias` stay at notebook level
# because test() reads them as globals when classifying a sample.
trained, weights, bias, epoch = train_folder()
print(f'Training Result: {trained}, Iterations: {epoch}')

Training Result: True, Iterations: 6


In [181]:
def test(input):
    theta = 0
    guesses = {} # guesses of the network for current input
    h = weights.dot(input) + bias # h is vector of neuron values
    reverse_letter_mapping = {y: x for x, y in letter_mapping.items()}
    for i in range(len(h)):
        if h[i] > theta: # if h[i] > threshold, it means ith neuron is active
            guesses[reverse_letter_mapping[i]] = h[i]
    return guesses 

def test_folder():
    text = ''
    dir_path = './test_data/'
    corrects = 0
    total = 0
    for file in os.listdir(dir_path):
        if file.endswith('.txt'):
            ch = file[0]
            matrix = read_file(dir_path + file)
            matrix = matrix.reshape(w * h) # change to one dimension
            guesses = test(matrix)
            
            text += '%s = %s\n' %(file[:-len('.txt')], ', '.join(guesses))
            total += 1
            # check if the highest guess is the correct character
            if len(guesses) > 0 and ch == max(guesses, key=guesses.get):
                corrects += 1
    print(text)
    print(f'Accuracy: {corrects * 100 / total}%')

# Classify the samples in ./test_data/ and print each file's guesses plus the overall accuracy.
test_folder()

k1 = k
k2 = k
k3 = k
j3 = j
j2 = j
j1 = j
a1 = a
a2 = a
a3 = a
c1 = c
c3 = c
c2 = c
b2 = b
b3 = b
b1 = b
e3 = e
e2 = e
e1 = e
d1 = d
d2 = d
d3 = d

Accuracy: 100.0%


In [182]:
def make_noisy_input(input, percent):
    """Return a copy of ``input`` with ``percent`` percent of its entries negated.

    The number of corrupted entries is ``ceil(percent * len(input) / 100)``,
    clamped to ``[0, len(input)]``.  The positions are distinct, so exactly
    that many entries differ from the original; ``input`` itself is left
    untouched.

    Parameters
    ----------
    input : 1-D vector supporting ``copy()``, ``len()`` and item assignment.
    percent : share of entries to negate, in percent.

    Returns
    -------
    The corrupted copy, of the same type as ``input``.
    """
    x = input.copy()
    size = len(x)
    num_mistake = math.ceil(percent * size / 100)
    num_mistake = max(0, min(num_mistake, size))
    # Sample without replacement: drawing the same index twice would flip the
    # entry back and silently lower the effective noise level.
    for index in random.sample(range(size), num_mistake):
        x[index] = x[index] * -1

    return x

def test_noisy():
    dir_path = './training_data/'
    corrects = 0
    total = 0
    noise_percents = [5, 10, 20, 30, 50]
    for noise in noise_percents:
        for file in os.listdir(dir_path):
            if file.endswith('.txt'):
                ch = file[0]
                matrix = read_file(dir_path + file).reshape(w * h)
                matrix = make_noisy_input(matrix, noise)
                guesses = test(matrix)
                
                total += 1
                # check if the highest guess is the correct character
                if len(guesses) > 0 and ch == max(guesses, key=guesses.get):
                    corrects += 1

        print(f'Noise: {noise:2}%, Accuracy: {corrects * 100 / total:.2f}%')
        
        corrects = 0
        total = 0

# Report accuracy on noisy copies of the training samples at each noise level.
# NOTE(review): no random seed is set in the visible cells, so these numbers vary between runs.
test_noisy()

Noise:  5%, Accuracy: 100.00%
Noise: 10%, Accuracy: 95.24%
Noise: 20%, Accuracy: 95.24%
Noise: 30%, Accuracy: 85.71%
Noise: 50%, Accuracy: 61.90%
