# Georgian Digital Alphabet Recognition

### Preprocessing
add useful imports

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import os
%matplotlib inline

turns a number to a one hot array

In [3]:
def to_one_hot(values: np.ndarray, max: int) -> np.matrix:
    one_hot = np.zeros((values.size, max))
    rows = np.arange(values.size)
    one_hot[rows, values] = 1    

    return one_hot

import image data and convert images to numpy arrays

In [4]:
import matplotlib.image as im

X_arr = list[np.ndarray]()
Y_arr = list[int]()
num_chars = ord("ჰ") - ord("ა")

for i in range(num_chars):
    char = chr(ord("ა") + i)

    file_path = f"data/images/{char}"
    for image in os.listdir(file_path):
        img_data = im.imread(f"{file_path}/{image}").ravel()

        X_arr.append(np.append(img_data, np.ones(10000 - len(img_data))))
        Y_arr.append(i)

turn list of image values to numpy matrix, values to one_hot vector

In [5]:
X = np.asmatrix(X_arr)
Y = to_one_hot(np.asarray(Y_arr), num_chars)

X.shape, Y.shape

((28160, 10000), (28160, 32))

divide data to train, validation, and test

In [6]:
division_points = (int(X.shape[0] * 0.6), int(X.shape[0] * 0.8))

data = np.append(X, Y, 1)
np.random.shuffle(data)

X = data[:, 0: X.shape[1]]
Y = data[:, X.shape[1]:]

X_train = X[0: division_points[0], :]
Y_train = Y[0: division_points[0], :]

X_valid = X[division_points[0]: division_points[1], :]
Y_valid = Y[division_points[0]: division_points[1], :]

X_test = X[division_points[1]:, :]
Y_test = Y[division_points[1]:, :]

X_train.shape, X_valid.shape, X_test.shape

((16896, 10000), (5632, 10000), (5632, 10000))

### Define neural network model class

In [7]:
class NeuralNetworkModel:
    def __init__(self, in_size: int, out_size: int, hidden_sizes: list[int]) -> None:
        self.layers: list[int] = [in_size] + hidden_sizes + [out_size]
        
        self.thetas = list[np.matrix]()
        self.biases = list[np.matrix]()

        for i in range(len(self.layers) - 1):
            prev_size: int = self.layers[i]
            next_size: int = self.layers[i + 1]

            self.thetas.append(np.asmatrix(np.random.rand(next_size, prev_size) - 0.5))
            self.biases.append(np.asmatrix(np.random.rand(next_size, 1) - 0.5))

    def __ReLU(self, X: np.matrix) -> np.matrix:
        return np.maximum(0, X)

    def __ReLU_grad(self, X: np.matrix) -> np.matrix:
        return np.where(X <= 0, 0, 1)

    def __sigmoid(self, X: np.matrix) -> np.matrix:
        ephsilon = 1e-5 # to avoid returning 1
        X = X.clip(-500, 500) # to prevent overflows

        return np.divide(1, 1 + np.exp(-X) + ephsilon)

    def __forward(self, input: np.matrix, thetas: list[np.matrix]) -> tuple[np.matrix, list[np.matrix]]:
        result: np.matrix = input
        layer_values: list[np.matrix] = [input]

        for i in range(len(self.layers) - 1):
            # z(i)
            result = np.dot(result, thetas[i].T)
            result = np.add(result, self.biases[i].T)
            # a(i)
            if i != len(self.layers) - 2: 
                result = self.__ReLU(result)
                layer_values.append(result)

            else: result = self.__sigmoid(result)

        return result, layer_values

    # forward propagation, but returns array with only 1 and 0s
    def predict(self, input: np.matrix) -> np.matrix:
        # predictions =
        return self.__forward(input, self.thetas)
        # return to_one_hot(np.asarray(predictions.argmax(axis=1)).ravel(), num_chars)

    def cost(self, X: np.matrix, Y: np.matrix, lambd: float, thetas: list[np.matrix]) -> float:
        h, _ = self.__forward(X, thetas)

        # compute cost
        first_term = np.multiply(Y, np.log(h))
        second_term = np.multiply((1 - Y), np.log(1 - h))
        J = np.sum(first_term + second_term) / (-X.shape[0])

        # add regularization
        regularized = 0
        for thetas in thetas:
            regularized += np.sum(np.power(thetas, 2)) * (lambd / (2 * X.shape[0]))

        J += regularized

        return J
    
    # backpropagation
    def gradients(self, X: np.matrix, Y: np.matrix, lambd: float) -> tuple[list[np.matrix], list[np.matrix]]:
        h, a_values = self.__forward(X, self.thetas)
        bias_a = [np.asmatrix(np.ones((X.shape[0], 1))) for _ in a_values]

        grads: list[np.matrix] = [np.zeros(theta.shape) for theta in self.thetas]
        biases: list[np.matrix] = [np.zeros(bias.shape) for bias in self.biases]

        last_delta: np.matrix = np.subtract(h, Y)
        grads[-1] = (grads[-1] + np.dot(last_delta.T, a_values[-1])) / X.shape[0]
        biases[-1] = (biases[-1] + np.dot(last_delta.T, bias_a[-1])) / X.shape[0]

        for i in range(len(a_values) - 1, 0, -1):
            delta = np.multiply(np.dot(last_delta, self.thetas[i]), self.__ReLU_grad(a_values[i]))

            grads[i - 1] = grads[i - 1] + np.dot(delta.T, a_values[i - 1]) / X.shape[0]
            grads[i - 1] = grads[i - 1] + np.multiply(lambd / X.shape[0], self.thetas[i - 1]) # regularization

            biases[i - 1] = (biases[i - 1] + np.dot(delta.T, bias_a[i - 1])) / X.shape[0]

            last_delta = delta

        return grads, biases

    def gradient_check(self, X: np.matrix, Y: np.matrix, lambd: float, grads: list[np.matrix]) -> bool:
        ephsilon = 1e-5
        
        new_theta1 = [np.copy(theta) for theta in self.thetas]
        new_theta2 = [np.copy(theta) for theta in self.thetas]
        preds = [np.zeros(grad.shape) for grad in grads]
        for l in range(len(self.thetas)):
            for i in range(self.thetas[l].shape[0]):
                for j in range(self.thetas[l].shape[1]):
                    new_theta1[l][i, j] = new_theta1[l][i, j] + ephsilon
                    new_theta2[l][i, j] = new_theta2[l][i, j] - ephsilon

                    grad_pred = self.cost(X, Y, lambd, new_theta1) - self.cost(X, Y, lambd, [theta for theta in new_theta2])
                    grad_pred = grad_pred / (2 * ephsilon)
                    preds[l][i, j] = grad_pred

                    new_theta1[l][i, j] = new_theta1[l][i, j] - ephsilon
                    new_theta2[l][i, j] = new_theta2[l][i, j] + ephsilon

        flattened_grads = np.concatenate([np.asarray(grad).ravel() for grad in grads])
        flattened_preds = np.concatenate([np.asarray(pred).ravel() for pred in preds])

        numerator = np.linalg.norm(flattened_grads - flattened_preds)
        denominator = np.linalg.norm(flattened_grads) + np.linalg.norm(flattened_preds)
        diff = numerator / denominator

        if diff > 1e-4:
            print(diff)
            print(grads) 
            print(preds)
            return False

        return True

    def train(self, X: np.matrix, Y: np.matrix, alpha: float, lambd: float, max_iters: int = 1000) -> np.ndarray:
        cost = np.zeros(max_iters)

        for i in range(max_iters):
            theta_grads, bias_grads = self.gradients(X, Y, lambd)

            assert self.gradient_check(X, Y, lambd, theta_grads)

            for j, _ in enumerate(self.thetas):
                self.thetas[j] = self.thetas[j] - np.multiply(alpha, theta_grads[j])
                self.biases[j] = self.biases[j] - np.multiply(alpha, bias_grads[j])

            # cost[i] = self.cost(X, Y, lambd, self.thetas)

        return cost

### Build a model

In [8]:
nn = NeuralNetworkModel(10000, num_chars, [10, 10])

In [13]:
nn.train(X[: 100, :], Y[: 100, :], 0.01, 0.1, 1)

KeyboardInterrupt: 

In [5]:
whaat = np.asmatrix([
    im.imread(f"data/images/ა/1_aacadhn.ttf_fs_24_bc_256.ა.png").ravel(),
    im.imread(f"data/images/ც/132_aacadhn.ttf_fs_40_bc_256.ც.png").ravel(),
    np.zeros(10000)
])

In [35]:
nn.predict(whaat)

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])