# Research Paper
## A Statistical Global Feature Extraction Method for Optical Font Recognition
---
[Click here](https://link.springer.com/content/pdf/10.1007%2F978-3-642-20039-7_26.pdf) to open the research paper.

### Research paper summary
We divide the framework of the Arabic calligraphy font recognition system into two
modules: pre-processing and post-processing. Besides generating texture blocks from
the predetermined text, pre-processing also includes an edge detection process.
Post-processing consists of two sub-processes: feature extraction, using our
proposed algorithm based on a statistical method, and recognition.

<table id="table-description">
    <style>
        table td:nth-child(2), table th:nth-child(2) { text-align: center; }
        #table-description {width:70%;}
    </style>
    <thead>
        <tr>
            <th>Modules</th>
            <th>Description</th>
            <th>Done</th>
        </tr>
    </thead>
    <tbody>
        <tr>
            <td>Preprocessing</td>
            <td>
                <ol style="padding-right:15px;">
                    <li>Binarization</li>
                    <li>Skew correction</li>
                    <li>Text normalization</li>
                    <li>Laplacian filter</li>
                    <li>Set size 512x512</li>
                </ol>
            </td>
            <td>
                <ul style="list-style-type:none;padding:0;margin-left:5px;margin-right:5px;">
                    <li>&#x2611;</li>
                    <li>&#x2612;</li>
                    <li>&#x2612;</li>
                    <li>&#x2611;</li>
                    <li>&#x2612;</li>
                </ul>
            </td>
        </tr>
        <tr>
            <td>Postprocessing</td>
            <td>
                <ol style="padding-right:15px;">
<li>1st order relationship EDM<sub>1</sub></li><li>Proposed sorted EDM<sub>1</sub></li><li>2nd order relationship EDM<sub>2</sub></li>
                </ol>
            </td>
            <td>
                <ul style="list-style-type:none;padding:0;margin-left:5px;margin-right:5px;">
                    <li>&#x2611;</li><li>&#x2611;</li><li>&#x2611;</li>
                </ul>
            </td>
        </tr>
        <tr>
            <td>Feature Extraction</td>
            <td>
                <ul style="list-style-type:none;padding:0;margin-left:5px;margin-right:5px;">
<li>Edges Direction</li><li>Homogeneity θ</li><li>Weight</li><li>Pixel Regularity θ</li><li>Edges Regularity θ<sub>*</sub></li>
                </ul>
            </td>
            <td>
                <ul style="list-style-type:none;padding:0;margin-left:5px;margin-right:5px;">
                    <li>&#x2611;</li><li>&#x2611;</li><li>&#x2611;</li><li>&#x2611;</li><li>&#x2611;</li>
                </ul>
            </td>
        </tr>
        <tr>
            <td>Classifications</td>
            <td>
                <ul style="list-style-type:none;padding:0;margin-left:5px;margin-right:5px;">
<li>Bayes network</li><li>Multilayer Network</li><li>Decision Tree</li>
                </ul>
            </td>
            <td>
                <ul style="list-style-type:none;padding:0;margin-left:5px;margin-right:5px;">
                    <li>&#x2612;</li><li>&#x2612;</li><li>&#x2611;</li>
                </ul>
            </td>
        </tr>
    </tbody>
</table>

#### θ,θ<sub>*</sub> Description

| θ Type | Value |
|:---:|:---:|
| θ | 0°, 45°, 90°, 135° |
| θ<sub>*</sub> | 0°, 45°, 90°, 135°, 180°, 225° |

#### Paper experiment results

| Classifier | Accuracy |
|:---:|:---:|
| Bayes network  | 92.473% |
| Multilayer Network | 95.341% |
| Decision Tree | 97.85% |


In [None]:
# All the imports you will need in the whole lab
from skimage.feature import greycomatrix, greycoprops
from skimage.exposure import histogram
from skimage import io
from skimage.filters import threshold_otsu
from skimage.color import rgb2gray
import matplotlib.pyplot as plt
import os
import numpy as np
from matplotlib.pyplot import bar
import cv2
import csv


#### Utilities

In [None]:
def show_images(images,titles=None):
    '''
    DESCRIPTION:
    Show image(s) side by side in one matplotlib figure.
    images[k] is drawn with the title titles[k]. When no titles are given
    the images are labelled '(1)', '(2)', ...
    Two-dimensional images are drawn with the gray colormap.
    '''
    count = len(images)
    if titles is None:
        titles = ['(%d)' % number for number in range(1, count + 1)]

    figure = plt.figure()
    for position, (picture, caption) in enumerate(zip(images, titles), start=1):
        axes = figure.add_subplot(1, count, position)
        if picture.ndim == 2:
            plt.gray()
        plt.imshow(picture)
        axes.set_title(caption)

    # Scale the figure size by the number of images shown.
    figure.set_size_inches(np.array(figure.get_size_inches()) * count)
    plt.show()

def showHist(img):
    '''
    DESCRIPTION:
    Draw the 256-bin histogram of an image as a bar chart in a new figure.
    The bin centers are cast to uint8 before being used as bar positions.
    '''
    plt.figure()
    counts, centers = histogram(img, nbins=256)
    bar(centers.astype(np.uint8), counts, width=0.8, align='center')

In [None]:
def read_data(file_name):
    '''
    DESCRIPTION:
    Read a comma-separated file of numbers, one record per line.
    Every field is cast to float. Blank lines are skipped.

    RETURN:
    List of rows, each row being a list of floats
    '''
    data = []
    with open(file_name, newline='') as csv_file:
        # Parse the file as real CSV (comma delimiter) instead of splitting
        # on spaces and then re-splitting the first token on commas.
        for row in csv.reader(csv_file):
            # csv.reader yields an empty list for an empty line; indexing it
            # used to raise IndexError.
            if not any(cell.strip() for cell in row):
                continue
            data.append([float(cell) for cell in row])
    return data

In [None]:
def read_test_data():
    '''
    DESCRIPTION:
    Load the test set from the working directory.
    'test_data.csv' is read with read_data() and holds the unlabelled test
    instances. './test_data_true.csv' holds the actual class of every test
    instance; the first space-separated token of each line is cast to float.

    RETURN:
    test_data, test_data_true
    '''
    test_data = read_data('test_data.csv')

    with open('./test_data_true.csv', newline='') as label_file:
        label_rows = csv.reader(label_file, delimiter=' ', quotechar='|')
        test_data_true = [float(label_row[0]) for label_row in label_rows]

    return test_data, test_data_true

In [None]:
def read_image_folder(base_directory, sections, number_of_fonts):
    '''
    DESCRIPTION:
    Get array of images at specific directory. Directory is divided into M-sections.
    Each section is divided in N-fonts, laid out as
    <base_directory>/section<S>/<F> with F in 1..number_of_fonts.
    Images of font F from every requested section are collected together.

    RETURN:
    Array of N-fonts of arrays of images (index F - 1 holds font F)
    '''
    images = [[] for _ in range(number_of_fonts)]
    for section_num in sections:
        for folder_num in range(1, number_of_fonts + 1):
            # os.path.join instead of hard-coded "\\" so the layout also
            # resolves on non-Windows systems.
            folder_path = os.path.join(base_directory, "section" + str(section_num), str(folder_num))

            # os.listdir order is arbitrary; sort for a reproducible order.
            for fn in sorted(os.listdir(folder_path)):
                # Group by font (folder), not by section: indexing with
                # section_num mixed the fonts together and overflowed the
                # list when a section number exceeded number_of_fonts.
                images[folder_num - 1].append(io.imread(os.path.join(folder_path, fn)))
    return np.asarray(images)

In [None]:
def extract_feature_image_folder(base_directory, sections, number_of_fonts):
    '''
    DESCRIPTION:
    Get array of images at specific directory. Directory is divided into M-sections.
    Each section is divided in N-fonts, laid out as
    <base_directory>/section<S>/<F> with F in 1..number_of_fonts.
    Every image is preprocessed and reduced to its feature vector.

    RETURN:
    Array of N-fonts of arrays of features/image
    [
        Font_X[
            FeatureImg1[...]
            FeatureImg2[...]
            FeatureImg3[...]
            ...
        ]
    ]
    '''
    features = [[] for _ in range(number_of_fonts)]
    for section_num in sections:
        for folder_num in range(1, number_of_fonts + 1):
            # os.path.join instead of hard-coded "\\" so the layout also
            # resolves on non-Windows systems.
            folder_path = os.path.join(base_directory, "section" + str(section_num), str(folder_num))

            # os.listdir order is arbitrary; sort for a reproducible order.
            for fn in sorted(os.listdir(folder_path)):
                img = io.imread(os.path.join(folder_path, fn))
                preprocessed_image = preprocessing_image(img)
                features[folder_num - 1].append(proposed_method(preprocessed_image))
    return np.asarray(features)

In [None]:
def preprocessing_image(img):
    '''
    DESCRIPTION:
    Preprocess an image.
        1. Grayscale (scaled to 0..255 uint8)
        2. OTSU Threshold
        3. Binarization to 0 / 255
        4. Polarity check: invert unless white (255) pixels are the majority
        5. Laplacian filter (ksize=3), absolute value, then inverted (255 - x)

    RETURN:
    Preprocessed Image
    '''
    # An image that is already 2-D needs no colour conversion.
    # NOTE(review): newer scikit-image releases appear to reject 2-D input to
    # rgb2gray, while older ones returned it unchanged - confirm for the
    # installed version.
    grayscale_image = np.asarray(img) if np.ndim(img) == 2 else rgb2gray(img)
    if grayscale_image.max() <= 1:
        grayscale_image = grayscale_image * 255
    grayscale_image = grayscale_image.astype(np.uint8)

    global_threshold = threshold_otsu(grayscale_image)
    binary_image = np.where(grayscale_image > global_threshold, 255, 0).astype(np.uint8)

    # Count the white pixels directly. The previous check took argmax over a
    # (counts, bin_centers) pair coerced into one array, which mixes pixel
    # counts with bin-center values. A tie still inverts, as before.
    white_pixels = np.count_nonzero(binary_image)
    if 2 * white_pixels <= binary_image.size:
        binary_image = 255 - binary_image

    laplacian_image = cv2.convertScaleAbs(cv2.Laplacian(binary_image, cv2.CV_16S, ksize=3))
    return 255 - laplacian_image

In [None]:
def sort_edm(edm):
    '''
    DESCRIPTION:
    Order the eight neighbour cells of an EDM by value (largest first), then
    rearrange so that every odd position holds the angle opposite
    (180 degrees away) to the one just before it.

        Angles
        135   90   45
        180   c     0
        225  270  315
        --------------
        1D
        0 45 90 135 180 225 270 315
        Index
        0 1  2  3   4   5   6   7

    RETURN:
    Proposed sorted Array of (angle, value) rows
    '''
    # Cell of `edm` holding each angle, listed for 0, 45, ..., 315 degrees.
    cells = ((1, 2), (0, 2), (0, 1), (0, 0), (1, 0), (2, 0), (2, 1), (2, 2))
    pairs = [(angle, edm[row, col]) for angle, (row, col) in zip(range(0, 360, 45), cells)]
    # Stable sort: equal values keep their ascending-angle order.
    pairs.sort(key=lambda pair: pair[1], reverse=True)
    ordered = np.asarray(pairs)

    for slot in (1, 3, 5, 7):
        opposite = (ordered[slot - 1, 0] + 180) % 360
        partner = np.where(ordered[:, 0] == opposite)[0][0]
        if partner != slot:
            # Fancy-index swap of the two rows.
            ordered[[slot, partner]] = ordered[[partner, slot]]

    return ordered

In [None]:
def edge_direction_matrix(image):
    '''
    DESCRIPTION:
    Takes an image. Calculates EDM-1.
    A pixel counts as foreground when its value is not greater than 0; the
    image is padded with one ring of background. For every foreground pixel
    its 3x3 foreground neighbourhood (itself included) is accumulated.

    RETURN:
    Edge Direction Matrix-1 (3x3 float array),
    sum of all accumulated neighbourhood entries (equals the sum of EDM-1)
    '''
    padded_image = np.pad(image, 1, 'constant', constant_values=[1])
    padded_image = np.where(padded_image > 0, 0, 1)

    centre = padded_image[1:-1, 1:-1]
    rows, cols = centre.shape
    edm = np.zeros((3, 3))
    # Vectorized replacement for the per-pixel Python loop: cell (r, c) counts
    # the foreground pixels whose neighbour at offset (r - 1, c - 1) is also
    # foreground, i.e. the overlap of the mask with its shifted copy.
    for r in range(3):
        for c in range(3):
            edm[r, c] = np.sum(centre * padded_image[r:r + rows, c:c + cols])

    black_pixels = int(edm.sum())
    return edm, black_pixels

In [None]:
def edge_direction_matrix_2(image, edm_1):
    '''
    DESCRIPTION:
    Takes an image and EDM-1. Calculates EDM-2.
    A pixel counts as foreground when its value is not greater than 0.
    Every foreground pixel increments the centre cell. Among its foreground
    neighbours, the one whose angle has the largest EDM-1 value is selected
    (ties go to the angle that comes first in sort_edm's order) and the cell
    of that angle is incremented. A foreground pixel without any foreground
    neighbour only increments the centre cell.

    RETURN:
    Edge Direction Matrix-2 (3x3 float array)
    '''
    angles = np.asarray([
        [135, 90, 45],
        [180, -1, 0],
        [225, 270, 315]
    ])
    # (row offset, column offset, angle label) of the eight neighbours.
    # NOTE(review): the labels are point-reflected relative to the `angles`
    # layout (the right-hand neighbour is labelled 180). This mapping is
    # kept exactly as it was; confirm against the paper.
    neighbours = (
        (0, 1, 180), (-1, 1, 225), (-1, 0, 270), (-1, -1, 315),
        (0, -1, 0), (1, -1, 45), (1, 0, 90), (1, 1, 135),
    )

    # Resolve every angle once, instead of one np.where per neighbour per pixel:
    # angle -> (EDM-1 value, rank in the sorted order, row and column in edm_2).
    relationship_order = sort_edm(edm_1)
    lookup = {}
    for rank, (angle, value) in enumerate(relationship_order):
        cell_row, cell_col = np.argwhere(angles == angle)[0]
        lookup[int(angle)] = (value, rank, cell_row, cell_col)

    edm_2 = np.zeros((3, 3))
    padded_image = np.pad(image, 1, 'constant', constant_values=[1])
    padded_image = np.where(padded_image > 0, 0, 1)

    # Coordinates of the foreground pixels, expressed in the padded frame.
    for i, j in np.argwhere(padded_image[1:-1, 1:-1] == 1) + 1:
        edm_2[1, 1] += 1
        best = None
        for d_row, d_col, angle in neighbours:
            if padded_image[i + d_row, j + d_col] != 1:
                continue
            candidate = lookup[angle]
            # Larger EDM-1 value wins; on equal values the lower rank wins.
            if best is None or (candidate[0], -candidate[1]) > (best[0], -best[1]):
                best = candidate
        if best is None:
            # Isolated pixel: there is no direction to count. Indexing the
            # empty candidate list used to raise IndexError here.
            continue
        edm_2[best[2], best[3]] += 1
    return edm_2

In [None]:
def extract_features_edms(edm_1, edm_2, sum_black_pixels):
    '''
    DESCRIPTION:
    Calculating feature vector of an image from its two EDMs.
        Feature 1      largest neighbour cell of EDM-1
        Feature 2-5    EDM-1 cells for 0, 45, 90, 135 degrees / sum of EDM-1
        Feature 6      EDM-1 centre / sum_black_pixels
        Feature 7-10   EDM-1 cells for 0, 45, 90, 135 degrees / EDM-1 centre
        Feature 11-18  the eight neighbour cells of EDM-2 / EDM-2 centre

    RETURN:
    Array of features 1-D (list of 18 values)
    '''
    centre_1 = edm_1[1, 1]
    centre_2 = edm_2[1, 1]

    # Row-major flattening; index 4 is the centre cell.
    neighbours_1 = np.delete(edm_1.ravel(), 4)
    neighbours_2 = np.delete(edm_2.ravel(), 4)

    # EDM-1 cells for the angles 0, 45, 90 and 135 degrees.
    first_four = np.asarray([edm_1[1, 2], edm_1[0, 2], edm_1[0, 1], edm_1[0, 0]])

    features = [neighbours_1.max()]
    features.extend(first_four / sum(edm_1.ravel()))
    features.append(centre_1 / sum_black_pixels)
    features.extend(first_four / centre_1)
    features.extend(neighbours_2 / centre_2)
    return features

In [None]:
def proposed_method(image):
    '''
    DESCRIPTION:
    Feature Extraction Module. Computes EDM-1 of the image, derives EDM-2
    from the image and EDM-1, and turns both into the feature vector.

    RETURN:
    Array of features 1-D
    '''
    first_order, black_total = edge_direction_matrix(image)
    second_order = edge_direction_matrix_2(image, first_order)
    return extract_features_edms(first_order, second_order, black_total)

In [None]:
# Validating feature extraction of the research paper on a small hand-made
# image (pixels equal to 0 are the foreground).
image = np.asarray([
    [1, 0, 0, 0, 1, 1],
    [0, 1, 1, 1, 0, 1],
    [0, 1, 1, 1, 1, 0],
    [1, 0, 0, 1, 1, 0],
    [1, 1, 1, 0, 0, 1],
])

edm_1, sum_black_pixels = edge_direction_matrix(image)
edm_2 = edge_direction_matrix_2(image, edm_1)

print(edm_1, sum_black_pixels)
print(edm_2)
print(extract_features_edms(edm_1, edm_2, sum_black_pixels))

## Trying to make a pipeline with ACdata_base

In [None]:
# Number of font classes; fonts are stored in folders numbered 1..nClasses
nClasses = 9
# Root folder of the dataset (contains the section<S> sub-folders)
base_directory = ".\\Dataset_Analyzed\\"
# Sections used for training
training_sections = list(range(1, 5))
# Sections used for validation
validating_sections = [5]

---

##### BELOW 2 CELLS NOT USED
TRIED TO USE `read_image_folder()`

SOMETHING WENT WRONG

In [None]:
# Load every training image, grouped per font
training_images = read_image_folder(
    base_directory=base_directory,
    sections=training_sections,
    number_of_fonts=nClasses,
)

In [None]:
# Preprocess every training image, extract its features and prefix each
# feature row with the 1-based class number
training_features = []
for class_number, class_images in enumerate(training_images):
    print(class_number)
    label = class_number + 1
    training_features.extend(
        [label] + proposed_method(preprocessing_image(picture))
        for picture in class_images
    )
    print("CLASS", str(label) + ":", "FINISHED")

# Append all rows to the text file in one call
with open('training_data.txt', 'a') as csvfile:
    np.savetxt(csvfile, np.asarray(training_features), delimiter=",")

---

##### EXTRACT ALL TRAINING FEATURES & SAVE TO CSV

In [None]:
# One entry per font (9 fonts), each holding the feature vectors of its images
training_features = extract_feature_image_folder(
    base_directory=base_directory,
    sections=training_sections,
    number_of_fonts=nClasses,
)

In [None]:
# Write one CSV row per training image: the 1-based class number followed by
# the image's features.
# NOTE: the file is opened in append mode, so re-running this cell adds the
# same rows again.
with open('training_data.csv', 'a') as csvfile:
    for idx, class_features in enumerate(training_features):
        print(idx)
        for image_feature in class_features:
            # np.concatenate prepends the label for list rows and ndarray rows
            # alike; `[label] + ndarray` would instead add the label to every
            # feature value by broadcasting.
            labelled_row = np.concatenate(([idx + 1], image_feature))
            np.savetxt(csvfile, np.asarray([labelled_row]), delimiter=",")

##### EXTRACT ALL VALIDATION FEATURES & SAVE TO CSV

In [None]:
# Feature vectors of the validation sections, grouped per font
test_features = extract_feature_image_folder(
    base_directory=base_directory,
    sections=validating_sections,
    number_of_fonts=nClasses,
)

In [None]:
# Write the validation set: one feature row per image to 'test_data.csv' and
# the matching 1-based class number, one per line, to 'true_classification.txt'.
# Both files are opened once (append mode) instead of once per row.
# NOTE(review): read_test_data() reads the labels from './test_data_true.csv',
# not from 'true_classification.txt' - confirm which file name is intended.
with open('test_data.csv', 'a') as feature_file, \
        open('true_classification.txt', 'a') as label_file:
    for idx, class_features in enumerate(test_features):
        print(idx)
        for image_feature in class_features:
            np.savetxt(feature_file, np.asarray([image_feature]), delimiter=",")
            np.savetxt(label_file, np.asarray([idx + 1]), delimiter=",")