In [23]:
# All the imports you will need in the whole lab
from sklearn.neighbors import NearestCentroid, KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB, CategoricalNB, ComplementNB
from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.tree import ExtraTreeClassifier, DecisionTreeClassifier
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

from skimage.exposure import histogram
from skimage.morphology import skeletonize
from skimage import io
from skimage.filters import threshold_otsu, laplace
from skimage.color import rgb2gray
from skimage.util import invert
from skimage.segmentation import flood, flood_fill

import numpy as np
import matplotlib.pyplot as plt
import os
import cv2
import csv


In [2]:
def show_images(images,titles=None):
    """Display several images side by side in one matplotlib figure.

    Parameters
    ----------
    images : sequence of arrays
        Images to draw, left to right. Two-dimensional images switch the
        current colormap to gray before being drawn.
    titles : sequence of str, optional
        One title per image. When omitted the images are labelled
        "(1)", "(2)", ... in order.
    """
    count = len(images)
    if titles is None:
        titles = ['(%d)' % number for number in range(1, count + 1)]

    figure = plt.figure()
    for position, (picture, caption) in enumerate(zip(images, titles), start=1):
        axis = figure.add_subplot(1, count, position)
        if picture.ndim == 2:
            plt.gray()
        plt.imshow(picture)
        axis.set_title(caption)

    # Widen (and heighten) the default figure proportionally to the image count.
    figure.set_size_inches(np.array(figure.get_size_inches()) * count)
    plt.show()

def showHist(img):
    """Draw the 256-bin intensity histogram of ``img`` as a bar chart.

    A new matplotlib figure is opened; the bars are positioned at the bin
    centres (cast to uint8) with heights equal to the bin counts.
    """
    plt.figure()
    counts, bin_centers = histogram(img, nbins=256)
    plt.bar(bin_centers.astype(np.uint8), counts, width=0.8, align='center')

In [3]:
def read_data(file_name):
    """Load a comma-separated file of numbers as a list of float rows.

    Parameters
    ----------
    file_name : str or path-like
        Path of the CSV file to read.

    Returns
    -------
    list[list[float]]
        One inner list per non-blank line, in file order.

    Raises
    ------
    ValueError
        If a cell cannot be converted with ``float`` (for example an empty
        cell produced by a trailing comma).
    """
    data = []
    with open(file_name, newline='') as csv_file:
        # Parse with the comma as the real delimiter so that whitespace around
        # values is tolerated (float() strips it) instead of truncating the row.
        for row in csv.reader(csv_file):
            # Blank or whitespace-only lines carry no sample; skip them.
            if not any(cell.strip() for cell in row):
                continue
            data.append([float(cell) for cell in row])
    return data

In [4]:
def read_test_data(data_file='validating_data.csv', labels_file='./true_validating_data.csv'):
    """Load the validation feature rows together with their true labels.

    Parameters
    ----------
    data_file : str, optional
        CSV file of feature rows, read through ``read_data``.
    labels_file : str, optional
        File holding one label per line; the first space-separated field of
        each line is converted to ``float``.

    Returns
    -------
    tuple[list[list[float]], list[float]]
        ``(test_data, test_data_true)``.
    """
    test_data = read_data(data_file)

    test_data_true = []
    with open(labels_file, newline='') as csv_file:
        for row in csv.reader(csv_file, delimiter=' ', quotechar='|'):
            # csv.reader yields [] for a blank line; indexing it would raise.
            if not row:
                continue
            test_data_true.append(float(row[0]))

    return test_data, test_data_true

In [345]:
def diacritic_segmentation(image):
    """Separate the components lying on the baseline from everything else.

    Zero-valued pixels are treated as ink. The baseline is the row holding the
    most ink pixels. Every connected region that touches that row at a
    minimum-valued pixel is flood-filled with 255, which leaves only the
    components that do not reach the baseline.

    Parameters
    ----------
    image : ndarray
        2-D image; assumed to be a 0/255 binary image with ink at 0
        (that is what ``preprocessing_image`` passes in) - TODO confirm for
        other callers.

    Returns
    -------
    tuple[ndarray, ndarray]
        ``(diacritic_image, image_text)``, both uint8. ``diacritic_image`` is
        the input with the baseline components painted 255. ``image_text`` is
        0 at the pixels that were painted and 1 everywhere else.
    """
    ink_mask = np.where(image > 0, 0, 1)
    baseline_row = np.sum(ink_mask, axis=1).argmax()

    diacritic_image = np.copy(image)
    previous_col = -1
    current_col = diacritic_image[baseline_row, :].argmin()

    # argmin returns the first minimum on the baseline row. Once the row holds
    # no darker pixel the index stops changing and the loop ends.
    while previous_col != current_col:
        diacritic_image = flood_fill(diacritic_image, (baseline_row, current_col), 255)
        previous_col = current_col
        current_col = diacritic_image[baseline_row, :].argmin()

    # Compare instead of subtracting: "image - diacritic_image > 0" only works
    # through uint8 wrap-around (0 - 255 -> 1) and fails for signed dtypes.
    image_text = np.where(image != diacritic_image, 0, 1)

    return diacritic_image.astype(np.uint8), image_text.astype(np.uint8)

In [357]:
def preprocessing_image(img):
    '''
    DESCRIPTION:
    Derive the working images used by the feature extractors.
        1. Grayscale conversion (2-D input is used as-is), rescaled to 0-255
           when the maximum value is <= 1
        2. Otsu threshold -> 0/255 binary image
        3. Polarity check: when value-0 pixels are at least as numerous as
           value-255 pixels the binary image is inverted
        4. Laplacian edge map of the binary image
        5. Zhang skeleton of the inverted binary image
        6. Diacritic / text split through diacritic_segmentation

    RETURN:
    (binary_image, edge_image, skeleton_image, diacritic_image, text_image),
    all uint8 arrays. binary_image holds 0/255; skeleton_image,
    diacritic_image and text_image hold 0/1.
    '''
    # rgb2gray rejects 2-D input in recent scikit-image releases, so an image
    # that is already single-channel is passed through untouched.
    if np.ndim(img) == 2:
        grayscale_image = np.asarray(img)
    else:
        grayscale_image = rgb2gray(img)
    if grayscale_image.max() <= 1:
        grayscale_image = grayscale_image * 255
    grayscale_image = grayscale_image.astype(np.uint8)

    global_threshold = threshold_otsu(grayscale_image)
    binary_image = np.where(grayscale_image > global_threshold, 255, 0).astype(np.uint8)

    # Count pixels directly. Taking argmax over stacked (counts, bin_centers)
    # lets the bin centres outvote the counts on small images.
    dark_pixels = np.count_nonzero(binary_image == 0)
    if dark_pixels >= binary_image.size - dark_pixels:
        binary_image = 255 - binary_image

    # Zero Laplacian response -> 255 -> 1; a saturated response (255) -> 0.
    edge_image = cv2.convertScaleAbs(cv2.Laplacian(binary_image, cv2.CV_16S, ksize=3))
    edge_image = 255 - edge_image
    edge_image[edge_image == 255] = 1

    # Skeletonize the ink (ink = 1), then flip so the skeleton is 0 on 1.
    inverted_image = invert(binary_image)
    inverted_image[inverted_image == 255] = 1
    skeleton_image = skeletonize(inverted_image, method='zhang')
    skeleton_image = invert(skeleton_image).astype(np.uint8)

    diacritic_image, text_image = diacritic_segmentation(binary_image)
    diacritic_image = np.where(diacritic_image > 0, 1, 0).astype(np.uint8)

    return binary_image, edge_image, skeleton_image, diacritic_image, text_image

# img = io.imread("..\\..\\data\\raw\\1\\0001.jpg")
# img = io.imread("..\\..\\data\\raw\\2\\0191.jpg")
# x = preprocessing_image(img)

In [358]:
def _hvsl_line_projection(line_mask, kernel_size, axis):
    """Project the outlines of long straight runs in ``line_mask`` onto one axis.

    The mask is opened twice with a rectangular kernel of ``kernel_size``, the
    external contours of what survives are drawn (thickness 2) on a blank
    canvas, and the drawn pixels are summed along ``axis``.
    """
    canvas = np.ones(line_mask.shape)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
    opened = cv2.morphologyEx(line_mask, cv2.MORPH_OPEN, kernel, iterations=2)
    found = cv2.findContours(opened, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 or 3 values depending on the OpenCV version.
    contours = found[0] if len(found) == 2 else found[1]
    cv2.drawContours(canvas, contours, -1, (0, 0, 0), 2)
    outline = np.where(canvas > 0, 0, 1)
    return np.sum(outline, axis=axis)


def hvsl_features(edge_image):
    """Horizontal / vertical straight-line features of an edge image.

    Parameters
    ----------
    edge_image : ndarray
        uint8 edge map; it is inverse-Otsu-thresholded here, so its low values
        become the foreground that is searched for lines.

    Returns
    -------
    tuple
        ``(frequency_horizontal_lines, frequency_vertical_lines, ratio)``:
        the number of rows containing horizontal-line outline pixels, the
        number of columns containing vertical-line outline pixels, and
        ``(sum(edge_image) - line_pixels) / line_pixels`` where
        ``line_pixels`` is the total of both projections. ``ratio`` is 0.0
        when no line pixel was found.
    """
    thresh = cv2.threshold(edge_image, 0, 1, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    horizontal_lines = _hvsl_line_projection(thresh, (40, 1), axis=1)
    vertical_lines = _hvsl_line_projection(thresh, (1, 10), axis=0)

    # Rows / columns whose projection is non-zero. Counting directly avoids
    # the KeyError a dict lookup of the value 0 raises when none is empty.
    frequency_horizontal_lines = np.count_nonzero(horizontal_lines)
    frequency_vertical_lines = np.count_nonzero(vertical_lines)

    sum_v_h_lines = np.sum(horizontal_lines) + np.sum(vertical_lines)
    if sum_v_h_lines == 0:
        # No straight line detected: avoid producing inf/nan as a feature.
        ratio = 0.0
    else:
        ratio = (np.sum(edge_image) - sum_v_h_lines) / sum_v_h_lines

    return frequency_horizontal_lines, frequency_vertical_lines, ratio

# img = io.imread("..\\..\\data\\raw\\1\\0001.jpg")
# img = io.imread("..\\..\\data\\raw\\2\\0191.jpg")
# img = io.imread("..\\..\\data\\raw\\9\\1496.jpg")
# x = preprocessing_image(img)
# hvsl_features(x[1])

In [359]:
def edm(image):
    """Accumulate the 3x3 neighbourhood counts of the zero-valued pixels.

    Pixels equal to 0 (or negative) are treated as foreground. For every
    foreground pixel its 3x3 neighbourhood of foreground flags is added into a
    3x3 matrix, so cell ``[r, c]`` ends up counting how many foreground pixels
    have a foreground neighbour at offset ``(r - 1, c - 1)``. The image is
    padded with background so border pixels have a full neighbourhood.

    Parameters
    ----------
    image : ndarray
        2-D image.

    Returns
    -------
    tuple[ndarray, float]
        The 3x3 float matrix and the largest of its eight non-centre cells.
    """
    padded_image = np.pad(image, 1, 'constant', constant_values=1)
    foreground = np.where(padded_image > 0, 0, 1)
    centre = foreground[1:-1, 1:-1]
    n_rows, n_cols = centre.shape

    # One shifted-window product per offset replaces the per-pixel Python loop.
    direction_matrix = np.zeros((3, 3))
    for d_row in range(3):
        for d_col in range(3):
            shifted = foreground[d_row:d_row + n_rows, d_col:d_col + n_cols]
            direction_matrix[d_row, d_col] = np.sum(centre * shifted)

    # Index 4 of the flattened matrix is the centre cell (the pixel itself).
    neighbour_counts = np.delete(direction_matrix.ravel(), 4)

    return direction_matrix, neighbour_counts.max()

def text_orientation_features(skeleton_image, edge_image):
    """Return the dominant ``edm`` neighbour count of the skeleton and edge images.

    Each value is the second element returned by ``edm`` for the respective
    image; the 3x3 matrices themselves are discarded.
    """
    _, skeleton_direction = edm(skeleton_image)
    _, edge_direction = edm(edge_image)
    return skeleton_direction, edge_direction

In [360]:
def lvl_features(skeleton_image):
    """Long-vertical-line features of a skeleton image.

    Parameters
    ----------
    skeleton_image : ndarray
        uint8 skeleton image; it is inverse-Otsu-thresholded here, so its low
        values become the foreground that is searched for vertical runs.

    Returns
    -------
    tuple
        ``(text_height, frequency_vertical_lines, maximum_height, ratio, variance)``

        * ``text_height`` - largest per-column count of outline pixels.
        * ``frequency_vertical_lines`` - number of columns containing outline
          pixels.
        * ``maximum_height`` - largest closed arc length among the contours
          after the first one; 0.0 when there is none.
        * ``ratio`` - ``(text_height - maximum_height) / maximum_height``,
          or 0.0 when ``maximum_height`` is 0.
        * ``variance`` - mean of the per-column counts.
    """
    thresh = cv2.threshold(skeleton_image, 0, 1, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Keep only vertical runs, then draw their outlines on a blank canvas.
    vertical_image = np.ones(skeleton_image.shape)
    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 10))
    detect_vertical = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, vertical_kernel, iterations=2)
    cnts = cv2.findContours(detect_vertical, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 or 3 values depending on the OpenCV version.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    cv2.drawContours(vertical_image, cnts, -1, (0, 0, 0), 2)

    vertical_image = np.where(vertical_image > 0, 0, 1)
    vertical_lines = np.sum(vertical_image, axis=0)

    # NOTE(review): the first contour is skipped, as in the original code;
    # confirm this is intended, RETR_EXTERNAL adds no frame contour.
    perimeter = [cv2.arcLength(cnt, True) for cnt in cnts[1:]]

    text_height = vertical_lines.max()
    # Count the non-empty columns directly; looking up the value 0 in a dict
    # of unique counts raises KeyError when every column has a line pixel.
    frequency_vertical_lines = np.count_nonzero(vertical_lines)
    # With zero or one contour the list is empty; max() would raise.
    maximum_height = max(perimeter, default=0.0)
    if maximum_height:
        ratio = (text_height - maximum_height) / maximum_height
    else:
        ratio = 0.0
    # NOTE(review): named "variance" but computed as the mean; left unchanged
    # so feature values stay comparable - confirm the intended statistic.
    variance = np.mean(vertical_lines)

    return text_height, frequency_vertical_lines, maximum_height, ratio, variance
    
# img = io.imread("..\\..\\data\\raw\\1\\0001.jpg")
# img = io.imread("..\\..\\data\\raw\\2\\0191.jpg")
# img = io.imread("..\\..\\data\\raw\\9\\1496.jpg")
# x = preprocessing_image(img)
# lvl_features(x[2])

In [370]:
def tth_features(skeleton_image, edge_image):
    """Placeholder feature extractor: not implemented, always returns None.

    Both arguments are currently ignored.
    """
    # TODO: implement; a visual check can be done with
    # show_images([skeleton_image, edge_image, skeleton_image + edge_image])
    return None
    
# img = io.imread("..\\..\\data\\raw\\9\\1496.jpg")
# x = preprocessing_image(img)
# tth_features(x[2], x[1])

In [372]:
def sp_features(diacritic_image, template_path="./special_diacritics.jpg"):
    """Shape distance between a diacritic image and a reference template.

    The template is read from disk, converted to grayscale, Otsu-thresholded
    to a 0/1 uint8 image and compared with ``diacritic_image`` through
    ``cv2.matchShapes`` using the ``CONTOURS_MATCH_I3`` method.

    Parameters
    ----------
    diacritic_image : ndarray
        uint8 image to compare against the template.
    template_path : str, optional
        Location of the template image. Presumably it depicts the diacritics
        specific to the Thuluth / Mohakik scripts - verify against the asset.

    Returns
    -------
    float
        The value returned by ``cv2.matchShapes``.
    """
    # A forward-slash path resolves on both Windows and POSIX; the former
    # ".\\special_diacritics.jpg" literal only resolved on Windows.
    template_rgb = io.imread(template_path)

    template_gray = rgb2gray(template_rgb)
    if template_gray.max() <= 1:
        template_gray = template_gray * 255
    template_gray = template_gray.astype(np.uint8)

    global_threshold = threshold_otsu(template_gray)
    template_binary = np.where(template_gray > global_threshold, 1, 0).astype(np.uint8)

    return cv2.matchShapes(diacritic_image, template_binary, cv2.CONTOURS_MATCH_I3, 0)

# img = io.imread("..\\..\\data\\raw\\1\\0001.jpg")
# img = io.imread("..\\..\\data\\raw\\2\\0191.jpg")
# img = io.imread("..\\..\\data\\raw\\9\\1496.jpg")
# x = preprocessing_image(img)
# sp_features(x[3])

In [373]:
def wor_features(text_image):
    """Unfinished feature extractor (as far as the visible code shows).

    The lines below locate the row with the most zero-valued pixels and the
    first minimum-valued column on that row, then reach ``pass`` without
    using or returning those values.
    """
    # Zero-valued pixels count as ink (1); everything else as background (0).
    img = np.where(text_image > 0, 0, 1)
    horizontal_projection = np.sum(img, axis = 1)
    # Row with the largest ink count is taken as the baseline.
    baseline_location_index = horizontal_projection.argmax()
    
    diacritic_image = np.copy(text_image)
    previous_val = -1
    # Column of the first minimum-valued pixel on the baseline row.
    current_val = diacritic_image[baseline_location_index,:].argmin()
    pass