In [28]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from scipy import ndimage
from scipy.ndimage import label
import statistics
import os


In [29]:

def crop_handwritten_region(imgpath):
    
    img = cv2.imread(imgpath)
    imgray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret3,bin_img = cv2.threshold(imgray,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)

    contours, hierarchy = cv2.findContours(bin_img, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    width_threshold = 1000
    height_threshold = 500

    width_array =[]
    y_array =[]
    # Detect the main horizontal black separator lines of the IAM handwriting forms.
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        
        if w > width_threshold:
            if h <height_threshold:
                width_array.append (w) 
                y_array.append(y)
                
                
    indixes = sorted(range(len(width_array)), key=lambda k: width_array[k])
    indixes.reverse()
    three_lines_y=[]
    three_lines_y.append(y_array[0])
    three_lines_y.append(y_array[1])
    three_lines_y.append(y_array[2])
    three_lines_y.sort()

    newCooriate_y1= three_lines_y[1]
    newCooriate_y2= three_lines_y[2]

    cropped_imagebin = bin_img[newCooriate_y1+4:newCooriate_y2 , :]
    cropped_imagegray = imgray[newCooriate_y1+4:newCooriate_y2 , :]
    return cropped_imagebin, cropped_imagegray

def split_lines(img):
    
    # array contains summation of black pixels on each row of the image
    sum_black_in_row = np.sum(img < 255, axis=1)
    # threshold for rows contains black pixel > 15 
    intial_lines = sum_black_in_row > 15
    lines = []
    i = 0

    while i < len(intial_lines):
        if intial_lines[i] == True:
            begin_row = i
            if begin_row - 6 < 0:
                up = 0
            else:
                up = begin_row - 6
            while i < len(intial_lines) and intial_lines[i]:
                i += 1
            if i+5 > len(intial_lines) - 1 :
                down=len(intial_lines) - 1
            else:
                 down = i + 6
            if i - begin_row > 20:  # threshold for # of rows to be higher than 20 row 
                lines.append(img[up:down, :])
        i += 1
    return lines

# if __name__ == '__main__':
#     imgpath ='a01-000u.png'
#     cropped_imgbin,cropped_imggray = crop_handwritten_region(imgpath)
#     imgplot = plt.imshow(cropped_imgbin)
#     plt.show()
#     imgplot = plt.imshow(cropped_imggray)
#     plt.show()

#     for i in split_lines(cropped_imgbin):
#         imgplot = plt.imshow(i)
#         plt.show()

In [30]:
# change this later to be R = 3 instead of R = 1
def lbp_calculate_pixels(img, x, y):
    threshold = img[x, y]
    bin_val = []
    bin_val.append(int(img[x - 3, y] >= threshold))
    bin_val.append(int(img[x - 2, y + 2] >= threshold))
    bin_val.append(int(img[x, y + 3] >= threshold))
    bin_val.append(int(img[x + 2, y + 2] >= threshold))
    bin_val.append(int(img[x + 3, y] >= threshold))
    bin_val.append(int(img[x + 2, y - 2] >= threshold))
    bin_val.append(int(img[x, y - 3] >= threshold))
    bin_val.append(int(img[x - 2, y - 2] >= threshold))
    
    power_val = [1, 2, 4, 8, 16, 32, 64, 128]
    
    decimal = 0
    
    for i in range(len(power_val)):
        decimal += bin_val[i] * power_val[i]
    
    return decimal

def lbp_get_result(img):
    height, width = img.shape
    result = np.zeros((height, width), np.uint8)
    result = np.copy(img)
    for i in range(3, height-3):
        for j in range(3, width-3):
            result[i, j] = lbp_calculate_pixels(img, i, j)
    return result

# get the histogram of the resulted lbp img as our featured vector
def lbp_hist(img_lbp):
    img_reshaped = img_lbp.reshape(-1, 1)
    result_hist, result_hist_bins = np.histogram(img_reshaped)
    return result_hist

def lbp_normalize(lbp_hist):
    lbp_mean = statistics.mean(lbp_hist)
    lbp_hist = lbp_hist / lbp_mean
    return lbp_hist

In [31]:
# run features

# lbp_img = lbp_get_result(cropped_imggray)
# result_hist = lbp_hist(lbp_img)

# result_normalized = lbp_normalize(result_hist)

In [44]:
path = 'C:\\Users\\basma\\Writer-Identification-System\\data'
directory = os.listdir(path)

for folder in directory:
    training_features = []
    labels = []
    label_t = [];
    test_features = []
    for file in range(1,4):
        for img in range(1,3):
            print(path + '\\' +folder + '\\' + str(file) + '\\' + str(img) +'.png')    
            print(file)
            labels.append(file)
            cropped_imgbin,cropped_imggray = crop_handwritten_region(path + '\\' +folder + '\\' + str(file) + '\\' + str(img) +'.png')
            lbp_img = lbp_get_result(cropped_imggray)
            result_hist = lbp_hist(lbp_img)
            result_normalized = lbp_normalize(result_hist)
            training_features.append(result_normalized)
            print(result_normalized)
                
    print(path + '\\' +folder + '\\test.png')
    cropped_imgbin,cropped_imggray = crop_handwritten_region(path + '\\' +folder + '\\test.png')
            
    lbp_img = lbp_get_result(cropped_imggray)
    result_hist = lbp_hist(lbp_img)
    result_normalized = lbp_normalize(result_hist)
    test_features.append(result_normalized)
    print(result_normalized)
    
    classifier = KNeighborsClassifier(n_neighbors=5)  
    classifier.fit(training_features, labels)
    writer_prediction = classifier.predict(test_features)
    print("knn",writer_prediction)
    
#     svm_clf = LinearSVC(random_state=0,tol=1e-5,dual=False)
#     svm_clf.fit(training_features, labels)
#     writer_prediction = svm_clf.predict(test_features)
#     print("svm",writer_prediction)
#     accuracy_score(y_test, writer_prediction)
    

C:\Users\basma\Writer-Identification-System\data\01\1\1.png
1
[0.21628686 0.26610958 0.14502461 0.05010805 0.29128232 0.10127759
 0.07257516 0.31539157 0.17770169 8.36425454]
C:\Users\basma\Writer-Identification-System\data\01\1\2.png
1
[0.22216541 0.25450158 0.15165602 0.0506315  0.31694663 0.11576062
 0.07411914 0.33415661 0.17135806 8.30870847]
C:\Users\basma\Writer-Identification-System\data\01\2\1.png
2
[0.17905825 0.28822482 0.14362082 0.05417278 0.27322357 0.08408204
 0.07743487 0.33405975 0.19588803 8.37025129]
C:\Users\basma\Writer-Identification-System\data\01\2\2.png
2
[0.15853002 0.28846247 0.14969819 0.0514154  0.26759907 0.0750492
 0.07376503 0.3314074  0.20797934 8.39610552]
C:\Users\basma\Writer-Identification-System\data\01\3\1.png
3
[0.14582651 0.26220131 0.12191322 0.04209641 0.24022737 0.07062538
 0.06707824 0.29831191 0.16507817 8.58665745]
C:\Users\basma\Writer-Identification-System\data\01\3\2.png
3
[0.14776521 0.26648692 0.12389033 0.0426828  0.24722151 0.068381

[0.14582651 0.26220131 0.12191322 0.04209641 0.24022737 0.07062538
 0.06707824 0.29831191 0.16507817 8.58665745]
C:\Users\basma\Writer-Identification-System\data\07\3\2.png
3
[0.14776521 0.26648692 0.12389033 0.0426828  0.24722151 0.06838154
 0.06770852 0.30794569 0.17076578 8.55715982]
C:\Users\basma\Writer-Identification-System\data\07\test.png
[0.11113227 0.14669855 0.08263377 0.02539977 0.17065096 0.04984176
 0.03949845 0.1775478  0.1034389  9.09316942]
knn [2]
svm [3]
C:\Users\basma\Writer-Identification-System\data\08\1\1.png
1
[0.21628686 0.26610958 0.14502461 0.05010805 0.29128232 0.10127759
 0.07257516 0.31539157 0.17770169 8.36425454]
C:\Users\basma\Writer-Identification-System\data\08\1\2.png
1
[0.22216541 0.25450158 0.15165602 0.0506315  0.31694663 0.11576062
 0.07411914 0.33415661 0.17135806 8.30870847]
C:\Users\basma\Writer-Identification-System\data\08\2\1.png
2
[0.17905825 0.28822482 0.14362082 0.05417278 0.27322357 0.08408204
 0.07743487 0.33405975 0.19588803 8.3702512

In [None]:
classifier= KNeighborsClassifier(n_neighbors=5)  
classifier.fit(x_train, y_train)

writer_prediction=classifier.predict(x_test)
print(writer_prediction)
# accuracy_score(y_test, writer_prediction)