In [None]:

import numpy as np
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense


In [None]:
# Index the training set: one sub-folder per writer, each holding that
# writer's image files. Builds
#   image_paths : flat list of every image path found
#   labels_dict : {writer_label (int) -> [image paths of that writer]}
dataset_dir = "dataset/dataset/train"
max_writers = 100  # upper bound on how many writer folders are indexed
image_paths = []
labels_dict = {}
labels = []
pairs = []
count = 0  # number of writer folders indexed so far; doubles as the next label

# os.listdir returns entries in arbitrary order, so sort to keep the
# writer -> label assignment stable across runs and machines.
for writer_folder in sorted(os.listdir(dataset_dir)):
    writer_folder_path = os.path.join(dataset_dir, writer_folder)
    # Skip stray files *before* counting, so they neither consume the
    # writer budget nor leave gaps in the label sequence.
    if not os.path.isdir(writer_folder_path):
        continue
    if count >= max_writers:
        break

    writer_label = count
    count += 1
    labels_dict[writer_label] = []

    for img_name in sorted(os.listdir(writer_folder_path)):
        img_path = os.path.join(writer_folder_path, img_name)
        # Nested directories cannot be decoded as images; leave them out.
        if not os.path.isfile(img_path):
            continue
        image_paths.append(img_path)
        labels_dict[writer_label].append(img_path)

In [None]:
def feature_extractor(input_shape=(150, 1000, 1)):
    """Build and compile a small convolutional stack used to embed images.

    The network is three 3x3 ReLU convolutions (64, 128 and 64 filters) with
    a 2x2 max-pool after each of the first two. There is no Flatten or Dense
    head, so the model outputs a feature map rather than class scores.

    Args:
        input_shape: (height, width, channels) of a single input image.

    Returns:
        The compiled ``Sequential`` model. No training happens here, so the
        returned model carries whatever weights the layers were initialised
        with.
    """
    conv_stack = [
        Conv2D(64, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
    ]

    net = Sequential()
    for layer in conv_stack:
        net.add(layer)

    net.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return net

In [None]:
# Instantiate the convolutional feature extractor for 150x1000 single-channel inputs.
f_e = feature_extractor(input_shape=(150, 1000, 1))

In [None]:
# Turn every image of the first `writer_limit` writers into a flat feature
# vector: central 150x1000 crop -> Canny edge map -> scale to [0, 1] ->
# forward pass through `f_e` -> flatten. Vectors are collected in X and the
# matching writer labels in y.
writer_limit = 50            # only this many writers are processed
crop_rows, crop_cols = 150, 1000
col_start = 600              # left edge of the horizontal crop window

# Report the number of writers that will actually be processed, not the
# number that were indexed.
print("Extracting Features! for", min(len(labels_dict), writer_limit), "writers")
count = 0
X = []
y = []
for label, img_path in labels_dict.items():
    count += 1
    if count > writer_limit:
        break

    for path in img_path:
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # imread signals an unreadable/non-image file by returning None
        # instead of raising; skip it rather than crash on `.shape`.
        if img is None:
            print("Error Reading", path)
            continue

        c = img.shape[0] // 2
        half = crop_rows // 2
        img = img[c - half:c + half, col_start:col_start + crop_cols]
        # Images too small for the window give a short or empty crop. Check
        # before Canny (which rejects empty input) instead of relying on a
        # bare `except` around the later reshape.
        if img.shape != (crop_rows, crop_cols):
            print("Error Reshaping")
            continue

        img = cv2.Canny(img, 100, 200)
        img = img / 255.0
        img = img.reshape(1, crop_rows, crop_cols, 1)
        # verbose=0: one progress bar per image would flood the cell output.
        img = f_e.predict(img, verbose=0)
        X.append(img.flatten())
        y.append(label)
    if count % 10 == 0:
        print("Features Extracted for", count, "writers")

Extracting Features! for 100 writers
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 424ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
[1m1/1[0m [32m━━━━━━━━━

In [None]:
# Stack the collected feature vectors / writer labels into arrays, report
# their shapes, then fit a 2-nearest-neighbour classifier on all of them.
X, y = np.array(X), np.array(y)
print(X.shape, y.shape)
knn = KNeighborsClassifier(n_neighbors=2)
knn.fit(X, y)

(447, 535296) (447,)


In [None]:
# Predict a writer label for each row of X (the same rows the classifier was
# fitted on), then show the predictions followed by the true labels.
y_pred = knn.predict(X)
print(y_pred, y, sep="\n")

[ 0  0  0  0  0  0  0  0  1  1  1  1  1  1  1  2  2  2  2  2  3  3  3  3
  3  1  4  4  4  4  4  5  5  5  5  5  6  2  6  6  2  6  7  7  7  7  7  7
  7  7  7  8  2  2  8  8  8  8  9  9  0  9  9  9 10  1 10 10 10 10 10 10
  1 10 11  1 11 11 11 11 11 11  0 12 12 12 12 10 12 12 12 12 12 12 12 13
 13  0  0 13 13 13 14 14 11 14  0  0  1  8  3  3 15 15 15  8  4  8  8 15
 16 16 16 10 16 16  7  7  8  7  1 17 17  6 17  0 17 18 18 11 18 18  6 18
 19 19 19 19 19 19 19 19  7 19 19 20 20 20 20 20 20 16 20 20 20  1 21 21
 21 21 21 19 19  3 21 21  5  4 22 22 22 22 22 22 19 22 22 19 22 23 23 23
 19  0 23 12 23  7 19 21 15 13 11 24 24 24 24  5 24 24 19 25 25 25 25 25
 19 19 19 19  7  6 19 26 26 26 27 19 10 27 27 19 27 19 28 28 28 28 28 28
 28 28  2 10  3  8  2 29 29 19 29 29  7 29 29 28 19  4 30 30 30 30 19 30
  3  6  6 30 29 31 31 31 31 31 31 29 19 31 19 10 32 32 32 32 32 32 32 19
 19 19 19 32 11 11  2 10  2 32 19 11 19 19 19 32 34 10  1 28 32 34  7 10
 19 34 32  0 32 28 35 11 11 28 10 35 19 19 32 21 36

In [None]:
# Mean accuracy of the classifier on X / y — the same data it was fitted on,
# so this is a training-set score, not a held-out estimate.
knn.score(X=X, y=y)

0.5078299776286354