# Building a binary classifier CNN model for Fingerprint Comparison

Importing Necessary Libraries

In [16]:
import tensorflow as tf
from matplotlib import pyplot as plt
from tensorflow.keras.utils import load_img, img_to_array
import os
from os import listdir
from os.path import isfile, join
import numpy as np
import re

Dataset Paths

In [17]:
# Root folders of the SOCOFing dataset. Raw strings keep the Windows
# backslashes literal instead of relying on unrecognised escape sequences
# (backslash followed by "S", "R" or "A"), which newer Python versions warn about.
real_fingerprints_path = r'D:\SOCOFing\Real'
altered_easy_path = r'D:\SOCOFing\Altered\Altered-Easy'
altered_medium_path = r'D:\SOCOFing\Altered\Altered-Medium'
altered_hard_path = r'D:\SOCOFing\Altered\Altered-Hard'

Building a function to list all files within a folder

In [18]:
def files_in_dir(path):
    """Return the names of the regular files directly inside ``path``.

    Sub-directories are skipped. The names are returned sorted because
    ``os.listdir`` yields entries in arbitrary order, while
    ``build_dataset_pairwise`` pairs images according to their position in
    the list; sorting makes the result reproducible across platforms.
    """
    return sorted(f for f in listdir(path) if isfile(join(path, f)))

Building a function to assign a unique integer ID to each finger of a person

In [19]:
# Pattern for fingerprint file names such as "1__M_Left_index_finger.BMP":
# group 1 = subject number, group 2 = hand, group 3 = finger name.
# A raw string passes the regex tokens for "digit" and "word character"
# through untouched instead of relying on invalid string escapes.
regex_str = r'^(\d+)__\w_(Left|Right)_(index|little|middle|ring|thumb)_finger'

In [20]:
# Per-hand offset (0-9) of each finger inside a subject's block of ten ids.
# Left hand counts up from the little finger (0) to the thumb (4);
# right hand counts down from the little finger (9) to the thumb (5).
finger_ids = {
    hand: dict(zip(("little", "ring", "middle", "index", "thumb"), codes))
    for hand, codes in (("Left", range(0, 5)), ("Right", range(9, 4, -1)))
}

In [21]:
def calc_finger_id(filename):
    """Map a fingerprint file name to an integer identifying one finger.

    The id is ``(subject - 1) * 10 + finger_ids[hand][finger]``, so each
    subject owns a block of ten consecutive ids.

    Args:
        filename: file name (without directory) to parse with ``regex_str``.

    Returns:
        The integer finger id.

    Raises:
        ValueError: if ``filename`` does not match ``regex_str``.
    """
    match = re.search(regex_str, filename)
    if match is None:
        # Fail with a clear message instead of crashing on ``None.group``.
        raise ValueError(
            f"file name {filename!r} does not match pattern {regex_str!r}"
        )

    person_id, hand, finger = match.groups()
    return (int(person_id) - 1) * 10 + finger_ids[hand][finger]

Building functions for preprocessing the images

In [22]:
# Sanity check: how many distinct finger ids occur in the "Real" folder.
onlyfiles = files_in_dir(real_fingerprints_path)
np.unique([calc_finger_id(name) for name in onlyfiles]).size

6000

In [23]:
def crop(img, l, t, r, b):
    """Trim ``l``/``t``/``r``/``b`` pixels off the left/top/right/bottom of ``img``.

    ``img`` must expose ``size`` as ``(width, height)`` and a ``crop(box)``
    method taking ``(left, upper, right, lower)``; the result of that call is
    returned.
    """
    width, height = img.size
    box = (l, t, width - r, height - b)
    return img.crop(box)

In [24]:
def load_fingerprint_img(path):
    """Load the image at ``path`` in grayscale, trim its border, return an array.

    The border trimmed is 2 pixels on the left and top and 4 pixels on the
    right and bottom.
    """
    grayscale_img = load_img(path, color_mode='grayscale')
    trimmed_img = crop(grayscale_img, 2, 2, 4, 4)
    return img_to_array(trimmed_img)

Building a function to split the images into training and test datasets

In [25]:
def build_dataset(train_folders, test_folders, max_label=1000, img_shape=(97, 90, 1)):
    """Load fingerprint images and finger-id labels for a train and a test split.

    Args:
        train_folders: folders whose files go into the training split.
        test_folders: folders whose files go into the test split.
        max_label: files whose finger id is greater than this are skipped.
        img_shape: only images whose array shape equals this are kept.

    Returns:
        ``((train_imgs, train_labels), (test_imgs, test_labels))`` where every
        element is a NumPy array.
    """

    def _load_split(folders):
        # Collect the images and labels of every accepted file in ``folders``.
        imgs, labels = [], []
        for folder in folders:
            for file in files_in_dir(folder):
                img_label = calc_finger_id(file)

                # Filter on the label first so rejected files are never decoded.
                # NOTE(review): the comparison is strict, so an id equal to
                # ``max_label`` is kept - confirm that is intended.
                if img_label > max_label:
                    continue

                img_data = load_fingerprint_img(join(folder, file))

                # Drop images of any other size so the list stacks into one array.
                if img_data.shape != tuple(img_shape):
                    continue

                imgs.append(img_data)
                labels.append(img_label)
        return np.array(imgs), np.array(labels)

    # Tuple elements are evaluated left to right: training split first.
    return _load_split(train_folders), _load_split(test_folders)

Building a function to create a pairwise image dataset with class labels

In [26]:
def build_dataset_pairwise(imgs, labels, window=5):
    """Build image pairs with a binary same-label / different-label target.

    Image ``i`` is paired with every image ``j`` (itself included) whose
    position satisfies ``abs(i - j) <= window``. Pairs are emitted in
    increasing ``i`` and, for each ``i``, increasing ``j``.

    Args:
        imgs: indexable sequence of image arrays.
        labels: indexable sequence of labels aligned with ``imgs``.
        window: largest position difference for which a pair is generated.

    Returns:
        ``(pairs, targets)`` as NumPy arrays: ``pairs[k]`` stacks the two
        images of pair ``k``; ``targets[k]`` is 1 when both labels are equal,
        otherwise 0.
    """
    count = len(imgs)
    result_imgs = []
    result_labels = []
    for idx in range(count):
        # Visit only the partners inside the window rather than scanning all
        # ``count`` candidates and discarding most of them.
        first = max(0, idx - window)
        last = min(count, idx + window + 1)
        for idx2 in range(first, last):
            result_imgs.append(np.array([imgs[idx], imgs[idx2]]))
            result_labels.append(1 if labels[idx] == labels[idx2] else 0)
    return np.array(result_imgs), np.array(result_labels)

Splitting the Dataset

In [27]:
# Train on the real, easy-altered and hard-altered images; test on the
# medium-altered images.
# NOTE(review): both splits are filtered to the same range of finger ids, so
# the test set presumably contains altered versions of fingers also seen in
# training rather than unseen fingers - confirm this is the intended protocol.
(train_imgs, train_labels), (test_imgs, test_labels) = build_dataset(
    train_folders=[real_fingerprints_path, altered_easy_path, altered_hard_path],
    test_folders=[altered_medium_path],
)

In [28]:
# Number of individual training images returned by build_dataset.
len(train_imgs)

6319

In [29]:
# The names are reassigned in place, so guard against an accidental re-run:
# single images are 4-D (N, H, W, C) while paired data is 5-D. Without the
# guard a second execution would build pairs of pairs.
if train_imgs.ndim == 4:
    train_imgs, train_labels = build_dataset_pairwise(train_imgs, train_labels)

In [30]:
# Number of training pairs produced by build_dataset_pairwise.
len(train_imgs)

69479

In [31]:
# Class balance of the training pairs: the distinct target values and how often each occurs.
np.unique(train_labels, return_counts=True)

(array([0, 1]), array([53428, 16051], dtype=int64))

In [32]:
# Array shape of the paired training set.
train_imgs.shape

(69479, 2, 97, 90, 1)

In [33]:
# The names are reassigned in place, so guard against an accidental re-run:
# single images are 4-D (N, H, W, C) while paired data is 5-D. Without the
# guard a second execution would build pairs of pairs.
if test_imgs.ndim == 4:
    test_imgs, test_labels = build_dataset_pairwise(test_imgs, test_labels)

In [34]:
# Number of test pairs produced by build_dataset_pairwise.
len(test_imgs)

30968

In [35]:
# Class balance of the test pairs: the distinct target values and how often each occurs.
np.unique(test_labels, return_counts=True)

(array([0, 1]), array([22722,  8246], dtype=int64))

In [36]:
# Array shape of the paired test set.
test_imgs.shape

(30968, 2, 97, 90, 1)

Building the CNN model

In [38]:
import keras
from keras import layers
from keras import models

In [34]:
# Shape of one model input; the same tuple build_dataset uses to filter images.
img_shp = (97, 90, 1)

def imgModel():
    """Create the convolutional feature extractor applied to each fingerprint.

    The network is four ``Conv2D`` (3x3, ReLU) + ``MaxPooling2D`` (2x2) stages
    with 32, 64, 128 and 256 filters, followed by ``Flatten``.

    Returns:
        An unbuilt ``keras.Sequential`` model.
    """
    model = keras.Sequential()
    for filters in (32, 64, 128, 256):
        model.add(layers.Conv2D(filters, (3, 3), activation='relu'))
        model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    return model

# Two inputs, one per fingerprint of the pair being compared.
img1input = keras.Input(shape=img_shp)
img2input = keras.Input(shape=img_shp)

# One CNN instance is applied to both inputs, so both branches share weights.
sharedCnn = imgModel()
print(type(sharedCnn))

img1features = sharedCnn(img1input)
img2features = sharedCnn(img2input)
print(type(img1features))
print(type(img2features))

# Join both feature vectors before the dense comparison head.
combined_features = layers.Concatenate()([img1features, img2features])
print(type(combined_features))

# Fully connected head: four ReLU layers, then a single sigmoid unit.
model = combined_features
for units in (256, 128, 64, 32):
    model = layers.Dense(units, activation='relu')(model)
model = layers.Dense(1, activation='sigmoid')(model)

# From here on ``model`` is the complete two-input Keras model.
model = keras.Model(inputs=[img1input, img2input], outputs=model)

<class 'keras.src.models.sequential.Sequential'>
<class 'keras.src.backend.common.keras_tensor.KerasTensor'>
<class 'keras.src.backend.common.keras_tensor.KerasTensor'>
<class 'keras.src.backend.common.keras_tensor.KerasTensor'>


In [35]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

Training the model 

In [36]:
# Split every pair into its first and second image with one vectorised slice
# each, instead of mapping a lambda over a Python list with one entry per pair.
# ``copy()`` returns an independent, C-ordered array like ``np.array`` did.
train_imgs1 = train_imgs[:, 0].copy()
train_imgs2 = train_imgs[:, 1].copy()

In [37]:
# Shape of the array holding the first image of every training pair.
train_imgs1.shape

(69479, 97, 90, 1)

In [38]:
# Binary cross-entropy matches the single sigmoid output (same finger or not).
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['acc'],
)

# The two image arrays feed the two model inputs in order.
model.fit(
    x=(train_imgs1, train_imgs2),
    y=train_labels,
    epochs=10,
    steps_per_epoch=100,
)

Epoch 1/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m354s[0m 3s/step - acc: 0.6994 - loss: 2.3180 
Epoch 2/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m325s[0m 3s/step - acc: 0.7944 - loss: 0.4307
Epoch 3/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 3s/step - acc: 0.9312 - loss: 0.1668
Epoch 4/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m321s[0m 3s/step - acc: 0.9862 - loss: 0.0404
Epoch 5/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m320s[0m 3s/step - acc: 0.9938 - loss: 0.0193
Epoch 6/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m319s[0m 3s/step - acc: 0.9954 - loss: 0.0144
Epoch 7/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 3s/step - acc: 0.9973 - loss: 0.0085
Epoch 8/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 3s/step - acc: 0.9984 - loss: 0.0051
Epoch 9/10
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3

<keras.src.callbacks.history.History at 0x162cf8c7340>

In [39]:
# Write the trained model to a .keras file in the current working directory.
model.save('./fingerprint_comparison.keras')

Model Evaluation

In [39]:
# Load the model back from the saved file; this replaces the in-memory ``model``.
model = keras.models.load_model('./fingerprint_comparison.keras')

In [40]:
# Print the layer-by-layer summary of the model that was just loaded.
model.summary()

In [41]:
# Split every pair into its first and second image with one vectorised slice
# each, instead of mapping a lambda over a Python list with one entry per pair.
# ``copy()`` returns an independent, C-ordered array like ``np.array`` did.
test_imgs1 = test_imgs[:, 0].copy()
test_imgs2 = test_imgs[:, 1].copy()

In [43]:
# Shape of the array holding the second image of every test pair.
test_imgs2.shape

(30968, 97, 90, 1)

In [42]:
# Score the model on the test pairs; ``return_dict=True`` returns the metrics
# as a dictionary.
model.evaluate(
    x=(test_imgs1, test_imgs2),
    y=test_labels,
    return_dict=True,
)

[1m968/968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 38ms/step - acc: 0.9977 - loss: 0.0083


{'acc': 0.9977073073387146, 'loss': 0.010219823569059372}