In [1]:
import numpy as np
import glob
import random
import imageio
import PIL, cv2
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from skimage.morphology import convex_hull_image, erosion
from skimage.morphology import square
import matplotlib.image as mpimg
import skimage
import math
from scipy.ndimage.filters import convolve
from PIL import Image,ImageFilter
from skimage.feature import hessian_matrix, hessian_matrix_eigvals

Make sure `DATA_DIR` points to the correct directory of the dataset.

Only the first 2000 images are kept in `display_list`; using more makes the notebook too slow.

In [2]:
# KAGGLE FINGERPRINT DATA

DATA_DIR = "Real/"
# glob returns paths in arbitrary, filesystem-dependent order; sort them so the
# 2000-image subset selected below is the same on every machine and every run.
list_dirs = sorted(glob.glob(DATA_DIR+"*.BMP"))
num_images = len(list_dirs)

random.seed(42)

# Index of a randomly chosen image. randrange excludes its upper bound, so r is
# always a valid index into list_dirs (random.randint(0, num_images) could
# return num_images itself). Falls back to 0 when no image was found.
r = random.randrange(num_images) if num_images else 0
# Keep only the first 2000 images; larger lists make the later cells too slow.
display_list = list_dirs[:2000]
# display_list[0]

In [3]:
def getTerminationBifurcation(img, mask):
    """Locate ridge terminations and bifurcations on a skeleton image.

    A ridge pixel is an interior pixel of ``img`` whose value is 255. It is
    marked as a termination when its 3x3 neighbourhood (itself included)
    contains exactly 2 ridge pixels, and as a bifurcation when it contains
    exactly 4. Pixels on the outermost rows/columns are never marked.

    Parameters
    ----------
    img : 2-D numpy array
        Skeleton image; only pixels equal to 255 count as ridge.
    mask : numpy array with the same shape as ``img``
        Foreground mask. The convex hull of its positive pixels, eroded with a
        5x5 square, is multiplied into the termination map.

    Returns
    -------
    (minutiaeTerm, minutiaeBif) : tuple of float arrays shaped like ``img``
        1 where a minutia was found, 0 elsewhere. Only ``minutiaeTerm`` is
        gated by the mask; ``minutiaeBif`` is returned unmasked.
    """
    ridge = (img == 255)
    (rows, cols) = ridge.shape
    minutiaeTerm = np.zeros(ridge.shape)
    minutiaeBif = np.zeros(ridge.shape)

    # Count ridge pixels in every interior 3x3 window by summing the nine
    # shifted views of the image. This gives the same counts as visiting each
    # pixel in a Python loop, without the per-pixel interpreter overhead.
    if rows >= 3 and cols >= 3:
        neighbourhood = np.zeros((rows - 2, cols - 2), dtype=np.int64)
        for dr in range(3):
            for dc in range(3):
                neighbourhood += ridge[dr:dr + rows - 2, dc:dc + cols - 2]
        interior = ridge[1:-1, 1:-1]
        # The [1:-1, 1:-1] slices are views, so these assignments write
        # straight into the full-size output maps.
        minutiaeTerm[1:-1, 1:-1][interior & (neighbourhood == 2)] = 1
        minutiaeBif[1:-1, 1:-1][interior & (neighbourhood == 4)] = 1

    # Restrict terminations to the eroded convex hull of the mask.
    mask = convex_hull_image(mask > 0)
    mask = erosion(mask, square(5))
    minutiaeTerm = np.uint8(mask) * minutiaeTerm
    return (minutiaeTerm, minutiaeBif)

In [4]:
class MinutiaeFeature(object):
    """Plain record describing a single minutia.

    Attributes are stored exactly as passed in:
    ``locX``, ``locY`` (position), ``Orientation`` (angle) and ``Type`` (label).
    """

    def __init__(self, locX, locY, Orientation, Type):
        # Assignment order is locX, locY, Orientation, Type. vars(instance)
        # yields the attributes in this order, so keep it stable.
        self.locX, self.locY = locX, locY
        self.Orientation, self.Type = Orientation, Type

def _borderAngles(block):
    """Return the angle (degrees) from the block centre to each non-zero border pixel.

    Pixels are visited in row-major order. The angle is
    ``-degrees(atan2(row - centre_row, col - centre_col))``.
    """
    (blkRows, blkCols) = np.shape(block)
    # The row centre comes from the row count and the column centre from the
    # column count, so the result is also correct for non-square blocks.
    centerRow, centerCol = (blkRows - 1) / 2, (blkCols - 1) / 2
    angles = []
    for i in range(blkRows):
        for j in range(blkCols):
            onBorder = i == 0 or i == blkRows - 1 or j == 0 or j == blkCols - 1
            if onBorder and block[i][j] != 0:
                angles.append(-math.degrees(math.atan2(i - centerRow, j - centerCol)))
    return angles


def computeAngle(block, minutiaeType):
    """Estimate the orientation of a minutia from the window around it.

    Parameters
    ----------
    block : 2-D array-like
        Window of the skeleton centred on the minutia; non-zero means ridge.
    minutiaeType : str
        'Termination' or 'Bifurcation' (case-insensitive).

    Returns
    -------
    For a termination: 0 when no ridge pixel touches the window border, the
    angle in degrees when exactly one does, NaN when more than one does.
    For a bifurcation: the angle of the first border ridge pixel (row-major
    order) when exactly three touch the border, NaN otherwise.
    For any other ``minutiaeType``: None.
    """
    kind = minutiaeType.lower()
    if kind == 'termination':
        angles = _borderAngles(block)
        if not angles:
            return 0
        if len(angles) == 1:
            return angles[0]
        # More than one ridge leaves the window, so the direction is ambiguous.
        return float('nan')
    if kind == 'bifurcation':
        angles = _borderAngles(block)
        # A bifurcation must show exactly three branches on the border.
        if len(angles) != 3:
            return float('nan')
        return angles[0]
    return None


def _collectFeatures(skel, minutiaeMap, WindowSize, minutiaeType):
    """Build one MinutiaeFeature per connected region of ``minutiaeMap``."""
    labelled = skimage.measure.label(minutiaeMap, connectivity=2)
    # Zero-pad the skeleton so the window around a minutia close to the image
    # edge is still full-size and centred. Slicing the unpadded image there
    # gives a negative start index (empty block) or a truncated block.
    padded = np.pad(skel, WindowSize, mode='constant')
    side = 2 * WindowSize + 1
    features = []
    for region in skimage.measure.regionprops(labelled):
        (row, col) = np.int16(np.round(region['Centroid']))
        # Index r in the padded image is index r - WindowSize in the original,
        # so this slice covers rows row-WindowSize .. row+WindowSize.
        top, left = int(row), int(col)
        block = padded[top:top + side, left:left + side]
        angle = computeAngle(block, minutiaeType)
        features.append(MinutiaeFeature(row, col, angle, minutiaeType))
    return features


def extractMinutiaeFeatures(skel, minutiaeTerm, minutiaeBif):
    """Turn the termination/bifurcation maps into lists of MinutiaeFeature.

    Each 8-connected region of a map yields one feature located at the rounded
    region centroid (stored as locX=row, locY=col). Orientation comes from
    ``computeAngle`` on a window of ``skel`` around that point: 5x5 for
    terminations, 3x3 for bifurcations.

    Parameters
    ----------
    skel : 2-D numpy array
        Skeleton image the orientation windows are cut from.
    minutiaeTerm, minutiaeBif : 2-D arrays
        Maps that are non-zero at terminations / bifurcations.

    Returns
    -------
    (FeaturesTerm, FeaturesBif) : tuple of lists of MinutiaeFeature
    """
    FeaturesTerm = _collectFeatures(skel, minutiaeTerm, 2, 'Termination')
    FeaturesBif = _collectFeatures(skel, minutiaeBif, 1, 'Bifurcation')
    return (FeaturesTerm, FeaturesBif)

def _markCentroids(DispImg, labelImg, colour):
    """Draw a radius-1 circle in ``colour`` at the centroid of every labelled region."""
    for region in skimage.measure.regionprops(labelImg):
        (row, col) = np.int16(np.round(region['Centroid']))
        (rr, cc) = skimage.draw.circle_perimeter(row, col, 1)
        skimage.draw.set_color(DispImg, (rr, cc), colour)


def ShowResults(skel, TermLabel, BifLabel):
    """Plot the skeleton with the detected minutiae highlighted.

    Parameters
    ----------
    skel : 2-D numpy array
        Skeleton image, copied into all three channels of the display image.
    TermLabel, BifLabel : label images
        Passed to ``skimage.measure.regionprops``. Each region is marked at
        its rounded centroid.

    Bifurcations are drawn first with colour (255, 0, 0), then terminations
    with (0, 0, 255), so a termination marker wins where the two overlap.
    The figure is created with matplotlib; nothing is returned.
    """
    (rows, cols) = skel.shape
    DispImg = np.zeros((rows, cols, 3), np.uint8)
    # Broadcast the single-channel skeleton into all three colour channels.
    DispImg[:, :, :] = skel[:, :, np.newaxis]

    _markCentroids(DispImg, BifLabel, (255, 0, 0))
    _markCentroids(DispImg, TermLabel, (0, 0, 255))

    plt.figure(figsize=(6,6))
    plt.title("Minutiae extraction results")
    plt.imshow(DispImg)

Actual Code for LSH starts here

This uses the pyLSHash library, which can be found at: https://github.com/guofei9987/pyLSHash

The hash functions used here are randomized; they can be saved for reuse if they turn out to work well.

This program uses 128 hash functions (`hash_size=128`); the number can be tuned for the use case.

I have changed the minutiae extraction code above so that each bifurcation contributes only 1 angle instead of 3.

In [5]:
from pickle import dump, load
from pyLSHash import LSHash

def createSet(img_name):
    """Extract the minutiae of one fingerprint image as a DataFrame.

    The image is read in grayscale, binarised at its mean intensity,
    skeletonised, and passed to ``getTerminationBifurcation`` and
    ``extractMinutiaeFeatures``.

    Parameters
    ----------
    img_name : str
        Path of the image file.

    Returns
    -------
    pandas.DataFrame
        Columns ``locX``, ``locY``, ``Orientation``, ``Type``, ``Image_Name``
        (in that order). Rows with a missing value (e.g. NaN orientation) are
        dropped. ``Type`` is 0 for terminations and 1 for bifurcations, and
        ``Image_Name`` repeats ``img_name``. Rows are numbered with a fresh
        0..n-1 index. The frame is empty, but still has all columns, when no
        usable minutia is found.

    Raises
    ------
    OSError
        If OpenCV cannot load ``img_name``.
    """
    img = cv2.imread(img_name, 0)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError("Could not read image: " + str(img_name))
    img = np.array(img > img.mean()).astype(int)
    skel = skimage.morphology.skeletonize(img)
    skel = np.uint8(skel) * 255
    mask = img * 255
    # Extract minutiae
    (minutiaeTerm, minutiaeBif) = getTerminationBifurcation(skel, mask)
    FeaturesTerm, FeaturesBif = extractMinutiaeFeatures(skel, minutiaeTerm, minutiaeBif)

    # Build a single frame from all features (terminations first). This avoids
    # DataFrame.append, which was removed in pandas 2.0. The explicit column
    # list keeps the schema intact even when no minutia was found.
    records = [vars(f) for f in FeaturesTerm] + [vars(f) for f in FeaturesBif]
    df = (
        pd.DataFrame(records, columns=['locX', 'locY', 'Orientation', 'Type'])
        # Drop minutiae whose orientation could not be determined (NaN).
        .dropna()
        .assign(
            # Encode the type label as an integer: 0 / 1.
            Type=lambda d: d['Type'].map({'Termination': 0, 'Bifurcation': 1}).astype(int),
            # Keep the image path so a match can be traced back to its file.
            Image_Name=img_name,
        )
    )
    return df

def createHashTable(store=False, image_paths=None, hash_size=128):
    """Index the minutiae of a set of images in a new LSHash table.

    Parameters
    ----------
    store : bool, default False
        When true, the finished table is pickled to 'LSH.pkl'.
    image_paths : iterable of str, optional
        Images to index. Defaults to the notebook-level ``display_list``.
    hash_size : int, default 128
        ``hash_size`` passed to ``LSHash``.

    Returns
    -------
    LSHash
        Table in which every minutia is indexed by its 4 values
        (locX, locY, Orientation, Type) with the image path as extra data.
    """
    if image_paths is None:
        image_paths = display_list
    # Initialize our hash table
    lsh = LSHash(hash_size=hash_size, input_dim=4)
    # lsh.load_uniform_planes('uniform_planes.pkl')
    # Loop over each image in dataset
    for img_name in image_paths:
        # Extract minutiae of image
        df = createSet(img_name)
        # Each tuple is (index, locX, locY, Orientation, Type, Image_Name):
        # the middle four values are the vector, the last one is the filename.
        for element in df.itertuples():
            fields = list(element)
            lsh.index(fields[1:-1], extra_data=fields[-1])
    if store:
        # Write hashtable to file for later use; `with` closes the file even
        # if pickling fails.
        with open('LSH.pkl', 'wb') as file:
            dump(lsh, file)
    return lsh
# Build the LSH index over every image in display_list and pickle it to
# 'LSH.pkl', the file the evaluation cell below loads back.
lsh = createHashTable(store=True)


KeyboardInterrupt: 

To test against a random image, querying 50 of its minutiae (increase the loop count to test more images)

In [None]:
from statistics import mode

NUM_TEST_IMAGES = 1        # how many random images to evaluate
MINUTIAE_PER_IMAGE = 50    # minutiae sampled from each test image
MATCHES_PER_QUERY = 40     # top results kept for each queried minutia

# Read LSH object from file.
# NOTE: unpickling can execute arbitrary code. Only load an 'LSH.pkl' that
# this notebook produced itself.
with open('LSH.pkl', 'rb') as file:
    lsh = load(file)
count = 0
print("Expected Result\t\t\t\t\t Actual Result")
for _ in range(NUM_TEST_IMAGES):
    # Get random image to test
    test = display_list[random.randint(0, len(display_list) - 1)]
    # Extract minutiae of image
    df = createSet(test)
    if len(df) < MINUTIAE_PER_IMAGE:
        # df.sample would raise here. Skip the image instead of going on with
        # an empty result list, on which mode() fails.
        print("Image has too few minutae for comparison, please choose another image!")
        continue
    # Query against each sampled minutia and keep only the top results
    results = []
    for element in df.sample(MINUTIAE_PER_IMAGE).itertuples():
        result = lsh.query(list(element)[1:-1])[:MATCHES_PER_QUERY]
        results.extend(result)
    if not results:
        # The index returned no candidate at all: count it as a miss.
        print(test, None)
        count += 1
        continue
    # Extract the most frequent filename in the matched query results
    match = mode([result[0][1] for result in results])
    # Compare the names with everything after the last underscore stripped
    test = test.rsplit('_', 1)[0]
    match = match.rsplit('_', 1)[0]
    print(test, match)
    count += int(test != match)
print("Incorrect Results: ", count)

Expected Result					 Actual Result
139.BMP Real\139__M_Right_thumb_finger.BMP
Incorrect Results:  1


To query against a single image

In [None]:
# Function to find similar fingerprint given image of fingerprint
# LSHObject is the lsh hashtable of previous data, image is the image to match against
def query(lSHObject, image, sample_size=50, top_k=40):
    """Return the indexed filename that best matches a fingerprint image.

    ``sample_size`` random minutiae of ``image`` are queried against
    ``lSHObject``. The first ``top_k`` results of every query are pooled and
    the most frequent filename among them is returned.

    Parameters
    ----------
    lSHObject : LSH table exposing ``query(point)``
        The index to search; this object is used, not the global ``lsh``.
    image : str
        Path of the image to identify.
    sample_size : int, default 50
        Number of minutiae sampled from the image.
    top_k : int, default 40
        Number of results kept per queried minutia.

    Returns
    -------
    The most frequent filename stored as extra data in the results, or None
    when the image has fewer than ``sample_size`` minutiae (a message is
    printed) or the index returns no result.
    """
    from statistics import mode

    df = createSet(image)
    if len(df) < sample_size:
        # Too few rows to sample from: report it and stop here rather than
        # calling mode() on an empty list.
        print("Image has too few minutae for comparison, please choose another image!")
        return None
    results = []
    for element in df.sample(sample_size).itertuples():
        # Drop the index (first) and the image name (last) from the row.
        results.extend(lSHObject.query(list(element)[1:-1])[:top_k])
    if not results:
        return None
    # Each result is ((point, extra_data), distance); extra_data is the filename.
    return mode([result[0][1] for result in results])

In [11]:
# Point the notebook at the altered fingerprints. This rebinds DATA_DIR,
# list_dirs and display_list, so code that reads display_list afterwards
# (createHashTable, the evaluation cell) works on these files instead.
DATA_DIR = "Altered-Easy/"
list_dirs = glob.glob(DATA_DIR + "*.BMP")
display_list = list_dirs[:2000]
# Show the first path, then the same path cut at its last underscore.
first_path = display_list[0]
print(first_path, first_path.rsplit('_', 1)[0], sep="\n")

139.BMP
139.BMP
