# Workshop on (hand-crafted) feature representation

Course: Vision Systems

## Objective
In this workshop, we will develop a two-class fingerprint spoof classifier that uses Local Binary Patterns (LBP) and Histogram of Oriented Gradients (HOG) features with Support Vector Machines (SVM) to distinguish live fingerprint images from spoof samples.

Reference: https://github.com/shashank140195/LBP-HOG-SVM-Feature-Extraction


In [1]:
import cv2
import os
import math
import pandas as pd
from skimage.feature import local_binary_pattern
from sklearn import svm
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score

In [7]:
# Mount your drive
# Run this cell, then you’ll see a link, click on that link, allow access
# Copy the code that pops up, Paste it in the box, Hit enter

from google.colab import drive
drive.mount('/content/gdrive')

# Change the working directory to your own folder: keep '/content/gdrive/My Drive/XXX' in the path
# and change XXX to your own folder. The pathname is case sensitive.
import os
os.chdir('/content/gdrive/My Drive/recsys/iss/VSE/day1')
!ls


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
data


In [8]:
# method to create data set
def create_dataset(live_path, spoof_path, descriptor):
    """Extract one feature vector per image from a live and a spoof folder.

    Every readable image is loaded in grayscale, resized to 64x128
    (width x height) and converted into a flattened 1-D feature vector.

    Args:
        live_path: Folder containing the live fingerprint images.
        spoof_path: Folder containing the spoof fingerprint images.
        descriptor: 'LBP' selects Local Binary Patterns; any other value
            selects Histogram of Oriented Gradients.

    Returns:
        A tuple ``(features, labels)`` of two equally long lists.
        ``features`` holds one 1-D array per image and ``labels`` holds the
        matching class label: 1 for images from ``live_path``, 0 for images
        from ``spoof_path``.
    """
    # LBP settings: sampling radius, number of neighbours, sampling type
    radius = 3
    n_points = 8 * radius
    METHOD = 'uniform'

    # every image is resized to this size so all feature vectors have equal length
    IMG_HEIGHT = 128
    IMG_WIDTH = 64

    # the HOG descriptor does not depend on the image, so it is built once
    # (and only when HOG features were actually requested)
    hog = None
    if descriptor != 'LBP':
        hog = cv2.HOGDescriptor(_winSize=(IMG_WIDTH, IMG_HEIGHT), _blockSize=(16, 16), _blockStride=(8,8), _cellSize=(8,8), _nbins=9)

    # list to store extracted features of an image
    features = []

    # list to store class label, 1 for live, 0 for spoof
    labels = []

    # the label follows from which argument the folder was passed as,
    # not from whether the folder name happens to contain 'Spoof'
    for path, class_identifier in ((live_path, 1), (spoof_path, 0)):

        # sorted so the sample order does not depend on the file system
        files = sorted(os.listdir(path))

        for img_index, file_name in enumerate(files):
            if img_index % 50 == 0:
                print("%s: Process image %d/%d" % (path, img_index, len(files)))

            # os.path.join works with or without a trailing separator on path
            img = cv2.imread(os.path.join(path, file_name), cv2.IMREAD_GRAYSCALE)

            # cv2.imread returns None (it does not raise) when the entry
            # cannot be decoded as an image, e.g. a subfolder or a stray file
            if img is None:
                print("%s: Skip unreadable file %s" % (path, file_name))
                continue

            resized_img = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))

            if descriptor == 'LBP':
                # LBP code image converted into a 1-D array
                fd = local_binary_pattern(resized_img, n_points, radius, METHOD).flatten()
            else:
                # flattened so the vector is 1-D whichever shape compute() returns
                fd = hog.compute(resized_img).flatten()

            features.append(fd)
            labels.append(class_identifier)

    return features, labels


In [9]:
# Set up folders
# Paths are relative to the working directory selected with os.chdir() in the
# Drive-mount cell. Live and spoof images are kept in separate folders for both
# the training and the testing split; each live/spoof pair is passed to
# create_dataset() in the cells below.
training_live_path = "data/Training_Live/"
training_spoof_path = "data/Training_Spoof/"
testing_live_path = "data/Testing_Live/"
testing_spoof_path = "data/Testing_Spoof/"

In [10]:
# LBP + SVM

# Training and testing datasets (features extracted with the LBP descriptor)
lbp_x_trn,lbp_y_trn = create_dataset(training_live_path,training_spoof_path, 'LBP')
lbp_x_tst,lbp_y_tst = create_dataset(testing_live_path,testing_spoof_path, 'LBP')

# Create and fit the model (SVC with its default settings)
lbp_clf = svm.SVC()
lbp_clf.fit(lbp_x_trn,lbp_y_trn)

# Predict on test data
lbp_y_pred = lbp_clf.predict(lbp_x_tst)

# Confusion matrix: rows are the true classes, columns the predicted classes.
# The label order is pinned to [0, 1] (spoof, live) so the matrix is always 2x2
# and always matches the row/column names, even if one class never occurs.
cf_matrix = confusion_matrix(lbp_y_tst, lbp_y_pred, labels=[0, 1])
print(pd.DataFrame(cf_matrix, index=['Spoof', 'Live'], columns=['Spoof', 'Live']))

# accuracy: (tp + tn) / (p + n)
accuracy = accuracy_score(lbp_y_tst, lbp_y_pred)
print('Accuracy: %.4f' % accuracy)

# macro precision: tp / (tp + fp) computed per class, then averaged over both classes
precision = precision_score(lbp_y_tst, lbp_y_pred, average='macro')
print('Precision: %.4f' % precision)

# macro recall: tp / (tp + fn) computed per class, then averaged over both classes
recall = recall_score(lbp_y_tst, lbp_y_pred, average='macro')
print('Recall: %.4f' % recall)


data/Training_Live/: Process image 0/200
data/Training_Live/: Process image 50/200
data/Training_Live/: Process image 100/200
data/Training_Live/: Process image 150/200
data/Training_Spoof/: Process image 0/200
data/Training_Spoof/: Process image 50/200
data/Training_Spoof/: Process image 100/200
data/Training_Spoof/: Process image 150/200
data/Testing_Live/: Process image 0/200
data/Testing_Live/: Process image 50/200
data/Testing_Live/: Process image 100/200
data/Testing_Live/: Process image 150/200
data/Testing_Spoof/: Process image 0/200
data/Testing_Spoof/: Process image 50/200
data/Testing_Spoof/: Process image 100/200
data/Testing_Spoof/: Process image 150/200
       Spoof  Live
Spoof    198     2
Live      51   149
Accuracy: 0.8675
Precision: 0.8910
Recall: 0.8675


In [11]:
# HOG + SVM

# Training and testing datasets (features extracted with the HOG descriptor)
hog_x_trn,hog_y_trn = create_dataset(training_live_path,training_spoof_path, 'HOG')
hog_x_tst,hog_y_tst = create_dataset(testing_live_path,testing_spoof_path, 'HOG')

# Create and fit the model (SVC with its default settings)
hog_clf = svm.SVC()
hog_clf.fit(hog_x_trn,hog_y_trn)

# Predict on test data
hog_y_pred = hog_clf.predict(hog_x_tst)

# Confusion matrix: rows are the true classes, columns the predicted classes.
# The label order is pinned to [0, 1] (spoof, live) so the matrix is always 2x2
# and always matches the row/column names, even if one class never occurs.
cf_matrix = confusion_matrix(hog_y_tst, hog_y_pred, labels=[0, 1])
print(pd.DataFrame(cf_matrix, index=['Spoof', 'Live'], columns=['Spoof', 'Live']))

# accuracy: (tp + tn) / (p + n)
accuracy = accuracy_score(hog_y_tst, hog_y_pred)
print('Accuracy: %.4f' % accuracy)

# macro precision: tp / (tp + fp) computed per class, then averaged over both classes
precision = precision_score(hog_y_tst, hog_y_pred, average='macro')
print('Precision: %.4f' % precision)

# macro recall: tp / (tp + fn) computed per class, then averaged over both classes
recall = recall_score(hog_y_tst, hog_y_pred, average='macro')
print('Recall: %.4f' % recall)

data/Training_Live/: Process image 0/200
data/Training_Live/: Process image 50/200
data/Training_Live/: Process image 100/200
data/Training_Live/: Process image 150/200
data/Training_Spoof/: Process image 0/200
data/Training_Spoof/: Process image 50/200
data/Training_Spoof/: Process image 100/200
data/Training_Spoof/: Process image 150/200
data/Testing_Live/: Process image 0/200
data/Testing_Live/: Process image 50/200
data/Testing_Live/: Process image 100/200
data/Testing_Live/: Process image 150/200
data/Testing_Spoof/: Process image 0/200
data/Testing_Spoof/: Process image 50/200
data/Testing_Spoof/: Process image 100/200
data/Testing_Spoof/: Process image 150/200
       Spoof  Live
Spoof    192     8
Live      55   145
Accuracy: 0.8425
Precision: 0.8625
Recall: 0.8425


In [None]:
# Provide your answers to Q1 here
# No, there is no requirement to use a colour histogram. We are more concerned with the intensity and pattern of the image. Therefore, even
# a simple grayscale histogram would be adequate.

# Provide your answers to Q2 here
# The LBP feature can account for changes across multiple scales. It is robust to changes in resolution. HOG can be used to identify the
# rotation, and a transform can then be applied to counteract it.