## Import libs

In [8]:
import numpy as np
import cv2
import glob
import os
from skimage.feature import hog

## Preprocessing images

In [9]:
def clear_folder():
    roots = ['../data/ratio_h_features', '../data/HOG_features', '../data/all_features']
    total_files = 0

    for root in roots:
        for path in os.listdir(root):
            os.remove(os.path.join(root, path))
            total_files += 1
    
    print("Removed {} files".format(total_files))

In [19]:
rootdir = '../data/Raw/Training/'
total_img = 0
len_HOG_feature = None
len_hist_feature = None

# Clear feature folders
clear_folder()

# Initialize parameters for HOG - feature extraction algorithm

for path in glob.glob(f'{rootdir}/*/**'):
    img = cv2.imread(path)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, (96, 128), interpolation = cv2.INTER_AREA)
    gray_arr = np.asarray(gray)

    # Ratio hair feature
    img_white = np.ones((128, 96), dtype = np.uint8)
    count = 0
    for i in range(0,128):
        for j in range(0,96):
            if gray_arr[i][j] <= 51 :
                img_white[i][j] = 0
                count += 1
    ratio_h = [count / (128*96)]

    # HOG features
    hog_features, hog_image = hog(img_white, orientations = 4, pixels_per_cell=(8, 8),
                    cells_per_block=(2, 2), visualize = True)

    orient_features = np.zeros((4))
    for i in range(len(hog_features)):
        orient_features[i%4] += hog_features[i]
        
    # Concatenate histograms features and HOG features
    merged_features = np.concatenate((orient_features, ratio_h))

    # Save feature
    np.save('../data/ratio_h_features/' + path.split('\\')[-1].split('.')[0], ratio_h)
    np.save('../data/HOG_features/' + path.split('\\')[-1].split('.')[0], orient_features)
    np.save('../data/all_features/' + path.split('\\')[-1].split('.')[0], merged_features)

    # Count number of images
    total_img += 1
    len_HOG_feature = len(orient_features)

### DONE
print("Length of HOG feature: {}".format(len_HOG_feature))
print("Length of hist_all_HOG feature: {}".format(len_HOG_feature + 1))
print("Processed: {} images".format(total_img))

Removed 300 files
Length of HOG feature: 4
Length of hist_all_HOG feature: 5
Processed: 100 images


## Calculate L2

In [20]:
def get_features(img, method):

    # Convert images to 
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, (96, 128), interpolation = cv2.INTER_AREA)
    gray_arr = np.asarray(gray)

    # Ratio hair feature
    img_white = np.ones((128, 96), dtype = np.uint8)
    count = 0
    for i in range(0,128):
        for j in range(0,96):
            if gray_arr[i][j] <= 51 :
                img_white[i][j] = 0
                count += 1
    ratio_h = [count / (128*96)]

    # HOG features
    hog_features, hog_image = hog(img_white, orientations=8, pixels_per_cell=(8, 8),
                    cells_per_block=(2, 2), visualize = True)
    orient_features = np.zeros((4))
    for i in range(len(hog_features)):
        orient_features[i%4] += hog_features[i]

    # Concatenate histograms features and HOG features
    merged_features = np.concatenate((orient_features, ratio_h))

    if method == "Ratio hair":
        return ratio_h
    elif method == "HOG":
        return orient_features
    else:
        return merged_features

In [28]:
def prediction(img, method):

    #Intialize params
    ratio_h_path = "../data/ratio_h_features/"
    hog_path = "../data/HOG_features/"
    all_features_path = "../data/all_features/"

    val = []
    idx = []

    current_features = get_features(img, method)

    if method == "Ratio hair":
        for path in glob.glob(f'{ratio_h_path}/*'):
            ratio_h_tmp = np.load(path)
            dist = np.linalg.norm(ratio_h_tmp - current_features)
            val.append(dist)
            idx.append(path)

    elif method == "HOG":
        for path in glob.glob(f'{hog_path}/*'):
            hog_tmp = np.load(path)
            dist = np.linalg.norm(hog_tmp - current_features)
            val.append(dist)
            idx.append(path)

    else:
        for path in glob.glob(f'{all_features_path}/*'):
            all_features_tmp = np.load(path)
            dist = np.linalg.norm(all_features_tmp - current_features)
            val.append(dist)
            idx.append(path)

    nearest_img = np.argsort(val)[:3]
    res_nu = 0
    for img_path in nearest_img:
        if 'nu' in idx[img_path]:
            res_nu += 1
    
    return 1 if res_nu > 1 else 0

In [29]:
from sklearn.metrics import precision_score, recall_score, f1_score
test_path = "../data/Raw/Test/"

gt = []
pred = []

for img_path in glob.glob(f'{test_path}/*'):
    img = cv2.imread(img_path)
    predicted_label = prediction(img, method="HOG")
    gt.append(1) if 'nu' in img_path.lower() else gt.append(0)
    pred.append(predicted_label)

print(precision_score(gt, pred, average='binary'))
print(recall_score(gt, pred, average='binary'))
print(f1_score(gt, pred, average='binary'))


0.75
0.9
0.8181818181818182
