In [1]:
# Utilities
from itertools import chain,cycle
import os
# os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"
from glob import glob
import pickle
import concurrent.futures # for multithreading

# Data processing and Math
import numpy as np
import pandas as pd
from IPython.display import display_html

# Preprocessing and Feature extraction
import cv2
from skimage.feature import local_binary_pattern, hog, graycomatrix, graycoprops

# Classification
from sklearnex import patch_sklearn
patch_sklearn()
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
def do_parallel_work(function : callable, *function_arguments : list[any]) -> list[any]:
    """Run a function in parallel on multiple threads with the given arguments.\n
    Notice that the function must be thread-safe.\n
    Notice that the function arguments are passed as a list of each argument not a list of all arguments.\n
    e.g.:\n
        do_parallel_work(function, [1, 2, 3], [4, 5, 6])\n
        executes the function with the arguments:\n
        function(1, 4)\n
        function(2, 5)\n
        function(3, 6)\n
    Returns a list of the function's return values.
    """
    with concurrent.futures.ThreadPoolExecutor() as executor:
        return list(executor.map(function, *function_arguments))

In [3]:
# Shrink images
shrink_factor = 4

def read_and_shrink(file):
    img = cv2.imread(f"{file}")
    img = cv2.resize(img, (512//shrink_factor, 512//shrink_factor))
    cv2.imwrite(f"{file}", img)

_ = [read_and_shrink(file) for file in glob(f"samples/*/*")]
# for file in glob(f"samples/*/*"):
#     img = cv2.imread(f"{file}")
#     img = cv2.resize(img, (512//shrink_factor, 512//shrink_factor))
#     cv2.imwrite(f"{file}", img)

In [4]:
#Define Processing Functions Here

def wb(channel, perc = 5):
    mi, ma = (np.percentile(channel, perc), np.percentile(channel,100.0-perc))
    channel = np.uint8(np.clip((channel-mi)*255.0/(ma-mi+0.01), 0, 255))
    return channel

def process_YCrCb(img):
    
    yCrCb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)

    lower_color = np.array([80, 140, 115])
    upper_color = np.array([255, 160, 135])
    color_mask = cv2.inRange(yCrCb, lower_color, upper_color)
    
    return color_mask

def process_chroma_balance(img):
    yCrCb = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)
    yCrCb[:,:,1] = wb(yCrCb[:, :, 1])
    yCrCb = cv2.cvtColor(yCrCb, cv2.COLOR_YCrCb2BGR)
    yCrCb[np.argmax(yCrCb, axis=2) == 0] = 0
    yCrCb[np.argmax(yCrCb, axis=2) == 1] = 0
    yCrCb[np.argmax(yCrCb, axis=2) == 2] = 255

    return yCrCb

def process_Canny(img):
    grey = np.array(img)
    if len(img.shape)==3:
        grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(grey, 0, 0.3, apertureSize=7)
    
    return edges

def process_Laplacian(img):
    grey = np.array(img)
    if len(img.shape)==3:
        grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Laplacian(grey, -1, ksize=5)
    
    return edges

def process_Sobel(img):
    grey = np.array(img)
    if len(img.shape)==3:
        grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edges = cv2.Sobel(grey, -1, 1, 1)
    
    return edges


def process_HOG(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    h = hog(gray, orientations=8, pixels_per_cell=(16, 16))
    # hog = cv2.HOGDescriptor()
    # h = hog.compute(img, winStride=(32,32))
    return h

clf = pickle.load(open("./decision_tree.pkl", "rb"))

def skin_dumb(img):
    img_rgb = np.array(img)
    img_rgb = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2RGB)
    # clf = pickle.load(open("./decision_tree.pkl", "rb"))
    img_len = img_rgb.shape[0] * img_rgb.shape[1]
    X = np.zeros((img_len, 14))

    X[:, 0] = img_rgb[:, :, 0].reshape(-1)
    X[:, 1] = img_rgb[:, :, 1].reshape(-1)
    X[:, 2] = img_rgb[:, :, 2].reshape(-1)
    img_hsv = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2HSV)
    X[:, 3] = img_hsv[:, :, 0].reshape(-1)
    X[:, 4] = img_hsv[:, :, 1].reshape(-1)
    X[:, 5] = img_hsv[:, :, 2].reshape(-1)
    img_lab = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2LAB)
    X[:, 6] = img_lab[:, :, 0].reshape(-1)
    X[:, 7] = img_lab[:, :, 1].reshape(-1)
    X[:, 8] = img_lab[:, :, 2].reshape(-1)
    img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
    X[:, 9] = img_gray.reshape(-1)
    img_ycrcb = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2YCrCb)
    X[:, 10] = img_ycrcb[:, :, 0].reshape(-1)
    X[:, 11] = img_ycrcb[:, :, 1].reshape(-1) 
    X[:, 12] = img_ycrcb[:, :, 2].reshape(-1)
    img_lbp = local_binary_pattern(img_gray, 8, 1, method='uniform')
    X[:, 13] = img_lbp.reshape(-1)

    mask = clf.predict(X).reshape(img_rgb.shape[0], img_rgb.shape[1])

    img_rgb[mask == 0] = 0

    img_rgb = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
    return mask

def process_MeanShift(img, sp, sr):
    rgb_img = np.array(img)
    return cv2.pyrMeanShiftFiltering(rgb_img, sp, sr)

In [5]:
# Prepare results directory
results_dir = "./results"
for folder in glob("*", root_dir="samples/"):
    os.makedirs(f"{results_dir}/{folder}", exist_ok=True)

In [6]:
# Build Preprocessing Pipeline Here
def preprocess(img : np.ndarray) -> tuple[np.ndarray]:
    """Processes the given image.\n
    Returns a tuple of 1d arrays of the features extracted from the image.
    """
    # img = cv2.GaussianBlur(img, (3, 3), 8)
    mask = skin_dumb(img)
    mask = np.uint8(mask) * 255
    SE = np.array([
        [0, 0, 1, 0, 0],
        [0, 1, 1, 1, 0],
        [0, 1, 1, 1, 0],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1],
        [0, 1, 1, 1, 0],
        [0, 1, 1, 1, 0],
        [0, 0, 1, 0, 0]
    ], dtype=np.uint8)
    mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, SE, iterations=9)
    ctrs, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # print(ctrs[0][0])
    if len(ctrs) != 0:
        x, y, w, h = cv2.boundingRect(max(ctrs, key=cv2.contourArea))
        # cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        img = img[y:y+h, x:x+w]
        img = cv2.resize(img, (512//shrink_factor, 512//shrink_factor))
    # img[mask == 0] = 0
    h = process_HOG(img).reshape(-1)
    return (h,)

def path_to_feature_vect(img_path : str, img_class : int) -> np.ndarray:
    """Extracts the feature vector from the given image path using the given feature extractor.\n
    Returns the image class and feature vector.
    """
    img = cv2.imread(img_path)
    features = preprocess(img)
    return np.concatenate(([img_class], *features), axis=0)

In [7]:
# Save results if you want to see what the processing produces
def change_to_results(file):
    file_name = file.split('\\')[-1]
    folder_name = file.split('\\')[-2]
    return f"{results_dir}/{folder_name}/{file_name}"


save_results = False
if save_results:
    files = glob(f"samples/*/*")
    imgs = do_parallel_work(cv2.imread, files)
    result_files = do_parallel_work(change_to_results, files)
    preprocessed_imgs = do_parallel_work(preprocess, imgs)
    do_parallel_work(cv2.imwrite, result_files, imgs)
    # [cv2.imwrite(result_files[i], preprocessed_imgs[i][1]) for i in range(len(preprocessed_imgs))]

In [8]:
# Prepare Data for Classifier
NUM_FEATURES = path_to_feature_vect("./samples/one/1.jpeg", 0).shape[0] - 1
classes = ['one', 'two_up', 'three', 'four', 'palm']
y = []
training_images = []
num_images = len(glob(f"samples/*/*"))
# Change Size of feature vector based on the processing functions you use
array = np.empty(shape=(num_images, 1 + NUM_FEATURES), dtype=np.float32)

class_start_index = 0

for i, folder in enumerate(classes):
    files = glob(f"samples/{folder}/*")
    class_length = len(files)
    array[class_start_index : class_start_index + class_length] = do_parallel_work(path_to_feature_vect, files, cycle([i]))
    class_start_index += class_length

print(array.shape)
sample_dataset = array

(5000, 2593)


In [7]:
# Classifier
def display_side_by_side(*args,titles=cycle([''])):
    html_str=''
    for df,title in zip(args, chain(titles,cycle(['</br>'])) ):
        # html_str+='<th style="text-align:center"><td style="vertical-align:top">'
        # html_str+=f'<h2 style="text-align: center;">{title}</h2>'
        html_str+=df.to_html().replace('table','table style="display:inline"')
        # html_str+='</td></th>'
    display_html(html_str,raw=True)

def model_prediction(model, x_training, y_training, x_validation, y_validation, weights=None):
    model.fit(x_training, y_training, sample_weight=weights)
    prediction_training = np.array(np.round(model.predict(x_training)), dtype=np.int8)
    prediction_validation = np.array(np.round(model.predict(x_validation)), dtype=np.int8)
    acc_training = accuracy_score(y_training, prediction_training)
    acc_validation = accuracy_score(y_validation, prediction_validation)

    confusion_matrix_training = confusion_matrix(y_training, prediction_training)
    confusion_matrix_validation = confusion_matrix(y_validation, prediction_validation)
    # print("Confusion Matrix Training: \n", confusion_matrix_training)
    display_side_by_side(pd.DataFrame(confusion_matrix_training), pd.DataFrame(confusion_matrix_validation))
    # print("Confusion Matrix Validation: \n", confusion_matrix_validation)
    print("Accuracy Score Training: ", acc_training)
    print("Accuracy Score Validation: ",acc_validation)



train, test = train_test_split(sample_dataset, test_size=0.1)
X_train = train[:,1:]
Y_train = train[:,0]
X_test = test[:,1:]
Y_test = test[:,0]


pca = PCA(n_components=200).fit(sample_dataset[:,1:])
# model = RandomForestClassifier(n_estimators=1000, criterion='log_loss')
model = SVC(C=40)
model_prediction(model, pca.transform(X=X_train), Y_train, pca.transform(X=X_test), Y_test)

Unnamed: 0,0,1,2,3,4,Unnamed: 6,Unnamed: 7
0,892,0,1,0,1,,
1,0,888,1,0,4,,
2,0,0,880,0,2,,
3,32,0,0,0,913,3.0,
4,35,1,0,0,2,0.0,913.0

Unnamed: 0,0,1,2,3,4
0,54,26,8,12,6
1,25,58,14,7,3
2,16,22,41,21,18
3,11,4,14,41,14
4,9,5,10,12,49


Accuracy Score Training:  0.9968888888888889
Accuracy Score Validation:  0.486
