In [4]:
import cv2
import numpy as np
import dlib
import os
from sklearn.cluster import KMeans
from matplotlib import pyplot as plt
from skimage.feature import hog
from skimage import exposure

In [5]:
# Dataset and model locations, relative to the notebook's working directory.
# Built with os.path.join so the separators are correct on every OS
# (on Windows this yields exactly the same strings as the old raw literals).
dir_train = os.path.join('.', 'cropped_dataset', 'train')
dir_test = os.path.join('.', 'cropped_dataset', 'test')
dir_model = os.path.join('.', 'model')

# dlib face detector and landmark predictor; the predictor weights are read
# from the .dat file inside dir_model, which must exist before this cell runs.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor(os.path.join(dir_model, "shape_predictor_68_face_landmarks.dat"))

In [6]:
def edge_segmentation(image):
    """Run Canny edge detection on `image` and return the resulting edge map.

    The two hysteresis thresholds are fixed at 100 (lower) and 200 (upper).
    """
    lower_threshold, upper_threshold = 100, 200
    return cv2.Canny(image, lower_threshold, upper_threshold)

# Feature extraction (dummy example)

def extract_features(image):
    """Compute a HOG descriptor for `image` after resizing it to 128x128.

    Parameters
    ----------
    image : array-like image accepted by ``cv2.resize``.
        The caller in this notebook passes a grayscale image.

    Returns
    -------
    tuple
        ``(features, hog_image)`` exactly as returned by ``skimage.feature.hog``
        with ``visualize=True``: the descriptor and its visualisation image.
    """
    # Fixed input size so every image produces a descriptor of the same length
    resized = cv2.resize(image, (128, 128))

    hog_settings = dict(
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(4, 4),
        block_norm="L2",
        visualize=True,
    )
    descriptor, visualisation = hog(resized, **hog_settings)
    return descriptor, visualisation

def apply_kmeans(image, n_clusters=4, random_state=None):
    """Quantise an image's colours by replacing each pixel with its K-Means centroid.

    Parameters
    ----------
    image : numpy.ndarray
        Either a 2-D single-channel image or a 3-D image whose last axis holds
        the channels (e.g. the BGR array returned by ``cv2.imread``).
    n_clusters : int, default 4
        Number of colour clusters.
    random_state : int or None, default None
        Forwarded to ``KMeans``. Pass an int for reproducible segmentation;
        ``None`` keeps the previous (unseeded) behaviour.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as `image`, dtype ``uint8``, where every pixel
        holds the (truncated) centre of the cluster it was assigned to.
    """
    # One row per pixel. The channel count is taken from the image instead of
    # being hard-coded to 3, so grayscale input is not silently reinterpreted
    # as groups of three unrelated pixels.
    n_channels = image.shape[-1] if image.ndim == 3 else 1
    pixels = image.reshape(-1, n_channels)

    kmeans = KMeans(n_clusters=n_clusters, n_init="auto", random_state=random_state)
    kmeans.fit(pixels)

    # Map each pixel's label back to its cluster centre, then restore the layout
    segmented_image = kmeans.cluster_centers_[kmeans.labels_]
    segmented_image = segmented_image.reshape(image.shape).astype('uint8')

    return segmented_image

In [7]:
def feature_engineering(path):
    """Build a HOG feature matrix with a trailing label column from an image tree.

    `path` must contain one sub-directory per face shape ('Heart', 'Oblong',
    'Oval', 'Round', 'Square'). Every image found there is read, converted to
    grayscale and passed through ``extract_features``.

    Parameters
    ----------
    path : str
        Root directory holding the per-shape sub-directories.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per successfully read image: the HOG descriptor
        followed by the class index (the position of the shape in the list
        above) as the last column.

    Raises
    ------
    ValueError
        If no image under `path` could be read.
    """
    shape_types = ['Heart', 'Oblong', 'Oval', 'Round', 'Square']
    labels = []
    hog_features = []

    for label, shape in enumerate(shape_types):
        print(f'Loading {shape} images...')
        # os.path.join instead of a hard-coded "\\" so this also works off Windows
        shape_dir = os.path.join(path, shape)
        # os.listdir order is arbitrary; sort so the row order is reproducible
        for imgName in sorted(os.listdir(shape_dir)):
            print(f'Processing {imgName}...', end=" ")
            img_path = os.path.join(shape_dir, imgName)
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals an unreadable/non-image file by returning
                # None; skip it instead of crashing inside cvtColor.
                print(f'skipped (unreadable: {img_path})', end=" ")
                continue

            # Optional preprocessing (apply_kmeans / edge_segmentation) is
            # intentionally not applied; HOG runs on the plain grayscale image.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            hog_feature, _ = extract_features(img)
            hog_features.append(hog_feature)
            labels.append(label)
        print("")

    if not hog_features:
        raise ValueError(f'No readable images found under {path!r}')

    # Column vector of labels so it can be appended as the last column
    labels = np.array(labels).reshape(len(labels), 1)
    hog_features = np.array(hog_features)

    data = np.hstack((hog_features, labels))
    return data

In [8]:
# Extract the HOG feature/label matrices, training split first and test split second.
data_train, data_test = (
    feature_engineering(split_dir) for split_dir in (dir_train, dir_test)
)

Loading Heart images...
Processing Heart (0).jpg... Processing Heart (1).jpg... Processing Heart (10).jpg... Processing Heart (100).jpg... Processing Heart (101).jpg... Processing Heart (102).jpg... Processing Heart (103).jpg... Processing Heart (104).jpg... Processing Heart (105).jpg... Processing Heart (106).jpg... Processing Heart (107).jpg... Processing Heart (108).jpg... Processing Heart (109).jpg... Processing Heart (11).jpg... Processing Heart (110).jpg... Processing Heart (111).jpg... Processing Heart (112).jpg... Processing Heart (113).jpg... Processing Heart (114).jpg... Processing Heart (115).jpg... Processing Heart (116).jpg... Processing Heart (117).jpg... Processing Heart (118).jpg... Processing Heart (119).jpg... Processing Heart (12).jpg... Processing Heart (120).jpg... Processing Heart (121).jpg... Processing Heart (122).jpg... Processing Heart (123).jpg... Processing Heart (124).jpg... Processing Heart (125).jpg... Processing Heart (126).jpg... Processing Heart (127).

In [9]:
# Seeded generator: the grid search only sees the first 300 shuffled rows, so
# an unseeded shuffle would change the selected subset (and the reported
# scores) on every run.
rng = np.random.default_rng(42)

# Shuffle rows in place, then split each matrix into features (all columns but
# the last) and labels (last column). The labels became floats when they were
# stacked next to the HOG features, so cast them back to integer class ids.
rng.shuffle(data_train)
x_train = data_train[:, :-1]
y_train = data_train[:, -1].astype(int)

rng.shuffle(data_test)
x_test = data_test[:, :-1]
y_test = data_test[:, -1].astype(int)

## ĐÂY LÀ PHẦN BUILD MODEL

In [8]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid explored for the RBF-kernel SVM
para = dict(
    C=[1, 2, 5],
    gamma=[0.1, 0.01, 1, "scale"],
)

model = SVC(kernel='rbf', random_state=42)
search = GridSearchCV(estimator=model, param_grid=para)

# Only the first 300 training rows are used for the search
search.fit(x_train[:300], y_train[:300])

In [9]:
# Pull the winning configuration and its mean cross-validation score from the search
best_params, best_score = search.best_params_, search.best_score_

print(f"Best parameters: {best_params}")
print(f"Best cross-validation score: {best_score}")

Best parameters: {'C': 5, 'gamma': 'scale'}
Best cross-validation score: 0.43666666666666665


In [10]:
from sklearn.svm import SVC

# Final classifier: RBF SVM with the values reported by the grid search above,
# trained on the complete training set.
svm_settings = {"kernel": 'rbf', "C": 5, "gamma": "scale"}
model = SVC(**svm_settings)
model.fit(x_train, y_train)

In [11]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
# Evaluate on the held-out test split: print the confusion matrix
# and display the overall accuracy as the cell's result.
y_pred = model.predict(x_test)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
accuracy_score(y_true=y_test, y_pred=y_pred)

[[142  17  22  15   4]
 [ 25 133  22  15   5]
 [ 29  40 100  26   5]
 [ 14   9  24 132  21]
 [  6  27  30  61  76]]


0.583

### Save the model after training (if you trained with TensorFlow, save the model by following this [link](https://www.geeksforgeeks.org/save-and-load-models-in-tensorflow/) instead)

In [None]:
# Installs joblib into the kernel's environment.
# NOTE(review): no visible cell imports or uses joblib; confirm this install is still needed.
%pip install joblib

In [None]:
import pickle

# Serialise the trained classifier to disk with pickle.
# DON'T RUN IF YOU ALREADY HAVE THE MODEL FILES - opening with 'wb' overwrites any existing file.
file_name = './model/ml-model.pkl'
with open(file_name, mode='wb') as file:
    pickle.dump(model, file)