In [25]:
import numpy as np
import cv2
import os
import pywt
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
eye_cascade = cv2.CascadeClassifier('./cascades/haarcascade_eye.xml')
face_cascade = cv2.CascadeClassifier('./cascades/haarcascade_frontalface_default.xml')

In [9]:
def get_cropped_image_if2eyes(image_path):
    img = cv2.imread(image_path)
    if img is None:
        print(f"Error: Unable to load image '{image_path}'")
        return None
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray_img, 1.3, 5)
    for x, y, w, h in faces:
        roi_gray = gray_img[y:y + h, x:x + w]
        roi_color = img[y:y + h, x:x + w]
        eyes = eye_cascade.detectMultiScale(roi_gray)
        if len(eyes) >= 2:
            return roi_color
    

In [4]:
db_path = './dataset'
db_cropped_path = './dataset\\cropped'


In [6]:
img_dirs = []
for entry in os.scandir(db_path):
    if entry.is_dir():
        img_dirs.append(entry.path)
img_dirs        

['./dataset\\cristiano ronaldo',
 './dataset\\jason statham',
 './dataset\\john wick',
 './dataset\\leonardo dicaprio',
 './dataset\\vin diesel']

In [10]:
cropped_image_dirs = []
cropped_file_name_dict = {}

for img_dir in img_dirs:
    count = 1
    celeb_name = str(img_dir).split('\\')[-1]
    cropped_file_name_dict[celeb_name] = []
    print(celeb_name)
    for img in os.scandir(img_dir):
        if img is None:
            print(1)
        roi_color = get_cropped_image_if2eyes(img.path)
        # roi_color = 
        if roi_color is not None:
            cropped_folder = db_cropped_path + '\\' + celeb_name
            if not os.path.exists(cropped_folder):
                os.mkdir(cropped_folder)
                cropped_image_dirs.append(cropped_folder)
                print(f"Folder with path {cropped_folder } has been created")

            cropped_image_name = f"{celeb_name}_{str(count)}.jpg"
            cropped_file_path =  cropped_folder + '\\' + cropped_image_name

            cv2.imwrite(cropped_file_path, roi_color)
            cropped_file_name_dict[celeb_name].append(cropped_file_path)
            count += 1
                

cristiano ronaldo
Folder with path ./dataset\cropped\cristiano ronaldo has been created
jason statham
Folder with path ./dataset\cropped\jason statham has been created
Error: Unable to load image './dataset\jason statham\jason-statham-attends-the-press-conference-of-director-f-gary-grays-film-the-fate-of-the-furious-on-march-23-2017-in-beijing-china-photo-by-vcg_vcg-via-getty-images-square.jpg'
john wick
Folder with path ./dataset\cropped\john wick has been created
Error: Unable to load image './dataset\john wick\Reunião_com_o_ator_norte-americano_Keanu_Reeves_(46806576944)_(cropped).jpg'
leonardo dicaprio
Folder with path ./dataset\cropped\leonardo dicaprio has been created
vin diesel
Folder with path ./dataset\cropped\vin diesel has been created


In [17]:
def w2d(img, mode='db1', level=1):
    imArray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    imArray = np.float32(imArray)
    imArray /= 255
    coeff = pywt.wavedec2(imArray, mode, level)
    coeff_H = list(coeff)
    coeff_H[0] *= 0
    imArray_H = pywt.waverec2(coeff_H, mode)
    imArray_H *= 255
    imArray_H = np.uint8(imArray_H)
    return imArray_H


In [19]:
class_dict = {}
for a, b in enumerate(cropped_file_name_dict.keys()):
    class_dict[b] = a
class_dict    

{'cristiano ronaldo': 0,
 'jason statham': 1,
 'john wick': 2,
 'leonardo dicaprio': 3,
 'vin diesel': 4}

In [20]:
X = []
y = []

for cel_name, images in cropped_file_name_dict.items():
    for image in images:
        img = cv2.imread(image)
        if img is None:
            continue
        scaled_raw = cv2.resize(img, (32, 32))
        har_img = w2d(img, level=5)
        scaled_raw_har = cv2.resize(har_img, (32, 32))
        combined_image = np.vstack((scaled_raw.reshape(32*32*3, 1), scaled_raw_har.reshape(32*32, 1)))
        X.append(combined_image)
        y.append(class_dict[cel_name])

In [27]:
X = np.array(X).reshape(len(X), 4096).astype(float)
X.shape

(268, 4096)

In [38]:
from sklearn import svm
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import  Pipeline, make_pipeline
from sklearn.metrics import classification_report 
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
import pandas as pd

In [33]:
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=.2)

pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC(kernel='rbf', C=10))])
pipe.fit(x_train, y_train)
pipe.score(x_test, y_test)

0.7777777777777778

In [35]:
print(classification_report(y_test, pipe.predict(x_test)))

              precision    recall  f1-score   support

           0       0.88      0.70      0.78        10
           1       0.89      0.80      0.84        10
           2       0.93      0.93      0.93        15
           3       0.60      0.75      0.67        12
           4       0.57      0.57      0.57         7

    accuracy                           0.78        54
   macro avg       0.77      0.75      0.76        54
weighted avg       0.79      0.78      0.78        54


In [42]:
model_params = {
    'svm': {
        'model': SVC(gamma='auto', probability=True),
        'params': {
            'svc__C': [1, 10, 100, 1000],
            'svc__kernel': ['rbf', 'linear']
        }
    },
    'log_reg': {
        'model': LogisticRegression(solver='liblinear', multi_class='auto'),
        'params': {
            'logisticregression__C': [1, 5, 10]  # Adjusted parameter name
        }
    },
    'forest': {
        'model': RandomForestClassifier(),
        'params': {
            'randomforestclassifier__n_estimators': [1, 5, 10]
        }
    }
}


In [43]:
scores = []
best_estimator = {}

for alf, md in model_params.items():
    pipe = make_pipeline(StandardScaler(), md['model'])
    clf = GridSearchCV(pipe, md['params'], cv=5, return_train_score=False)
    clf.fit(x_train, y_train)
    scores.append({
        'model': alf,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    best_estimator[alf] = clf.best_estimator_
    
df = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
df

Unnamed: 0,model,best_score,best_params
0,svm,0.840975,"{'svc__C': 1, 'svc__kernel': 'linear'}"
1,log_reg,0.827021,{'logisticregression__C': 1}
2,forest,0.644297,{'randomforestclassifier__n_estimators': 10}


In [44]:
best_estimator['svm'].score(x_test, y_test)

0.8333333333333334

In [46]:
best_estimator['log_reg'].score(x_test, y_test)

0.8703703703703703

In [45]:
best_estimator['forest'].score(x_test, y_test)

0.5370370370370371