In [2]:
import numpy as np
import cv2
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
import os
import pywt

In [3]:
# Load one sample picture and show its array shape (rows, columns, channels).
sample_image_path = "./test images/chalamettest1.jpg"
img = cv2.imread(sample_image_path)
img.shape

(275, 183, 3)

In [4]:
# Haar cascade definition files used for face and eye detection.
face_cascade_xml = "./haarcascades/haarcascade_frontalface_default.xml"
eye_cascade_xml = "./haarcascades/haarcascade_eye.xml"

face_cascade = cv2.CascadeClassifier(face_cascade_xml)
eye_cascade = cv2.CascadeClassifier(eye_cascade_xml)


In [5]:
def get_cropped_image_if_2_eyes(image_path):
    """Return the first detected face crop that shows at least two eyes.

    The path is normalised and the image is loaded with OpenCV. Faces are
    searched for on a grayscale copy with the module-level ``face_cascade``;
    each face region is then cut out of the colour image and scanned with
    the module-level ``eye_cascade``.

    Args:
        image_path: Location of the image file on disk.

    Returns:
        The colour face region (a slice of the loaded image) of the first
        face in which two or more eyes are detected, or ``None`` when the
        file does not exist, cannot be decoded, or no face qualifies.
        A message is printed for the first two failure cases.
    """
    image_path = os.path.normpath(image_path)

    # Guard clauses: report and bail out early on a missing or unreadable file.
    if not os.path.exists(image_path):
        print(f"Datei nicht gefunden: {image_path}")
        return None

    picture = cv2.imread(image_path)
    if picture is None:
        print(f"Konnte Bild nicht laden: {image_path}")
        return None

    grayscale = cv2.cvtColor(picture, cv2.COLOR_BGR2GRAY)

    # Positional arguments 1.3 and 5 are the cascade's scale factor and
    # minimum-neighbours settings.
    for left, top, width, height in face_cascade.detectMultiScale(grayscale, 1.3, 5):
        face_region = picture[top:top + height, left:left + width]
        detected_eyes = eye_cascade.detectMultiScale(face_region)
        if len(detected_eyes) < 2:
            continue
        return face_region

    return None


In [6]:

# Map every celebrity (one sub-folder of the cropped dataset) to the list of
# file paths found inside that folder.
path_to_cropped = "./dataset/cropped"
celebrity_file_names_dict = {}

# os.listdir() returns entries in arbitrary order. Sorting the folder names
# keeps the dictionary order -- and therefore the integer class ids that are
# derived from it further down -- identical across machines and re-runs.
for celebrity_folder in sorted(os.listdir(path_to_cropped)):
    celeb_path = os.path.join(path_to_cropped, celebrity_folder)

    # Ignore stray files that sit next to the celebrity folders.
    if not os.path.isdir(celeb_path):
        continue

    # Folder names become keys in lower case with spaces replaced by "_".
    celeb_name = celebrity_folder.lower().replace(" ", "_")
    celebrity_file_names_dict[celeb_name] = [
        os.path.join(celeb_path, file_name)
        for file_name in sorted(os.listdir(celeb_path))
    ]



In [7]:

def w2d(img, mode="haar", level=1):
    """Build a detail-only wavelet image from a colour image.

    The input is converted to grayscale, cast to float32 and divided by 255,
    then decomposed with a 2-D multilevel wavelet transform. The
    approximation coefficients are multiplied by zero before the image is
    reconstructed, so only the detail coefficients contribute to the result.

    Args:
        img: Colour image array accepted by ``cv2.cvtColor``.
        mode: Wavelet name passed to PyWavelets.
        level: Number of decomposition levels.

    Returns:
        The reconstructed image, multiplied by 255 and cast to ``uint8``.
    """
    # NOTE(review): the conversion code assumes RGB channel order, while the
    # callers in this notebook pass images read with cv2.imread (presumably
    # BGR) -- confirm before changing, since it alters the features.
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    normalized = np.float32(gray) / 255

    coefficients = pywt.wavedec2(normalized, mode, level=level)

    # The first entry holds the approximation band; blank it and keep the
    # detail bands untouched.
    approximation, *details = coefficients
    detail_only = [approximation * 0, *details]

    # NOTE(review): the detail-only signal may contain negative values; the
    # uint8 cast below does not clip them -- verify this is acceptable.
    reconstructed = pywt.waverec2(detail_only, mode) * 255
    return np.uint8(reconstructed)

In [8]:
# Give every celebrity a consecutive integer label, following the iteration
# order of celebrity_file_names_dict.
class_dict = {
    celebrity_name: label
    for label, celebrity_name in enumerate(celebrity_file_names_dict)
}

class_dict


{'millie_bobby_brown': 0,
 'timothee_chalamet': 1,
 'tom_holland': 2,
 'will_smith': 3,
 'zendaya': 4}

In [9]:
# Feature columns (X) and integer class labels (y), collected image by image.
X = []
y = []

for celebrity_name, training_files in celebrity_file_names_dict.items():
    for training_image in training_files:
        img = cv2.imread(training_image)
        if img is None:
            # Entry could not be read as an image: leave it out.
            continue

        # Raw pixels shrunk to 32x32 and flattened into a 3072x1 column.
        raw_column = cv2.resize(img, (32, 32)).reshape(32*32*3, 1)

        # Wavelet detail image of the full-size picture, shrunk the same way
        # and flattened into a 1024x1 column.
        wavelet_image = w2d(img, 'db1', 5)
        wavelet_column = cv2.resize(wavelet_image, (32, 32)).reshape(32*32, 1)

        # One sample = raw column stacked on top of the wavelet column.
        X.append(np.vstack((raw_column, wavelet_column)))
        y.append(class_dict[celebrity_name])


In [10]:
# Turn the list of per-image columns into a 2-D float matrix with one row of
# 4096 features (3072 raw + 1024 wavelet values) per image.
sample_count = len(X)
X = np.array(X).reshape(sample_count, 4096).astype(float)
X.shape

(187, 4096)

In [11]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report


In [12]:
# Hold out part of the data for evaluation; the fixed seed keeps the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Baseline model: standardise the features, then fit an RBF-kernel SVM.
baseline_steps = [
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='rbf', C=10)),
]
pipe = Pipeline(baseline_steps)
pipe.fit(X_train, y_train)
pipe.score(X_test, y_test)


0.723404255319149

In [13]:
# Number of samples in the held-out test split.
X_test.shape[0]

47

In [15]:
# Per-class precision, recall and F1 of the baseline pipeline on the test split.
baseline_predictions = pipe.predict(X_test)
print(classification_report(y_test, baseline_predictions))

              precision    recall  f1-score   support

           0       0.62      0.80      0.70        10
           1       1.00      0.50      0.67         8
           2       0.57      0.67      0.62         6
           3       0.88      0.64      0.74        11
           4       0.73      0.92      0.81        12

    accuracy                           0.72        47
   macro avg       0.76      0.70      0.71        47
weighted avg       0.77      0.72      0.72        47



In [16]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV


In [17]:
# Candidate models and the hyper-parameter grid searched for each of them.
# Grid keys use the "<step name>__<parameter>" form; the step names are the
# lower-cased estimator class names that make_pipeline assigns.
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto', probability=True),
        'params': {
            'svc__C': [1, 10, 100, 1000],
            'svc__kernel': ['rbf', 'linear']
        }
    },
    'random_forest': {
        # Fixed seed: an unseeded forest is rebuilt with fresh randomness on
        # every run, which makes its scores impossible to reproduce.
        'model': RandomForestClassifier(random_state=0),
        'params': {
            'randomforestclassifier__n_estimators': [1, 5, 10]
        }
    },
    'logistic_regression': {
        # multi_class is left at its default ('auto' in scikit-learn 0.22+, which
        # liblinear resolves to one-vs-rest): passing it explicitly is redundant
        # and deprecated in recent scikit-learn releases.
        'model': LogisticRegression(solver='liblinear'),
        'params': {
            'logisticregression__C': [1, 5, 10]
        }
    }
}


In [18]:
import pandas as pd

# Grid-search every candidate model with 5-fold cross-validation on the
# training split. For each one, record the best CV score and parameters and
# keep the best pipeline found by the search.
scores = []
best_estimators = {}

for algo, mp in model_params.items():
    # Use a dedicated name for the searched pipeline: rebinding `pipe` here
    # would replace the fitted baseline pipeline with an unfitted one and
    # break any later re-run of cells that still evaluate the baseline.
    search_pipe = make_pipeline(StandardScaler(), mp['model'])
    clf = GridSearchCV(search_pipe, mp['params'], cv=5, return_train_score=False)
    clf.fit(X_train, y_train)
    scores.append({
        'model': algo,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    best_estimators[algo] = clf.best_estimator_

# Summary table: one row per candidate model.
df = pd.DataFrame(scores, columns=['model', 'best_score', 'best_params'])
df




Unnamed: 0,model,best_score,best_params
0,svm,0.742857,"{'svc__C': 1, 'svc__kernel': 'linear'}"
1,random_forest,0.514286,{'randomforestclassifier__n_estimators': 10}
2,logistic_regression,0.735714,{'logisticregression__C': 1}


In [20]:
# Held-out accuracy of the tuned SVM pipeline.
tuned_svm = best_estimators["svm"]
tuned_svm.score(X_test, y_test)

0.723404255319149

In [21]:
# Held-out accuracy of the tuned random-forest pipeline.
tuned_forest = best_estimators["random_forest"]
tuned_forest.score(X_test, y_test)


0.6170212765957447

In [25]:
# Held-out accuracy of the tuned logistic-regression pipeline.
tuned_logreg = best_estimators["logistic_regression"]
tuned_logreg.score(X_test, y_test)

0.8936170212765957

In [26]:
# Keep the tuned logistic-regression pipeline as the final classifier.
# NOTE(review): this choice appears to follow the held-out test scores shown
# above, whereas the cross-validation table ranked the SVM slightly higher --
# confirm the selection criterion, because picking a model by its test score
# makes that score an optimistic estimate.
best_clf = best_estimators["logistic_regression"]

In [27]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the selected classifier on the test split: rows are the
# true classes, columns the predicted ones.
test_predictions = best_clf.predict(X_test)
cm = confusion_matrix(y_test, test_predictions)
cm


array([[ 7,  0,  2,  0,  1],
       [ 0,  6,  1,  0,  1],
       [ 0,  0,  6,  0,  0],
       [ 0,  0,  0, 11,  0],
       [ 0,  0,  0,  0, 12]])

In [28]:
import joblib

# No `pip install` step is needed here: joblib is a dependency of
# scikit-learn, which this notebook has already imported, and a shell-level
# `!pip` may target a different environment than the running kernel.

# Persist the selected pipeline (scaler + classifier) as a pickle file.
joblib.dump(best_clf, 'saved_model.pkl')




['saved_model.pkl']

In [29]:
import json

# Write the celebrity-name -> class-id mapping to a JSON file.
with open("class_dictionary.json", "w") as f:
    json.dump(class_dict, f)
