In [1]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades +'haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades +  'haarcascade_eye.xml')

In [7]:
def get_cropped_image_if_2_eyes(image_path):
    img = cv2.imread(image_path)
    if img is None:
        return None
    gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray,1.3,5)
    for (x,y,w,h) in faces:
        roi_gray = gray[y:y+h , x:x+w]
        roi_color = img[y:y+h , x:x+w]
        eyes = eye_cascade.detectMultiScale(roi_gray)
        if(len(eyes)>=2):
            return roi_color
    return None
        

In [8]:
path_to_data = "C:/Users/krish/OneDrive/Desktop/MACHINE LEARNING/Image Classification/model/Pictures/"
path_to_cr_data = "C:/Users/krish/OneDrive/Desktop/MACHINE LEARNING/Image Classification/model/Pictures/cropped/"
import os
img_dirs = []
for entry in os.scandir(path_to_data):
    if(entry.is_dir()):
        img_dirs.append(entry.path)

In [9]:
img_dirs

['C:/Users/krish/OneDrive/Desktop/MACHINE LEARNING/Image Classification/model/Pictures/Cristiano_Ronaldo',
 'C:/Users/krish/OneDrive/Desktop/MACHINE LEARNING/Image Classification/model/Pictures/cropped',
 'C:/Users/krish/OneDrive/Desktop/MACHINE LEARNING/Image Classification/model/Pictures/Lionel_Messi',
 'C:/Users/krish/OneDrive/Desktop/MACHINE LEARNING/Image Classification/model/Pictures/Maria_Sharapova',
 'C:/Users/krish/OneDrive/Desktop/MACHINE LEARNING/Image Classification/model/Pictures/Serena_Williams',
 'C:/Users/krish/OneDrive/Desktop/MACHINE LEARNING/Image Classification/model/Pictures/Virat_Kohli']

In [11]:
import shutil
if os.path.exists(path_to_cr_data):
    shutil.rmtree(path_to_cr_data)
os.mkdir(path_to_cr_data)

In [12]:
def has_image_files(directory):
    image_extensions = ('.png', '.jpg', '.jpeg')
    for entry in os.scandir(directory):
        if entry.is_file() and entry.name.lower().endswith(image_extensions):
            return True
    return False

cropped_img_dirs = []
celebrity_file_names_dict = {}

for img_dir in img_dirs:
    if not has_image_files(img_dir):
        continue  # Skip directories without image files
    
    count = 1
    celebrity_name = os.path.basename(img_dir)
    celebrity_file_names_dict[celebrity_name] = []
    cropped_folder = os.path.join(path_to_cr_data, celebrity_name)
    if not os.path.exists(cropped_folder):
        os.makedirs(cropped_folder)
        cropped_img_dirs.append(cropped_folder)
    
    for entry in os.scandir(img_dir):
        if entry.is_file() and entry.name.lower().endswith(('.png', '.jpg', '.jpeg')):
            roi_color = get_cropped_image_if_2_eyes(entry.path)
            if roi_color is not None:
                cropped_file_name = f"{celebrity_name}_{count}.png"
                cropped_file_path = os.path.join(cropped_folder, cropped_file_name)
                
                cv2.imwrite(cropped_file_path, roi_color)
                celebrity_file_names_dict[celebrity_name].append(cropped_file_path)
                count += 1

In [13]:
import pywt


In [19]:
def w2d(img,mode='haar',level=1):
    imArray = img
    
    imArray = cv2.cvtColor(imArray,cv2.COLOR_BGR2GRAY)
    
    imArray = np.float32(imArray)
    imArray /=255
    coeffs = pywt.wavedec2(imArray,mode,level=level)
    
    coeffs_H = list(coeffs)
    coeffs_H[0] *=0
    
    imArray_H = pywt.waverec2(coeffs_H,mode)
    imArray_H *=255
    imArray_H = np.uint8(imArray_H)
    
    return imArray_H

In [15]:
celebrity_file_names_dict

{'Cristiano_Ronaldo': ['C:/Users/krish/OneDrive/Desktop/MACHINE LEARNING/Image Classification/model/Pictures/cropped/Cristiano_Ronaldo\\Cristiano_Ronaldo_1.png',
  'C:/Users/krish/OneDrive/Desktop/MACHINE LEARNING/Image Classification/model/Pictures/cropped/Cristiano_Ronaldo\\Cristiano_Ronaldo_2.png',
  'C:/Users/krish/OneDrive/Desktop/MACHINE LEARNING/Image Classification/model/Pictures/cropped/Cristiano_Ronaldo\\Cristiano_Ronaldo_3.png',
  'C:/Users/krish/OneDrive/Desktop/MACHINE LEARNING/Image Classification/model/Pictures/cropped/Cristiano_Ronaldo\\Cristiano_Ronaldo_4.png',
  'C:/Users/krish/OneDrive/Desktop/MACHINE LEARNING/Image Classification/model/Pictures/cropped/Cristiano_Ronaldo\\Cristiano_Ronaldo_5.png',
  'C:/Users/krish/OneDrive/Desktop/MACHINE LEARNING/Image Classification/model/Pictures/cropped/Cristiano_Ronaldo\\Cristiano_Ronaldo_6.png',
  'C:/Users/krish/OneDrive/Desktop/MACHINE LEARNING/Image Classification/model/Pictures/cropped/Cristiano_Ronaldo\\Cristiano_Ronaldo_

In [20]:
class_dict = {}
count =0
for celebrity_name in celebrity_file_names_dict.keys():
    class_dict[celebrity_name] = count
    count+=1
    

In [21]:
X =[]
y=[]

for celebrity_name, training_files in celebrity_file_names_dict.items():
    for training_image in training_files:
        img = cv2.imread(training_image)
        if img is None:
            continue
        scalled_raw_img = cv2.resize(img,(32,32))
        img_har = w2d(img,'db1',5)
        scalled_img_har = cv2.resize(img_har,(32,32))
        combined_img = np.vstack((scalled_raw_img.reshape(32*32*3,1),scalled_img_har.reshape(32*32,1)))
        X.append(combined_img)
        y.append(class_dict[celebrity_name])
X = np.array(X).reshape(len(X), 32 * 32 * 3 + 32 * 32).astype(float)
y = np.array(y)

In [22]:
len(X)

489

In [23]:
len(y)

489

In [31]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [46]:

model_params = {
    'svm': {
        'model' : SVC(gamma='auto',probability=True),
        'params' : {
            'C' : [1,10,20],
            'kernel':['rbf','linear']
        }
    },
    'random_forest' : {
        'model' : RandomForestClassifier(),
        'params' : {
            'n_estimators' : [1,5,10]
        }
    },
    'logistic_regression' : {
        'model' : LogisticRegression(solver='liblinear',multi_class='auto'),
        'params' : {
            'C' : [1,5,10]
        }
    }
}


scores = []
best_estimators = {}

for model_name, mp in model_params.items():
    clf = GridSearchCV(mp['model'],mp['params'],cv=5,return_train_score=False)
    
    clf.fit(X,y)
    scores.append({
        'model' : model_name,
        'best_score' : clf.best_score_,
        'best_params':clf.best_params_
    })
    best_estimators[model_name] = clf.best_estimator_

dataframe_scores= pd.DataFrame(scores,columns=['model','best_score','best_params'])
    
    



In [47]:
dataframe_scores

Unnamed: 0,model,best_score,best_params
0,svm,0.846728,"{'C': 1, 'kernel': 'linear'}"
1,random_forest,0.636082,{'n_estimators': 10}
2,logistic_regression,0.828319,{'C': 1}


In [48]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y)


In [49]:
best_estimators

{'svm': SVC(C=1, gamma='auto', kernel='linear', probability=True),
 'random_forest': RandomForestClassifier(n_estimators=10),
 'logistic_regression': LogisticRegression(C=1, multi_class='auto', solver='liblinear')}

In [33]:
from sklearn.pipeline import Pipeline

svm = SVC(C=1,kernel='linear')


pipeline = Pipeline(steps=[
    ('scaler',StandardScaler()),
    ('svc',svm)
])


In [34]:
pipeline.fit(X_train,y_train)

In [37]:
pipeline.score(X_test,y_test)

0.926829268292683

In [38]:
import joblib

In [50]:
best_clf = best_estimators['svm']

In [51]:
joblib.dump(best_clf,'saved_model.pkl')

['saved_model.pkl']

In [45]:
import json
with open("class_dictionary.json","w") as f:
    f.write(json.dumps(class_dict))