In [1]:
import cv2
import numpy as np 
import matplotlib
from matplotlib import pyplot as plt
%matplotlib inline
import sklearn



# Face and eyes Dectection models

In [2]:
face_cascade = cv2.CascadeClassifier('./haarcascades/haarcascade_frontalface_default.xml')
eye_cascade = cv2.CascadeClassifier('./haarcascades/haarcascade_eye.xml')


# Data Cleaning

In [3]:
import pywt

In [4]:
def crop_image(image_path):
    img = cv2.imread(image_path)
    try:
        gray = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
        faces = face_cascade.detectMultiScale(gray,1.3,5)
        for (x,y,w,h) in faces:
            roi_color = img[y:y+h,x:x+w]
            roi_gray = gray[y:y+h,x:x+w] 
            eyes = eye_cascade.detectMultiScale(roi_gray)
            if len(eyes)>1:
                return roi_color
    except:
        pass
    


In [5]:
def w2d(img, mode='haar', level=1):
    imArray = img
    #Datatype conversions
    #convert to grayscale
    imArray = cv2.cvtColor( imArray,cv2.COLOR_RGB2GRAY )
    #convert to float
    imArray =  np.float32(imArray)   
    imArray /= 255;
    # compute coefficients 
    coeffs=pywt.wavedec2(imArray, mode, level=level)

    #Process Coefficients
    coeffs_H=list(coeffs)  
    coeffs_H[0] *= 0;  

    # reconstruction
    imArray_H=pywt.waverec2(coeffs_H, mode);
    imArray_H *= 255;
    imArray_H =  np.uint8(imArray_H)

    return imArray_H

In [6]:
path_of_data = './data/'
path_to_cr_data = './data/cropped/'

In [7]:
import os 
img_dirs =[]
for entry in os.scandir(path_of_data):
    if entry.is_dir():
        img_dirs.append(entry.path)

### Creating the folder --- Dont re-run it

In [8]:
import shutil
if os.path.exists(path_to_cr_data):
     shutil.rmtree(path_to_cr_data)
os.mkdir(path_to_cr_data)

### Saving all the cropped images

In [9]:
img_dirs

['./data/mbappe',
 './data/dhoni',
 './data/ronaldo',
 './data/virat',
 './data/messi',
 './data/cropped']

In [10]:

no = 1
for img_dir in img_dirs:
    count = 1
    celeb_name = img_dir.split('/')[-1]   #-----Names of the celeb
    for entry in os.scandir(img_dir):
        roi_color = crop_image(entry.path)
        try:

            if roi_color is not None:
                cropped_folder  = path_to_cr_data + celeb_name
                if not os.path.exists(cropped_folder):
                    os.makedirs(cropped_folder)

                    print("Generating cropped images in folder: ",cropped_folder)
                cropped_file_name = celeb_name.strip() + str(count) + ".png"
                cropped_file_path = cropped_folder.strip() + "/" + cropped_file_name
                cv2.imwrite(cropped_file_path, roi_color)
                count += 1
        except:
            continue

            
        



Generating cropped images in folder:  ./data/cropped/mbappe
Generating cropped images in folder:  ./data/cropped/dhoni
Generating cropped images in folder:  ./data/cropped/ronaldo
Generating cropped images in folder:  ./data/cropped/virat
Generating cropped images in folder:  ./data/cropped/messi


crop_file_name_dict

In [11]:
crop_image_dirs = []
for entry in os.scandir('./data/cropped/'):
    if entry.is_dir():
        crop_image_dirs.append(entry.path)
crop_image_dirs
        


['./data/cropped/mbappe',
 './data/cropped/dhoni',
 './data/cropped/ronaldo',
 './data/cropped/virat',
 './data/cropped/messi']

In [19]:
celebrity_file_names_dict = {}
for img_dir in crop_image_dirs:
    celebrity_name = img_dir.split('/')[-1]
    file_list = []
    for entry in os.scandir(img_dir):
        file_list.append(entry.path)
        
    celebrity_file_names_dict[celebrity_name] = file_list
celebrity_file_names_dict

{'mbappe': ['./data/cropped/mbappe/mbappe46.png',
  './data/cropped/mbappe/mbappe52.png',
  './data/cropped/mbappe/mbappe53.png',
  './data/cropped/mbappe/mbappe47.png',
  './data/cropped/mbappe/mbappe51.png',
  './data/cropped/mbappe/mbappe45.png',
  './data/cropped/mbappe/mbappe44.png',
  './data/cropped/mbappe/mbappe50.png',
  './data/cropped/mbappe/mbappe54.png',
  './data/cropped/mbappe/mbappe40.png',
  './data/cropped/mbappe/.DS_Store',
  './data/cropped/mbappe/mbappe41.png',
  './data/cropped/mbappe/mbappe55.png',
  './data/cropped/mbappe/mbappe43.png',
  './data/cropped/mbappe/mbappe57.png',
  './data/cropped/mbappe/mbappe56.png',
  './data/cropped/mbappe/mbappe42.png',
  './data/cropped/mbappe/mbappe31.png',
  './data/cropped/mbappe/mbappe19.png',
  './data/cropped/mbappe/mbappe18.png',
  './data/cropped/mbappe/mbappe30.png',
  './data/cropped/mbappe/mbappe24.png',
  './data/cropped/mbappe/mbappe32.png',
  './data/cropped/mbappe/mbappe27.png',
  './data/cropped/mbappe/mbappe33

In [20]:
class_dict = {}
count = 0
for celebrity_name in celebrity_file_names_dict.keys():
    class_dict[celebrity_name] = count
    count = count + 1
class_dict


{'mbappe': 0, 'dhoni': 1, 'ronaldo': 2, 'virat': 3, 'messi': 4}

In [21]:

X, y = [], []
for celebrity_name, training_files in celebrity_file_names_dict.items():
    
    for image in training_files:
        try:
            img = cv2.imread(image) 
            print(image)
            scalled_raw_img = cv2.resize(img, (32, 32))
            img_har = w2d(img,'db1',5)
            scalled_img_har = cv2.resize(img_har, (32, 32))
            combined_img = np.vstack((scalled_raw_img.reshape(32*32*3,1),scalled_img_har.reshape(32*32,1)))
            X.append(combined_img)
            y.append(class_dict[celebrity_name]) 
        except:
            continue



     
        




./data/cropped/mbappe/mbappe46.png
./data/cropped/mbappe/mbappe52.png
./data/cropped/mbappe/mbappe53.png
./data/cropped/mbappe/mbappe47.png
./data/cropped/mbappe/mbappe51.png
./data/cropped/mbappe/mbappe45.png
./data/cropped/mbappe/mbappe44.png
./data/cropped/mbappe/mbappe50.png
./data/cropped/mbappe/mbappe54.png
./data/cropped/mbappe/mbappe40.png
./data/cropped/mbappe/.DS_Store
./data/cropped/mbappe/mbappe41.png
./data/cropped/mbappe/mbappe55.png
./data/cropped/mbappe/mbappe43.png
./data/cropped/mbappe/mbappe57.png
./data/cropped/mbappe/mbappe56.png
./data/cropped/mbappe/mbappe42.png
./data/cropped/mbappe/mbappe31.png
./data/cropped/mbappe/mbappe19.png
./data/cropped/mbappe/mbappe18.png
./data/cropped/mbappe/mbappe30.png
./data/cropped/mbappe/mbappe24.png
./data/cropped/mbappe/mbappe32.png
./data/cropped/mbappe/mbappe27.png
./data/cropped/mbappe/mbappe33.png
./data/cropped/mbappe/mbappe37.png
./data/cropped/mbappe/mbappe23.png
./data/cropped/mbappe/mbappe22.png
./data/cropped/mbappe/m

In [22]:
X = np.array(X).reshape(len(X),4096).astype(float)
X.shape

(243, 4096)

In [23]:
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

In [24]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC(kernel = 'rbf', C = 10))])
pipe.fit(X_train, y_train)
pipe.score(X_test, y_test)

0.7704918032786885

In [25]:
print(classification_report(y_test, pipe.predict(X_test)))

              precision    recall  f1-score   support

           0       0.58      0.64      0.61        11
           1       0.80      0.80      0.80        10
           2       0.79      0.88      0.83        25
           3       1.00      0.75      0.86         8
           4       0.80      0.57      0.67         7

    accuracy                           0.77        61
   macro avg       0.79      0.73      0.75        61
weighted avg       0.78      0.77      0.77        61



In [26]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV

In [27]:
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto',probability=True),
        'params' : {
            'svc__C': [1,10,100,1000],
            'svc__kernel': ['rbf','linear']
        }  
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params' : {
            'randomforestclassifier__n_estimators': [1,5,10]
        }
    },
    'logistic_regression' : {
        'model': LogisticRegression(solver='liblinear',multi_class='auto'),
        'params': {
            'logisticregression__C': [1,5,10]
        }
    }
}

In [28]:
scores = []
best_estimators = {}
import pandas as pd
for algo, mp in model_params.items():
    pipe = make_pipeline(StandardScaler(), mp['model'])
    clf =  GridSearchCV(pipe, mp['params'], cv=5, return_train_score=False)
    clf.fit(X_train, y_train)
    scores.append({
        'model': algo,
        'best_score': clf.best_score_,
        'best_params': clf.best_params_
    })
    best_estimators[algo] = clf.best_estimator_
    
df = pd.DataFrame(scores,columns=['model','best_score','best_params'])
df

Unnamed: 0,model,best_score,best_params
0,svm,0.735886,"{'svc__C': 1, 'svc__kernel': 'linear'}"
1,random_forest,0.521321,{'randomforestclassifier__n_estimators': 10}
2,logistic_regression,0.78033,{'logisticregression__C': 5}


In [29]:
best_estimators

{'svm': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('svc',
                  SVC(C=1, gamma='auto', kernel='linear', probability=True))]),
 'random_forest': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('randomforestclassifier',
                  RandomForestClassifier(n_estimators=10))]),
 'logistic_regression': Pipeline(steps=[('standardscaler', StandardScaler()),
                 ('logisticregression',
                  LogisticRegression(C=5, solver='liblinear'))])}

In [30]:
best_estimators['svm'].score(X_test,y_test)

0.8360655737704918

In [31]:
best_estimators['random_forest'].score(X_test,y_test)

0.7049180327868853

In [32]:
best_estimators['logistic_regression'].score(X_test,y_test)

0.819672131147541

In [33]:
import pickle

In [34]:
best_clf = best_estimators['logistic_regression']

In [35]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, best_clf.predict(X_test))
cm

array([[ 7,  1,  2,  1,  0],
       [ 0,  9,  0,  0,  1],
       [ 1,  3, 21,  0,  0],
       [ 0,  1,  0,  7,  0],
       [ 0,  0,  1,  0,  6]])

In [36]:
import seaborn as sn
plt.figure(figsize = (10,7))
sn.heatmap(cm, annot=True)
plt.xlabel('Predicted')
plt.ylabel('Truth')

ModuleNotFoundError: No module named 'seaborn'

In [37]:
import joblib
joblib.dump(best_clf, 'saved_model.pkl') 


['saved_model.pkl']