In [1]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
%matplotlib inline

In [2]:
# Haar cascade classifiers used to locate a face and then the eyes inside it.
faceCascade = cv2.CascadeClassifier('./haarcascades/haarcascade_frontalface_default.xml')
eyesCascade = cv2.CascadeClassifier('./haarcascades/haarcascade_eye.xml')

# cv2.CascadeClassifier does not raise when the XML file is missing or invalid;
# it produces an empty classifier that only fails later inside detectMultiScale
# with an obscure assertion. Fail fast here with a readable message instead.
for _cascade_label, _cascade in (('face', faceCascade), ('eyes', eyesCascade)):
    if _cascade.empty():
        raise IOError(
            "Could not load the " + _cascade_label + " Haar cascade; "
            "check that the XML file exists under ./haarcascades/"
        )

In [3]:
# Root folder holding one sub-folder of raw images per celebrity.
path_to_data = './images_dataset/'
# Output folder for the cropped faces; it sits inside the dataset root.
path_to_cropped_data = path_to_data + 'cropped'

In [7]:
# Collect one source directory per celebrity (every sub-folder of the dataset root).
# The cropped-output folder is located inside the dataset root, so it has to be
# excluded explicitly: otherwise a re-run after an earlier crop pass would treat
# 'cropped' as just another celebrity and give it its own class label.
_cropped_root = os.path.normpath(path_to_cropped_data)
# Sorted so that the directory order (and the class numbering derived from it)
# does not depend on the arbitrary order in which os.scandir yields entries.
img_dirs = sorted(
    entry.path
    for entry in os.scandir(path_to_data)
    if entry.is_dir() and os.path.normpath(entry.path) != _cropped_root
)
img_dirs

['./images_dataset/cristiano_ronaldo',
 './images_dataset/lionel_messi',
 './images_dataset/roger_federer',
 './images_dataset/serena_williams',
 './images_dataset/virat_kohli']

In [8]:
import shutil

# Always start from an empty output folder: remove whatever an earlier run left
# behind, then create the directory again.
cropped_dir_already_there = os.path.exists(path_to_cropped_data)
if cropped_dir_already_there:
    shutil.rmtree(path_to_cropped_data)
os.mkdir(path_to_cropped_data)

In [4]:
def get_cropped_img(img_path):
    """Return the first detected face region that contains at least two eyes.

    Parameters
    ----------
    img_path : str
        Path of the image file to read with ``cv2.imread``.

    Returns
    -------
    numpy.ndarray or None
        The colour (BGR) crop of the first face in which the eye cascade finds
        two or more eyes. ``None`` when the file cannot be read as an image or
        when no face satisfies that condition.
    """
    img = cv2.imread(img_path)
    # cv2.imread signals an unreadable / non-image file by returning None rather
    # than raising; passing that on to cvtColor would crash the whole crop pass.
    if img is None:
        return None

    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    faces = faceCascade.detectMultiScale(gray_img, scaleFactor=1.5, minNeighbors=5)
    for (x, y, w, h) in faces:
        roi_gray = gray_img[y:y+h, x:x+w]
        roi_color = img[y:y+h, x:x+w]
        eyes = eyesCascade.detectMultiScale(roi_gray)
        # Two visible eyes is the acceptance criterion for a usable face crop.
        if len(eyes) >= 2:
            return roi_color

    # No face with two detectable eyes was found.
    return None

In [9]:
# Crop every usable face and store it under <cropped root>/<celebrity>/<celebrity><n>.png.
# celebrity_file_dict maps each celebrity name to the list of cropped files written;
# cropped_img_dirs lists the per-celebrity output folders that were created.
cropped_img_dirs = []
celebrity_file_dict = {}
for img_dir in img_dirs:
    count = 1
    # basename/normpath copes with trailing separators and platform-specific
    # separators, unlike splitting on a hard-coded '/'.
    celebrity_name = os.path.basename(os.path.normpath(img_dir))
    celebrity_file_dict[celebrity_name] = []
    for entry in os.scandir(img_dir):
        # Nested folders and other non-file entries cannot be read as images.
        if not entry.is_file():
            continue
        roi_color = get_cropped_img(entry.path)
        if roi_color is None:
            continue
        cropped_folder = path_to_cropped_data + '/' + celebrity_name
        if not os.path.exists(cropped_folder):
            os.mkdir(cropped_folder)
            cropped_img_dirs.append(cropped_folder)
            print("Generating cropped folder for: ", celebrity_name)
        cropped_file_name = celebrity_name + str(count) + '.png'
        cropped_file_path = cropped_folder + '/' + cropped_file_name
        # cv2.imwrite reports failure through its return value; only record
        # (and number) files that were actually written.
        if not cv2.imwrite(cropped_file_path, roi_color):
            print("Could not write cropped image: ", cropped_file_path)
            continue
        celebrity_file_dict[celebrity_name].append(cropped_file_path)
        count += 1

Generating cropped folder for:  cristiano_ronaldo
Generating cropped folder for:  lionel_messi
Generating cropped folder for:  roger_federer
Generating cropped folder for:  serena_williams
Generating cropped folder for:  virat_kohli


In [10]:
# Display the mapping from celebrity name to the list of cropped image paths.
celebrity_file_dict

{'cristiano_ronaldo': ['./images_dataset/cropped/cristiano_ronaldo/cristiano_ronaldo1.png',
  './images_dataset/cropped/cristiano_ronaldo/cristiano_ronaldo2.png',
  './images_dataset/cropped/cristiano_ronaldo/cristiano_ronaldo3.png',
  './images_dataset/cropped/cristiano_ronaldo/cristiano_ronaldo4.png',
  './images_dataset/cropped/cristiano_ronaldo/cristiano_ronaldo5.png',
  './images_dataset/cropped/cristiano_ronaldo/cristiano_ronaldo6.png',
  './images_dataset/cropped/cristiano_ronaldo/cristiano_ronaldo7.png',
  './images_dataset/cropped/cristiano_ronaldo/cristiano_ronaldo8.png',
  './images_dataset/cropped/cristiano_ronaldo/cristiano_ronaldo9.png',
  './images_dataset/cropped/cristiano_ronaldo/cristiano_ronaldo10.png',
  './images_dataset/cropped/cristiano_ronaldo/cristiano_ronaldo11.png',
  './images_dataset/cropped/cristiano_ronaldo/cristiano_ronaldo12.png',
  './images_dataset/cropped/cristiano_ronaldo/cristiano_ronaldo13.png',
  './images_dataset/cropped/cristiano_ronaldo/cristi

In [11]:
# Assign each celebrity a consecutive integer class label, numbered from 0 in the
# iteration order of celebrity_file_dict.
celeb_num = {name: label for label, name in enumerate(celebrity_file_dict)}
celeb_num

{'cristiano_ronaldo': 0,
 'lionel_messi': 1,
 'roger_federer': 2,
 'serena_williams': 3,
 'virat_kohli': 4}

In [13]:
import pywt

def w2d(img, mode='haar', level=1):
    """Return the wavelet detail image of ``img`` as an 8-bit grayscale array.

    The image is converted to grayscale, scaled to [0, 1], decomposed with a 2-D
    discrete wavelet transform, and reconstructed with the approximation
    coefficients zeroed so that only the detail coefficients contribute.

    Parameters
    ----------
    img : numpy.ndarray
        A 3-channel BGR image, or an already-grayscale 2-D image.
    mode : str, optional
        Wavelet name passed to ``pywt.wavedec2`` / ``pywt.waverec2``.
    level : int, optional
        Decomposition level passed to ``pywt.wavedec2``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array holding the reconstructed detail image, scaled back by
        255 and clipped to the range [0, 255].
    """
    # A 2-D input is already single-channel; only colour input needs converting.
    imgArr = img if img.ndim == 2 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    imgArr = np.float32(imgArr) / 255

    coeffs = pywt.wavedec2(imgArr, mode, level=level)

    # Zero the approximation coefficients (first entry) without mutating the
    # array returned by wavedec2; the remaining entries are left untouched.
    coef_H = list(coeffs)
    coef_H[0] = np.zeros_like(coef_H[0])

    imgArr_H = pywt.waverec2(coef_H, mode) * 255

    # With the approximation removed the reconstruction contains negative (and
    # possibly > 255) values. Casting those floats straight to uint8 gives
    # platform-dependent results, so clamp to the valid range first.
    return np.uint8(np.clip(imgArr_H, 0, 255))
    

In [15]:
# Build the samples (X) and their numeric class labels (y). Each sample stacks the
# 32x32 colour image and the 32x32 wavelet image of one cropped face into a single
# column vector.
X, y = [], []
for celeb_name, celeb_files in celebrity_file_dict.items():
    for celeb_file in celeb_files:
        img = cv2.imread(celeb_file)
        if img is not None:
            # Raw-pixel part: 32 * 32 * 3 values.
            scaled_raw = cv2.resize(img, (32, 32))
            raw_column = scaled_raw.reshape(32 * 32 * 3, 1)

            # Wavelet part: 32 * 32 values.
            img_har = w2d(img, 'db1', 5)
            scaled_img_har = cv2.resize(img_har, (32, 32))
            har_column = scaled_img_har.reshape(32 * 32, 1)

            combined_img = np.vstack((raw_column, har_column))
            X.append(combined_img)
            y.append(celeb_num[celeb_name])

In [17]:
# Length of one stacked sample: 32*32*3 raw values plus 32*32 wavelet values.
len(X[0])

4096

In [19]:
# Turn the list of (4096, 1) column vectors into a float matrix with one row per
# sample and 4096 columns.
n_samples = len(X)
X = np.array(X).astype(float).reshape(n_samples, 4096)
X.shape

(158, 4096)

In [20]:
# Peek at the first sample's flattened feature row.
X[0]

array([ 69.,  57., 109., ...,  10.,  12.,   0.])