### Install Dependencies

In [8]:
# %pip install pandas
# %pip install numpy
# %pip install scikit-learn
# %pip install scikit-image
# %pip install opencv-python
# %pip install Pillow

### Includes

In [9]:
import cv2
import pandas as pd
import numpy as np
import glob
import pickle
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn import svm
from skimage.morphology import convex_hull_image
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from os import listdir
from PIL import Image



### Compress images

In [10]:
def compress_image(path : str):
	im = Image.open(path)
	im.thumbnail((500, 500), Image.LANCZOS)
	return cv2.cvtColor(np.array(im),cv2.COLOR_RGB2BGR)

### Read dataset

In [11]:
def read_image(path: str) -> np.array:
	return compress_image(path).reshape(-1)

def read_images(directory : str) -> np.array:
	array = np.array([], dtype=np.int8).reshape(0,281*500*3+1)
	for num in range(0,6):
		men_dir = directory + f'men/{num}/'
		women_dir = directory + f'Women/{num}/'
		new_men = np.array([np.concatenate((np.asarray([num]), read_image(men_dir + file)), axis=0) for file in listdir(men_dir)])
		new_women = np.array([np.concatenate((np.asarray([num]), read_image(women_dir + file)), axis=0) for file in listdir(women_dir)])
		array = np.concatenate((array, new_men, new_women), axis=0)
	return array

def preprocess(img) -> np.array:
	img = img.reshape(281,500,3)
	img = cv2.cvtColor(img,cv2.COLOR_BGR2YCrCb)
	avg_luma = np.mean(img[:,:,0])
	if avg_luma>196:
		img = cv2.inRange(img,(0,133,77),(255,163,140))
	else:
		img = cv2.inRange(img,(0,137,77),(255,163,140))
	blur = cv2.medianBlur(img, 5)
	kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (8, 8))
	img_filtered = cv2.dilate(blur, kernel)
	ctrs, _ = cv2.findContours(img,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)
	max_ctr = max(ctrs,key=cv2.contourArea)
	hullpts = np.zeros((40,2))
	hull = cv2.convexHull(max_ctr)
	contour = np.zeros(img.shape)
	
	cv2.drawContours(contour,hull,-1,(255,255,255),1)
	contour = convex_hull_image(contour)
	contour = np.asarray(hull,dtype=int)
	contour = np.sum(contour)
	hull = hull.reshape(-1,2)
	hullpts[:hull.shape[0],:] = hull[:40,:]
	return np.concatenate((hullpts[:,0],hullpts[:,1],[contour],img_filtered.reshape(-1)))

def preprocessing(imgs) -> np.array:
	array = np.zeros((imgs.shape[0],140582), dtype=np.int8)
	array[:,0]=imgs[:,0]
	vec_preprocess = np.vectorize(preprocess)
	array[:,1:] = vec_preprocess(imgs[:,1:])
	return array

### Model

In [12]:
def read_model(classifier_path, extractor_path):
	return pickle.load(open(classifier_path, "rb")), pickle.load(open( extractor_path, "rb"))

In [13]:
def model_prediction(model, x_test, y_test):
	model.fit(x_test, y_test)
	prediction_test = np.array(np.floor(model.predict(x_test)), dtype=np.int8)
	acc_test = accuracy_score(y_test, prediction_test)
	print("Accuracy Score Test: ", acc_test)

### Pipeline

In [7]:
dataset = read_images('../reduced_dataset/')
dataset = preprocessing(dataset)
X = dataset[:, 1:]
Y = dataset[:, 0]
classifier, extractor = read_model("classifier.pkl","extractor.pkl")
model_prediction(classifier, extractor.transform(X), Y)

Accuracy Score Test:  0.9807797913234486
