### Install Dependencies

In [1]:
# %pip install pandas
# %pip install numpy
# %pip install scikit-learn
# %pip install scikit-image
# %pip install opencv-python
# %pip install Pillow

### Imports

In [2]:
import glob
import os
import pickle
import time
from os import listdir

import cv2
import numpy as np
import pandas as pd
from PIL import Image
from skimage.morphology import convex_hull_image
from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

### Compress images

In [3]:
def compress_image(path : str):
	"""Load an image, shrink it to fit within 500x500 and return it as a BGR array.

	Args:
		path: path of the image file to open with Pillow.

	Returns:
		numpy array of shape (height, width, 3) in OpenCV's BGR channel order.
		The aspect ratio is preserved, so neither side exceeds 500 pixels.
	"""
	# The context manager releases the underlying file even if decoding fails.
	with Image.open(path) as im:
		im.thumbnail((500, 500), Image.LANCZOS)
		# Greyscale, CMYK or palette files would not produce the H x W x 3 RGB
		# array that COLOR_RGB2BGR expects, so normalise the mode first.
		rgb_im = im if im.mode == 'RGB' else im.convert('RGB')
		rgb = np.array(rgb_im)
	return cv2.cvtColor(rgb,cv2.COLOR_RGB2BGR)

### Read dataset

In [4]:
def read_image(path: str) -> np.array:
	"""Return the image at `path`, downscaled by compress_image, as a flat 1-D array."""
	pixels = compress_image(path)
	return pixels.reshape(-1)

def get_jpgs(directory : str):
	"""List the `.jpg` files directly inside `directory` in numeric file-name order.

	Args:
		directory: directory path including its trailing separator; the glob
			pattern is built as `directory + '*.jpg'`.

	Returns:
		List of matching paths sorted by the integer formed from the digits of
		each file name (0 when the name has none), ties broken by the full path.
	"""
	def numeric_key(path):
		# Only the file name contributes digits: digits in the directory part
		# (e.g. the class folder "3/") would otherwise be fused with the file
		# number and mis-order zero-padded names such as "007.jpg" vs "12.jpg".
		digits = ''.join(filter(str.isdigit, os.path.basename(path)))
		return (int(digits or 0), path)

	return sorted(glob.glob(directory + '*.jpg'), key=numeric_key)

def read_images(directory : str) -> np.array:
	"""Load every labelled image under `directory` into one 2-D array.

	For each label 0-5 the files in `<directory>men/<label>/` are read first,
	then those in `<directory>Women/<label>/`, each in get_jpgs order.

	Args:
		directory: dataset root, including its trailing path separator.

	Returns:
		float32 array of shape (n_images, 281*500*3 + 1). Column 0 holds the
		label and the remaining columns the flattened pixel values. The shape
		is (0, 281*500*3 + 1) when no image is found.

	Raises:
		ValueError: if an image does not flatten to 281*500*3 values.
	"""
	row_len = 281*500*3 + 1
	labelled_files = [
		(num, file)
		for num in range(0,6)
		for class_dir in (directory + f'men/{num}/', directory + f'Women/{num}/')
		for file in get_jpgs(class_dir)
	]
	# Allocate the result once, directly in float32: concatenating int64 rows
	# label by label promoted everything to float64 and failed outright when a
	# class directory had no images.
	array = np.empty((len(labelled_files), row_len), dtype=np.float32)
	for row, (num, file) in zip(array, labelled_files):
		pixels = read_image(file)
		if pixels.size != row_len - 1:
			raise ValueError(f'{file}: expected {row_len - 1} pixel values, got {pixels.size}')
		# `row` is a view into `array`, so these assignments fill it in place.
		row[0] = num
		row[1:] = pixels
	return array

def preprocess(img) -> np.array:
	"""Turn one BGR image into the feature vector fed to the extractor/classifier.

	The image is thresholded in YCrCb space into a binary mask. The convex hull
	of the mask's largest external contour supplies up to 40 points, and a
	median-blurred, dilated copy of the mask supplies the per-pixel features.

	Args:
		img: array-like holding 281*500*3 pixel values of a 281x500 BGR image;
			values are rounded and cast to uint8.

	Returns:
		1-D float32 array of length 140581 laid out as: 40 hull x-coordinates,
		40 hull y-coordinates (zero padded, extra hull points dropped), the sum
		of all hull coordinates, then the 281*500 filtered mask pixels.
		When the mask has no contour, all 81 hull features are zero.
	"""
	img = np.array(np.round(img), dtype=np.uint8).reshape((281,500,3))
	img = cv2.cvtColor(img,cv2.COLOR_BGR2YCrCb)
	# Frames whose mean luma exceeds 196 use a lower bound of 133 instead of
	# 137 on the Cr channel; every other threshold is shared.
	avg_luma = np.mean(img[:,:,0])
	cr_low = 133 if avg_luma>196 else 137
	mask = cv2.inRange(img,(0,cr_low,77),(255,163,140))

	blur = cv2.medianBlur(mask, 5)
	kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (8, 8))
	img_filtered = np.asarray(cv2.dilate(blur, kernel), dtype=np.uint8)

	# NOTE(review): contours come from the raw mask, not the filtered one.
	# Kept as is because the pickled model presumably depends on features
	# built this way -- confirm before changing.
	ctrs, _ = cv2.findContours(mask,cv2.RETR_EXTERNAL,cv2.CHAIN_APPROX_NONE)
	hullpts = np.zeros((40,2))
	hull_sum = 0
	# A frame with no in-range pixels has no contour; leave the hull features
	# at zero instead of failing in max() on an empty sequence.
	if len(ctrs) > 0:
		max_ctr = max(ctrs,key=cv2.contourArea)
		hull = cv2.convexHull(max_ctr)
		# The scalar feature is the sum of the hull point coordinates. The
		# filled-hull image formerly rendered here was discarded before use,
		# so it is no longer computed.
		hull_sum = np.sum(np.asarray(hull,dtype=int))
		hull = hull.reshape(-1,2)
		hullpts[:hull.shape[0],:] = hull[:40,:]
	return np.concatenate((hullpts[:,0],hullpts[:,1],[hull_sum],img_filtered.reshape(-1)), dtype=np.float32)

def preprocessing(imgs) -> np.array:
	"""Run preprocess on every row of `imgs`.

	Args:
		imgs: 2-D array with one image per row.

	Returns:
		float32 array of shape (imgs.shape[0], 140581), one feature vector per row.
	"""
	n_samples = imgs.shape[0]
	features = np.zeros((n_samples, 140581), dtype=np.float32)
	for idx in range(n_samples):
		features[idx] = preprocess(imgs[idx, :])
	return features

### Model

In [5]:
def read_model(classifier_path, extractor_path):
	"""Load the pickled classifier and feature extractor.

	Args:
		classifier_path: path of the pickle file holding the classifier.
		extractor_path: path of the pickle file holding the feature extractor.

	Returns:
		Tuple `(classifier, extractor)` of the two unpickled objects.
	"""
	# SECURITY: pickle.load can execute arbitrary code; only load model files
	# that come from a trusted source.
	# `with` closes each file as soon as it has been read.
	with open(classifier_path, "rb") as classifier_file:
		classifier = pickle.load(classifier_file)
	with open(extractor_path, "rb") as extractor_file:
		extractor = pickle.load(extractor_file)
	return classifier, extractor

In [8]:
def model_prediction(model, extractor, x_test, y_test, results_path="results.txt", times_path="time.txt"):
	"""Predict each sample one at a time, logging the prediction and its latency.

	For every row of `x_test` the full chain (preprocessing, extractor
	transform, model predict) is timed. One line per sample is written to each
	output file, in sample order. Both files are overwritten.

	Args:
		model: fitted estimator exposing `predict`.
		extractor: fitted transformer exposing `transform`.
		x_test: iterable of 1-D arrays, one flattened image per sample.
		y_test: unused; kept so existing callers keep working.
		results_path: file receiving the rounded prediction of each sample.
		times_path: file receiving the elapsed seconds of each sample.
	"""
	with open(results_path, "w") as result_file, open(times_path, "w") as time_file:
		for x_sample in x_test:
			# perf_counter is a monotonic high-resolution clock; time.time()
			# can jump when the system clock is adjusted.
			start = time.perf_counter()
			features = preprocessing(x_sample.reshape(1,-1))
			prediction = model.predict(extractor.transform(features))
			elapsed = time.perf_counter() - start
			time_file.write(str(elapsed) + "\n")
			result_file.write(str(round(prediction[0])) + "\n")

### Pipeline

In [9]:
# Load the labelled images and the pickled classifier / feature extractor.
dataset = read_images('../clean_reduced_dataset/')
classifier, extractor = read_model("classifier.pkl","extractor.pkl")
start = time.time()
# Evaluation sample: every 50th row from index 5 up to (not including) 1000.
# Column 0 of each row is the label; the remaining columns are the pixels.
X, Y = dataset[5:1000:50, 1:], dataset[5:1000:50, 0]
model_prediction(model=classifier, extractor=extractor, x_test=X, y_test=Y)