### Install Dependencies

In [None]:
# %pip install pandas
# %pip install numpy
# %pip install scikit-learn
# %pip install scikit-image
# %pip install opencv-python
# %pip install Pillow

### Includes

In [None]:
import glob
import os
import pickle
import time

import cv2
import numpy as np
from PIL import Image
from skimage.morphology import convex_hull_image
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

### Utility function to compress images (Run this cell)
- Please note that the images should have an aspect ratio of 16:9 (width:height) and be larger than 500 × 281 pixels (width × height).

In [None]:
# def compress_image(path: str):
#     """
#     Compresses the image from given path to dimensions less than 500x500
#     """
#     im = Image.open(path)
#     im.thumbnail((500, 500), Image.LANCZOS)
#     return cv2.cvtColor(np.array(im), cv2.COLOR_RGB2BGR)

def compress_image(path: str):
    """
    Reads the image from the given path and resizes it to exactly 500x281 (width x height)

    The resize is unconditional: the source aspect ratio is not preserved and
    smaller images are scaled up.

    Returns the resized image as produced by cv2.resize.
    Raises OSError if cv2.imread could not load anything from the path.
    """
    im = cv2.imread(path)
    if im is None:
        # cv2.imread reports failure by returning None instead of raising;
        # without this check the error only surfaces as an obscure assertion
        # inside cv2.resize.
        raise OSError(f"Could not read image (missing, unreadable or unsupported format): {path}")
    return cv2.resize(im, (500, 281))

# im = compress_image("../../dataset/men/3/3_men (102).JPG")
# # save image as img1.jpg
# cv2.imwrite("img1.jpg", im)

### Utility functions to read the dataset and perform preprocessing (Run this cell)
- Images should be in JPG format (with a `.jpg` extension) and located in the directory whose path is passed to the function `read_images`.

In [None]:
def read_image(path: str) -> np.ndarray:
    """
    Loads the image at the given path through compress_image and returns it
    flattened into a one-dimensional numpy array
    """
    image = compress_image(path)
    flattened = image.reshape(-1)
    return flattened


def get_jpgs(directory: str) -> list[str]:
    """
    Lists all jpgs in the given directory sorted by the number in their file name

    The directory may be given with or without a trailing path separator.
    Files are ordered by the integer formed from the decimal digits of the
    file name (0 when the name has no digits); ties are broken by the full path.
    """
    def sort_key(path: str):
        # Only the file name contributes digits: digits in the directory part
        # would be glued in front of the number and break the ordering of
        # zero-padded names. isdecimal (rather than isdigit) keeps characters
        # such as superscripts, which int() cannot parse, out of the key.
        digits = ''.join(filter(str.isdecimal, os.path.basename(path)))
        return (int(digits or 0), path)

    # os.path.join inserts the separator only when it is missing, so both
    # 'data' and 'data/' search inside the directory.
    return sorted(glob.glob(os.path.join(directory, '*.jpg')), key=sort_key)


def read_images(directory: str) -> np.ndarray:
    """
    Loads every jpg found by get_jpgs in the given directory

    Returns a float32 numpy array with one row per image, each row holding the
    281*500*3 flattened values returned by read_image.
    """
    paths = get_jpgs(directory)
    images = np.empty((len(paths), 281 * 500 * 3), dtype=np.float32)
    # Each row is a view into the result, so assigning into it fills the
    # matching line of the output array.
    for row, path in zip(images, paths):
        row[...] = read_image(path)
    return images


def preprocess(img: np.ndarray) -> np.ndarray:
    """
    Applies preprocessing to the image and returns a numpy array of the preprocessed image

    The input is rounded, cast to uint8 and reshaped to (281, 500, 3), converted
    from BGR to YCrCb and thresholded with cv2.inRange into a binary mask.

    Returns a float32 vector of length 40 + 40 + 1 + 281*500 laid out as:
      - x coordinates of the first 40 convex-hull points of the largest
        contour of the mask (zero padded),
      - y coordinates of the same points (zero padded),
      - the sum of all hull point coordinates,
      - the median-blurred and dilated mask, flattened.
    If the mask contains no contour the hull entries and the sum are all zero.
    """
    img = np.array(np.round(img), dtype=np.uint8).reshape((281, 500, 3))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)
    avg_luma = np.mean(img[:, :, 0])
    # Bright images (mean of the first channel above 196) use a lower bound of
    # 133 on the second channel, all others 137; the remaining bounds are shared.
    lower_cr = 133 if avg_luma > 196 else 137
    mask = cv2.inRange(img, (0, lower_cr, 77), (255, 163, 140))

    blur = cv2.medianBlur(mask, 5)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (8, 8))
    img_filtered = np.asarray(cv2.dilate(blur, kernel), dtype=np.uint8)

    hullpts = np.zeros((40, 2))
    hull_sum = 0
    # Contours are taken from the raw mask, not from the blurred/dilated one.
    ctrs, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    if len(ctrs) > 0:
        max_ctr = max(ctrs, key=cv2.contourArea)
        hull = cv2.convexHull(max_ctr)
        # NOTE(review): an earlier version also rasterised the hull with
        # convex_hull_image and then discarded the result, so this feature has
        # always been the sum of the hull point coordinates. A filled-hull area
        # may have been intended, but the value is kept as is because the
        # pickled extractor/classifier were presumably fitted on it - confirm
        # before changing.
        hull_sum = np.sum(np.asarray(hull, dtype=int))
        hull = hull.reshape(-1, 2)
        hullpts[:hull.shape[0], :] = hull[:40, :]
    # else: an empty mask has no contour; keep the zero-filled hull features
    # instead of failing on max() of an empty sequence.

    return np.concatenate((hullpts[:, 0], hullpts[:, 1], [hull_sum], img_filtered.reshape(-1)), dtype=np.float32)


### Utility functions for reading and using the model (Run this cell)

In [None]:
def read_model(classifier_path: str, extractor_path: str):
    """
    Takes the path of pkl files and returns the classifier and the feature extractor

    Returns a tuple (classifier, extractor) with the objects unpickled from
    classifier_path and extractor_path respectively. Both files are closed
    before returning, also when unpickling fails.
    """
    # SECURITY: pickle.load can execute arbitrary code while loading; only use
    # model files that come from a trusted source.
    with open(classifier_path, "rb") as classifier_file:
        classifier = pickle.load(classifier_file)
    with open(extractor_path, "rb") as extractor_file:
        extractor = pickle.load(extractor_file)
    return classifier, extractor

def model_prediction(model : SVC, extractor : PCA, x_test : np.ndarray, y_test = None):
    """
    Takes the model, feature extractor and the test data

    Every sample of x_test is preprocessed, transformed by the extractor and
    classified by the model.
    Writes the rounded prediction of each sample to results.txt and the time
    taken for it, in seconds, to time.txt (one line per sample, in input order).
    Prints the accuracy if the labels y_test are given.
    """
    # Collected in a list: the previous np.empty(N) buffer extended with
    # np.append ended up with 2N entries, the first N uninitialised, so the
    # accuracy was computed against garbage of the wrong length.
    predictions = []
    with open("results.txt", "w") as result_file, open("time.txt", "w") as time_file:
        for x_sample in x_test:
            # The timed span covers preprocessing, feature extraction and
            # prediction, but not the file writes.
            start = time.time()
            preprocessed = preprocess(x_sample)
            extracted_features = extractor.transform(preprocessed.reshape(1, -1))
            prediction = model.predict(extracted_features)
            time_file.write(str(time.time() - start) + "\n")
            result_file.write(str(round(prediction[0])) + "\n")
            predictions.append(prediction[0])
    if y_test is not None:
        acc = accuracy_score(y_test, predictions)
        print("Total Accuracy: ", acc)


### Pipeline (Run this cell)
- If the dataset or the model files are stored somewhere else, pass the dataset path to the function `read_images` and the model paths to the function `read_model`.

In [None]:
# Load every test image as one flattened row of X.
X = read_images(directory='data/')
# Restore the pickled classifier and feature extractor.
classifier, extractor = read_model(classifier_path="classifier.pkl", extractor_path="extractor.pkl")
# Predict each sample; writes results.txt and time.txt to the working directory.
model_prediction(model=classifier, extractor=extractor, x_test=X)

### Appendix (Don't run)
Contains implementation details of the model

In [None]:
# feature_extractor = PCA(n_components=30)
# classifier = SVC(C=10)