<a href="https://colab.research.google.com/github/Charitythedatascientist/computer_vision/blob/main/facedetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Importing Libraries**

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
from sklearn.neighbors import KNeighborsClassifier
import pickle

In [4]:
# View the images: list every file name in the photo folder.
# The recorded output shows a mix of .heic, .jpg and .png files; process_faces()
# further down does not read .heic files, so those photos are not used.
# NOTE(review): no drive.mount(...) cell is visible in this notebook — presumably
# Google Drive was mounted at /content/drive beforehand; confirm.

image_path = '/content/drive/MyDrive/photos'
os.listdir(image_path)


['20240907_090503.heic',
 '20241005_162036.heic',
 '20240907_090450.heic',
 '20240907_090458.heic',
 '20241026_150404.heic',
 '20240907_090454.heic',
 '20231127_142517.jpg',
 '20231127_142509.jpg',
 '20231013_180020.jpg',
 '20230921_232218.jpg',
 '20231031_131626.jpg',
 '20231013_180022.jpg',
 '20231015_121109.jpg',
 '20231031_150110.jpg',
 '20231213_112812.png']

**Data Preprocessing**

In [5]:
def process_faces(image_path):
    """Detect frontal faces in a folder of images and return them as feature rows.

    Every ``.jpg`` / ``.jpeg`` / ``.png`` file (extension matched
    case-insensitively) directly inside ``image_path`` is read with OpenCV,
    converted to grayscale and scanned with the Haar frontal-face cascade.
    Each detected face is cropped, resized to 100x100 pixels and flattened
    into a 10,000-element row.

    Parameters
    ----------
    image_path : str
        Directory containing the images. Sub-directories are not searched.

    Returns
    -------
    X : numpy.ndarray
        One flattened 100x100 grayscale face per row; empty if no face was found.
    y : numpy.ndarray
        One label per row of ``X``. Every detected face is labelled ``1``.

    Raises
    ------
    RuntimeError
        If the Haar cascade file cannot be loaded.
    """
    # Feature rows (X) and their labels (y).
    X, y = [], []

    # Compare extensions case-insensitively so files such as "IMG_1.JPG" are kept.
    # Sorting makes the row order independent of the file system's listing order.
    image_extensions = (".jpg", ".jpeg", ".png")
    image_files = sorted(
        name for name in os.listdir(image_path)
        if name.lower().endswith(image_extensions)
    )
    if not image_files:
        return np.array(X), np.array(y)

    # Load the frontal-face Haar cascade once; it does not change between images.
    face_classifier = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
    if face_classifier.empty():
        raise RuntimeError("Could not load haarcascade_frontalface_default.xml")

    for filename in image_files:
        img_path = os.path.join(image_path, filename)

        # cv2.imread() returns None instead of raising when a file is missing,
        # corrupt or not a supported image, so check before using the result.
        img = cv2.imread(img_path)
        if img is None:
            print(f"Skipping unreadable image: {img_path}")
            continue

        # Face detection runs on a single-channel image, which is also cheaper.
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # scaleFactor=1.1 : the image is shrunk by 10% at each scale step so that
        #                   faces of different sizes can be matched.
        # minNeighbors=5  : a candidate needs at least 5 overlapping detections
        #                   to be kept, which filters out false positives.
        # minSize=(30, 30): candidates smaller than 30x30 pixels are ignored.
        faces = face_classifier.detectMultiScale(gray_img, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

        # Each detection is a bounding box: (x, y_) is the top-left corner and
        # (w, h) the width and height. `y_` avoids shadowing the label list `y`.
        for (x, y_, w, h) in faces:
            face = gray_img[y_:y_+h, x:x+w]
            resized_face = cv2.resize(face, (100, 100))  # Same size for every face
            X.append(resized_face.flatten())  # Flatten the face for ML model input
            y.append(1)  # Label all faces as "1" for matching

    # Return the processed features and labels as numpy arrays
    return np.array(X), np.array(y)


# **Model Training**

In [6]:
def model_training(image_path='/content/drive/MyDrive/photos', model_path='knn_model.pkl', n_neighbors=5):
  """Train a KNN classifier on the detected faces and pickle it to disk.

  Parameters
  ----------
  image_path : str
      Directory passed to process_faces() to build the training data.
  model_path : str
      File the fitted model is pickled to.
  n_neighbors : int
      Requested number of neighbours; capped at the number of training rows.

  Returns
  -------
  KNeighborsClassifier
      The fitted model (the same object that was written to ``model_path``).

  Raises
  ------
  ValueError
      If process_faces() returned no feature rows or no labels.
  """
  X, y = process_faces(image_path)

  # Fail early with a clear message instead of letting fit() fail on empty data.
  if len(X) == 0:
    raise ValueError("No images available")
  if len(y) == 0:
    raise ValueError("No labels provided")

  # Creating an instance of the KNN model.
  # Neighbour queries fail when n_neighbors exceeds the number of fitted samples,
  # so cap it for very small photo sets.
  knn_model = KNeighborsClassifier(n_neighbors=min(n_neighbors, len(X)))
  # Fitting the model to the training dataset
  knn_model.fit(X, y)

  # Save the model using Python's pickle module so it can later be loaded
  # and used for predictions without retraining.
  # The file is opened for writing in binary mode; pickle.dump serializes the
  # model into a byte stream and writes it to that file.
  with open(model_path, 'wb') as f:
    pickle.dump(knn_model, f)

  print("The model has been trained and saved as a pkl file.")

  return knn_model

In [None]:
def testing():
  with open('knn_model.pkl', 'rb') as f:
    knn_model = pickle.load(f)
