In [1]:
import cv2
import numpy as np
import math
!pip install pyheif
import pyheif
import re

!pip install dlib
import dlib
!pip install face_recognition
!pip3 install scikit-learn
from sklearn import neighbors
import pickle
from PIL import Image, ImageDraw, ImageFont
import face_recognition
from face_recognition.face_recognition_cli import image_files_in_folder
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

# File extensions (without the leading dot) that the notebook treats as images.
# The membership checks against this set are exact string comparisons, which is
# why both the lower-case and the upper-case spellings are listed.
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'PNG', 'JPG', 'JPEG', 'HEIC', 'heic'}

Collecting pyheif
  Downloading pyheif-0.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.8/9.8 MB[0m [31m29.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pyheif
Successfully installed pyheif-0.7.1
Collecting face_recognition
  Downloading face_recognition-1.3.0-py2.py3-none-any.whl (15 kB)
Collecting face-recognition-models>=0.3.0 (from face_recognition)
  Downloading face_recognition_models-0.3.0.tar.gz (100.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.1/100.1 MB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: face-recognition-models
  Building wheel for face-recognition-models (setup.py) ... [?25l[?25hdone
  Created wheel for face-recognition-models: filename=face_recognition_models-0.3.0-py2.py3-none-any.whl size=100566170 sha256=e236bc8bdf4e1bea98ef34c

In [2]:
# Mount Google Drive at /content/drive (Colab only); the dataset and model paths
# used in the cells below all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Train the model: a k-nearest neighbors classifier for face recognition

def _load_rgb_image(img_path):
  """Load an image file as an RGB numpy array, decoding HEIC files through pyheif."""
  if os.path.splitext(img_path)[1][1:].lower() == 'heic':
    heif_file = pyheif.read(img_path)
    pil_img = Image.frombytes(
      heif_file.mode,
      heif_file.size,
      heif_file.data,
      "raw",
      heif_file.mode,
      heif_file.stride,
    )
    # The face detector only accepts 8-bit gray or RGB arrays, so drop any alpha channel.
    if pil_img.mode != "RGB":
      pil_img = pil_img.convert("RGB")
    return np.array(pil_img)
  return face_recognition.load_image_file(img_path)


def train(path, n_neighbors=None, knn_algo='ball_tree', verbose=True, test_size=0.3):
  """Train a KNN face classifier from a directory of per-person image folders.

  Every image that contains exactly one detectable face contributes one face
  encoding. Each person's encodings are split into a training part and a
  validation part; the classifier is fitted on the training part and weighted
  precision / recall / F1 on the validation part are printed.

  :param path: directory that contains a sub-directory (named as the person name)
    for each known person.
  :param n_neighbors: number of neighbors used by the classifier. When None it is
    chosen as round(sqrt(number of training encodings)).
  :param knn_algo: passed as ``algorithm`` to ``KNeighborsClassifier``.
  :param verbose: when True, report skipped images / people and the automatically
    chosen ``n_neighbors``.
  :param test_size: fraction of each person's encodings held out for validation.
    A person with fewer than 3 usable images uses 0.5 instead (for that person only).
  :return: the fitted ``KNeighborsClassifier``.
  :raises ValueError: if no usable training image is found under ``path``.
  """
  X_train, X_val, y_train, y_val = [], [], [], []

  # Loop through each person/className in the training set (sorted for reproducible splits)
  for class_dir in sorted(os.listdir(path)):
    person_dir = os.path.join(path, class_dir)
    if not os.path.isdir(person_dir):
      continue

    encodings = []  # one face encoding per usable image of this person

    # List the folder directly: face_recognition's image_files_in_folder only
    # returns jpg/jpeg/png files, so HEIC images would never reach the HEIC loader.
    for file_name in sorted(os.listdir(person_dir)):
      img_path = os.path.join(person_dir, file_name)
      extension = os.path.splitext(file_name)[1][1:]
      # skip sub-directories and non-image files
      if not os.path.isfile(img_path) or extension.lower() not in ALLOWED_EXTENSIONS:
        continue

      image = _load_rgb_image(img_path)
      face_bounding_boxes = face_recognition.face_locations(image)

      if len(face_bounding_boxes) != 1:
        # If there are no people (or too many people) in a training image, skip the image.
        if verbose:
          print("Image {} not suitable for training: {}".format(img_path, "Didn't find a face" if len(face_bounding_boxes) < 1 else "Found more than one face"))
        continue

      # Encode the image exactly as loaded (RGB). Swapping the channels here would
      # make the training encodings inconsistent with the ones predict() computes.
      encodings.append(
        face_recognition.face_encodings(image, known_face_locations=face_bounding_boxes)[0]
      )

    if not encodings:
      if verbose:
        print("No usable training images for {}; skipping this person.".format(class_dir))
      continue

    names = [class_dir] * len(encodings)

    if len(encodings) == 1:
      # A single sample cannot be split; keep it for training only.
      X_train.extend(encodings)
      y_train.extend(names)
      continue

    # Use a local value so a small class does not change the split of later people.
    person_test_size = 0.5 if len(encodings) < 3 else test_size

    # Split this person's data into training and validation sets
    ppl_X_train, ppl_X_val, ppl_y_train, ppl_y_val = train_test_split(
      encodings, names, test_size=person_test_size, random_state=42
    )

    # Add the training and val sets for this person to the whole training & val sets.
    X_train.extend(ppl_X_train)
    X_val.extend(ppl_X_val)
    y_train.extend(ppl_y_train)
    y_val.extend(ppl_y_val)

  if not X_train:
    raise ValueError("No usable training images found under {}".format(path))

  # Determine how many neighbors to use for weighting in the KNN classifier
  if n_neighbors is None:
    n_neighbors = int(round(math.sqrt(len(X_train))))
    if verbose:
      print("Chose n_neighbors automatically:", n_neighbors)

  # Create and train the KNN classifier
  knn_clf = neighbors.KNeighborsClassifier(
    n_neighbors=n_neighbors, algorithm=knn_algo, weights='distance'
  )
  knn_clf.fit(X_train, y_train)

  if not X_val:
    # Nothing was held out (every person had a single usable image).
    print("No validation samples available; skipping validation metrics.")
    return knn_clf

  # Predictions on the validation set
  y_pred = knn_clf.predict(X_val)

  # Calculate precision, recall, and F1-score
  precision = precision_score(y_val, y_pred, average='weighted')
  recall = recall_score(y_val, y_pred, average='weighted')
  f1 = f1_score(y_val, y_pred, average='weighted')

  print(f"Precision: {precision:.4f}")
  print(f"Recall: {recall:.4f}")
  print(f"F1-score: {f1:.4f}")

  return knn_clf

In [4]:
# Predict the unknown images:
# Recognizes faces in given image using a trained KNN classifier

def predict(X_img, knn_clf=None, distance_threshold=0.47):
  """Recognize every face found in an image with a trained KNN classifier.

  :param X_img: image array to search for faces; it is passed unchanged to
    ``face_recognition.face_locations`` and ``face_recognition.face_encodings``.
  :param knn_clf: trained KNN classifier (required; the None default only exists
    so the argument can be passed by keyword).
  :param distance_threshold: largest nearest-neighbor distance at which a face is
    still accepted as a known person.
  :return: a list of ``(name, face_location)`` tuples, one per detected face; the
    name is "unknown" when the nearest training sample is farther away than
    ``distance_threshold``. An empty list is returned when no face is found.
  :raises ValueError: if ``knn_clf`` is None.
  """
  if knn_clf is None:
    # Fail early with a clear message instead of an AttributeError on None further down.
    raise ValueError("predict() needs a trained KNN classifier passed as knn_clf")

  X_face_locations = face_recognition.face_locations(X_img)

  # If no faces are found in the image, return an empty result.
  if len(X_face_locations) == 0:
    return []

  # Find encodings for faces in the test image
  faces_encodings = face_recognition.face_encodings(X_img, known_face_locations=X_face_locations)

  # Distance from each face to its single closest training sample
  distances, _indices = knn_clf.kneighbors(faces_encodings, n_neighbors=1)
  predicted_names = knn_clf.predict(faces_encodings)

  # Keep the predicted class only when the closest sample is within the threshold
  return [
    (name, loc) if nearest[0] <= distance_threshold else ("unknown", loc)
    for name, loc, nearest in zip(predicted_names, X_face_locations, distances)
  ]



**Main Function**

Train the model

In [None]:
# TRAIN THE MODEL:
# Fit the KNN classifier on the training folder and keep it in `classifier`
# so that a later cell can save it.

# Root folder of the training images (see train() for the expected layout)
trainPath = '/content/drive/MyDrive/SCU COEN 240 Machine Learning/data/train'

print("Training KNN classifier...")
# n_neighbors is fixed to 2 here instead of letting train() choose it automatically
classifier = train(trainPath, n_neighbors=2)
print("Training complete!")

Training KNN classifier...
Image /content/drive/MyDrive/SCU COEN 240 Machine Learning/data/train/Anirudh/0921-ClassPhotos .png not suitable for training: Found more than one face
Image /content/drive/MyDrive/SCU COEN 240 Machine Learning/data/train/Shreya Devendra/Shreya Bisen (09_28).jpeg not suitable for training: Found more than one face
Image /content/drive/MyDrive/SCU COEN 240 Machine Learning/data/train/Kaiyue/d44a505436e430369cba0f59823fd93.png not suitable for training: Didn't find a face
Image /content/drive/MyDrive/SCU COEN 240 Machine Learning/data/train/Kaiyue/5d597c554c1b04c3276677553f1a3c8.png not suitable for training: Didn't find a face
Image /content/drive/MyDrive/SCU COEN 240 Machine Learning/data/train/Kaiyue/167f63c69e174e2ff4b596ec38b2b94.png not suitable for training: Didn't find a face
Image /content/drive/MyDrive/SCU COEN 240 Machine Learning/data/train/Kaiyue/8c573925b8c9b928d818de6265da8c8.png not suitable for training: Didn't find a face
Image /content/drive/

In [None]:
# Save the trained classifier to Google Drive with pickle so the test cell can
# load it again without retraining.

with open('/content/drive/My Drive/SCU COEN 240 Machine Learning/trainedModel/knn_model.pkl', 'wb') as f:
    pickle.dump(classifier, f)

Test the model

In [6]:
# Dataset paths used by the test cells:

# Text file holding the true label of each test image:
labels_path = '/content/drive/MyDrive/SCU COEN 240 Machine Learning/data/test/labels.txt'
# Folder holding the test images (the labels file lives in the same folder):
testPath = '/content/drive/MyDrive/SCU COEN 240 Machine Learning/data/test'

In [22]:
# Read the labels file: each non-empty line holds tab-separated fields, and the
# last field is used as the true label of that image.
#
# Produces:
#   labels      - the fields of each line joined with '-', e.g. '2_0.jpeg-Anirudh',
#                 sorted by the leading number of the file name
#   true_labels - the person name of each entry of `labels`, in the same order

label_entries = []  # (combined "file-name" string, person name) per line
with open(labels_path, 'r') as file:
    for line in file:
        stripped = line.strip()
        if not stripped:
            # A blank line has no leading number and would break the numeric sort below.
            continue
        elements = stripped.split('\t')
        combined = '-'.join(elements)  # Combine the elements with '-'
        label_entries.append((combined, elements[-1]))

# Sort by the number in front of the first '_' (stable, so ties keep file order)
label_entries.sort(key=lambda entry: int(entry[0].split('_')[0]))

labels = [combined for combined, _name in label_entries]
# Take the name from the parsed fields rather than re-splitting the combined
# string on '-', which would truncate a name that itself contains a hyphen.
true_labels = [name for _combined, name in label_entries]

# Display the label list:
print(labels)
print(len(labels))
print(true_labels)
print(len(true_labels))


['2_0.jpeg-Anirudh', '3_0.jpeg-Anirudh', '4_0.jpeg-Anirudh', '5_0.jpeg-Anirudh', '7_0.jpeg-Aparnaa', '8_0.jpeg-Aparnaa', '10_0.jpeg-Chen', '11_0.jpeg-Chen', '12_0.jpeg-Chen', '14_0.jpeg-Chen', '16_0.jpeg-Dheeraj', '17_0.jpeg-Dheeraj', '18_0.jpeg-Divyanth', '19_0.jpeg-Divyanth', '20_0.jpeg-Divyanth', '21_0.jpeg-Divyanth', '22_0.jpeg-Griffin', '23_0.jpeg-Griffin', '24_0.jpeg-Griffin', '25_0.jpeg-Griffin', '26_0.jpeg-Haisong', '27_0.jpeg-Haisong', '28_0.jpeg-Haisong', '29_0.jpeg-Haisong', '30_0.jpeg-Haochen', '31_0.jpeg-Haochen', '32_0.jpeg-Haochen', '33_0.jpeg-Haochen', '35_0.jpeg-Huiyu', '37_0.jpeg-Huiyu', '38_0.jpeg-Huiyu', '39_0.jpeg-Huiyu', '40_0.jpeg-Husain', '41_0.jpeg-Husain', '42_0.jpeg-Husain', '44_0.jpeg-Husain', '47_0.jpeg-Kaiyue', '48_1.jpeg-Kaiyue', '49_0.jpeg-Kimsong', '51_0.jpeg-Kimsong', '52_0.jpeg-Kimsong', '53_0.jpeg-Kimsong', '58_0.jpeg-Madhuri', '59_0.jpeg-Madhuri', '60_0.jpeg-Madhuri', '61_0.jpeg-Manya', '62_0.jpeg-Manya', '63_0.jpeg-Manya', '64_0.jpeg-Peiqi', '65_1.

In [21]:
# TEST THE MODEL:
# Using the trained classifier, make predictions for unknown images and score
# them against the true labels read from the labels file.

# Load the saved model
# NOTE: pickle.load can execute arbitrary code while loading; only load model
# files that you created yourself.
with open('/content/drive/My Drive/SCU COEN 240 Machine Learning/trainedModel/knn_model.pkl', 'rb') as f:
    model = pickle.load(f)

if __name__ == "__main__":
    # Map each labelled file name to its true name. Entries of `labels` look like
    # '<file name>-<person name>'; split on the first '-' so a hyphenated name survives.
    true_name_by_file = {}
    for label in labels:
        labelled_file, _sep, person = label.partition('-')
        true_name_by_file[labelled_file] = person

    predicted_pairs = []          # (image file, predicted name) for every reported face
    eval_true, eval_pred = [], []  # label/prediction pairs matched by file name

    for image_file in sorted(os.listdir(testPath)):
        full_file_path = os.path.join(testPath, image_file)
        extension = os.path.splitext(image_file)[1][1:].lower()

        if not os.path.isfile(full_file_path) or extension not in ALLOWED_EXTENSIONS:
            print("Skipping non-image entry: {}".format(full_file_path))
            continue

        if extension == 'heic':
            heif_file = pyheif.read(full_file_path)
            reImg = Image.frombytes(
                heif_file.mode,
                heif_file.size,
                heif_file.data,
                "raw",
                heif_file.mode,
                heif_file.stride,
            )
            # The face detector only accepts 8-bit gray or RGB arrays, so drop any alpha channel.
            if reImg.mode != "RGB":
                reImg = reImg.convert("RGB")
            X_img = np.array(reImg)
        else:
            X_img = face_recognition.load_image_file(full_file_path)

        predictions = predict(X_img, knn_clf=model)
        if len(predictions) == 0:
            print("Didn't find a face in the image: {}".format(image_file))
            # Same spelling as the "unknown" label returned by predict()
            predicted_names = ["unknown"]
        else:
            predicted_names = [name for name, _location in predictions]

        for name in predicted_names:
            predicted_pairs.append((image_file, name))

        if image_file not in true_name_by_file:
            print("No true label for {}; excluded from the metrics.".format(image_file))
            continue

        # Pair every face reported for this file with the file's true label, so the
        # two metric inputs always have the same length and stay aligned by file
        # name instead of by position in two independently sorted lists.
        for name in predicted_names:
            eval_true.append(true_name_by_file[image_file])
            eval_pred.append(name)

    # Sort for display by the number in front of the first '_', then by file name
    predicted_pairs.sort(key=lambda pair: (int(pair[0].split('_')[0]), pair[0]))
    predictList = ["-".join([image_file, name]) for image_file, name in predicted_pairs]
    predictNameList = [name for _image_file, name in predicted_pairs]
    print("Predict List:", predictList)
    print("Predict name:", predictNameList)

    if eval_true:
        precision = precision_score(eval_true, eval_pred, average='weighted')
        recall = recall_score(eval_true, eval_pred, average='weighted')
        f1 = f1_score(eval_true, eval_pred, average='weighted')

        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print(f"F1-score: {f1:.4f}")
    else:
        print("No labelled test images were found; metrics not computed.")
    print(" ")


This is the label path: /content/drive/MyDrive/SCU COEN 240 Machine Learning/data/test/labels.txt
Predict List: ['2_0.jpeg-Anirudh', '3_0.jpeg-Anirudh', '4_0.jpeg-Anirudh', '5_0.jpeg-Anirudh', '7_0.jpeg-Aparnaa', '8_0.jpeg-Aparnaa', '10_0.jpeg-Chen', '11_0.jpeg-Chen', '12_0.jpeg-Chen', '14_0.jpeg-Chen', '16_0.jpeg-Dheeraj', '17_0.jpeg-Dheeraj', '18_0.jpeg-Divyanth', '19_0.jpeg-Divyanth', '20_0.jpeg-Divyanth', '21_0.jpeg-Divyanth', '22_0.jpeg-Griffin', '23_0.jpeg-Griffin', '24_0.jpeg-Griffin', '25_0.jpeg-Griffin', '26_0.jpeg-Haisong', '27_0.jpeg-Haisong', '28_0.jpeg-Haisong', '29_0.jpeg-Haisong', '30_0.jpeg-Haochen', '31_0.jpeg-Haochen', '32_0.jpeg-Haochen', '33_0.jpeg-Haochen', '35_0.jpeg-Huiyu', '37_0.jpeg-Huiyu', '38_0.jpeg-Huiyu', '39_0.jpeg-Huiyu', '40_0.jpeg-Husain', '41_0.jpeg-Husain', '42_0.jpeg-Husain', '44_0.jpeg-Husain', '47_0.jpeg-Kaiyue', '48_1.jpeg-Kaiyue', '49_0.jpeg-Kimsong', '51_0.jpeg-Kimsong', '52_0.jpeg-Kimsong', '53_0.jpeg-Kimsong', '58_0.jpeg-Madhuri', '59_0.jpeg-M