# Import libraries

In [None]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
# from tensorflow import load_model
from mtcnn import MTCNN
from skimage.feature import hog


# Face Detection using CNN

## What we might have to use in our final code

In [None]:
def detect_faces_cnn(video_path, model_path):
  """
  Detect faces in every frame of a video using a CNN model and display them.

  Each frame is read with OpenCV, passed through ``preprocess_frame``, fed to
  the loaded model, and the boxes returned by ``process_predictions`` are
  drawn on the frame, which is then shown in an OpenCV window. Pressing 'q'
  stops playback early.

  Args:
      video_path: Path to the video file.
      model_path: Path to the pre-trained CNN model for face detection.

  Returns:
      None
  """

  # Create video capture object
  cap = cv2.VideoCapture(video_path)

  # Check if video capture is successful
  if not cap.isOpened():
    print("Error opening video!")
    return

  try:
    # Load the pre-trained CNN model. `load_model` is reached through the
    # already-imported `tf` module because the bare name is never imported.
    face_detection_model = tf.keras.models.load_model(model_path)

    while True:
      # Capture frame-by-frame
      ret, frame = cap.read()

      # Check if frame is read correctly
      if not ret:
        print("No more frames to process!")
        break

      # Preprocess frame for the CNN model (resize, normalize)
      # Refer to your model's documentation for specific preprocessing steps
      preprocessed_frame = preprocess_frame(frame)

      # The model is given a batch, so add a leading batch axis of size 1
      batch = np.expand_dims(preprocessed_frame, axis=0)
      predictions = face_detection_model.predict(batch)

      # Process predictions to get bounding boxes (refer to model's output format)
      faces = process_predictions(predictions)

      # Draw rectangles around detected faces. Coordinates are cast to int
      # because cv2.rectangle rejects float points.
      for box in faces:
        x, y, w, h = (int(value) for value in box)
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)

      # Display the resulting frame with detections
      cv2.imshow('Video with Face Detection (CNN)', frame)

      # Exit loop if 'q' key is pressed
      if cv2.waitKey(1) & 0xFF == ord('q'):
        break
  finally:
    # Release resources even if model loading or inference raised
    cap.release()
    cv2.destroyAllWindows()

# Define functions for preprocessing frame and processing predictions
# Specific implementation depends on your chosen CNN model

def preprocess_frame(frame, target_size=None):
  """
  Resize a frame and scale its pixel values by 1/255.

  Args:
      frame: Image array to preprocess (passed straight to ``cv2.resize``).
      target_size: Optional ``(width, height)`` to resize to. When omitted,
          the module-level ``target_width`` and ``target_height`` are used,
          which is what this function always relied on.

  Returns:
      The resized frame divided by 255.0.

  Raises:
      NameError: If ``target_size`` is omitted and ``target_width`` /
          ``target_height`` are not defined at module level.
  """
  if target_size is None:
    try:
      target_size = (target_width, target_height)
    except NameError as err:
      # Same exception type as before, but say what the caller has to supply
      raise NameError(
          "preprocess_frame needs target_size=(width, height) or module-level "
          "target_width/target_height to be defined"
      ) from err

  # Resize the frame
  resized_frame = cv2.resize(frame, tuple(target_size))
  # Normalize pixel values (divide by 255)
  return resized_frame / 255.0

def process_predictions(predictions):
  """
  Turn raw model output into a list of face bounding boxes.

  Placeholder: the decoding step is not implemented yet, so ``predictions``
  is ignored and a new empty list is returned on every call.

  Args:
      predictions: Raw output of the face detection model.

  Returns:
      A list of bounding boxes (currently always empty).
  """
  # TODO: decode `predictions` according to the chosen model's output format
  # (thresholds, classification scores, etc.) and collect the boxes here.
  return list()

# Example usage: both paths are placeholders and must point at real files
video_path = 'path/to/your/video.mp4'
model_path = 'path/to/your/cnn_face_detection_model.h5'
detect_faces_cnn(video_path=video_path, model_path=model_path)


## Using MTCNN to detect faces

In [None]:
def extract_features_from_faces(detections, frame):
  """
  Extract features for every detected face in a frame.

  Args:
      detections: Iterable of detection dicts; each must have a 'box' entry
          that unpacks to (x, y, w, h).
      frame: Image array containing the faces, indexed as frame[row, column].

  Returns:
      features: List with one ``extract_features`` result per usable
          detection. Detections whose box has no overlap with the frame
          (empty crop) are skipped.
  """
  features = []

  for detection in detections:
    x, y, w, h = detection['box']

    # A box can start slightly outside the frame. A negative start index
    # would wrap around in NumPy slicing and give a wrong or empty crop, so
    # clamp every bound to 0 (upper bounds past the edge are already safe).
    top, left = max(y, 0), max(x, 0)
    bottom, right = max(y + h, 0), max(x + w, 0)
    face = frame[top:bottom, left:right]

    # Nothing to describe if the box lies entirely outside the frame
    if face.size == 0:
      continue

    features.append(extract_features(face))

  return features

def extract_features(face, show=True):
  """
  Compute a HOG descriptor for a face image.

  Args:
      face: Face image in RGB channel order. The callers in this file
          convert their images with ``cv2.COLOR_BGR2RGB`` before cropping.
      show: When True (the default, matching the previous behaviour), the
          face crop is displayed with matplotlib before features are
          computed. Pass False to skip plotting, e.g. when processing many
          video frames.

  Returns:
      features: HOG feature vector of the grayscale face. No resizing is
          done, so its length varies with the size of the crop.
  """
  if show:
    plt.imshow(face)
    plt.show()

  # The input is RGB, so use the RGB conversion code; the BGR code would
  # apply the red and blue luminance weights to the wrong channels.
  gray = cv2.cvtColor(face, cv2.COLOR_RGB2GRAY)

  # Only the descriptor is needed; the HOG visualisation image was never
  # used, so it is no longer rendered.
  features = hog(gray, orientations=8, pixels_per_cell=(16, 16),
                 cells_per_block=(1, 1))

  return features

In [None]:
def detect_faces_cnn(video_path):
  """
  Detect faces in every frame of a video with MTCNN and extract features.

  Args:
      video_path: Path to the video file.

  Returns:
      A list with one entry per processed frame, each entry being the list
      returned by ``extract_features_from_faces`` for that frame. The list
      is empty if the video cannot be opened.
  """
  all_features = []
  framenum = 0

  # Create video capture object
  cap = cv2.VideoCapture(video_path)

  # Check if video capture is successful
  if not cap.isOpened():
    print("Error opening video!")
    return all_features

  try:
    # The detector does not depend on the frame, so build it once instead
    # of once per loop iteration.
    detector = MTCNN()

    while True:
      # Capture frame-by-frame
      ret, frame = cap.read()

      # Check if frame is read correctly
      if not ret:
        print("No more frames to process!")
        break

      # OpenCV decodes frames as BGR; convert to RGB before detection
      frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
      detections = detector.detect_faces(frame)
      all_features.append(extract_features_from_faces(detections, frame))
      framenum += 1
      print(f"Frame Number {framenum}")
  finally:
    # Release resources even if detection or feature extraction raised
    cap.release()
    cv2.destroyAllWindows()

  return all_features

# Example usage: absolute path to a local sample video; adjust for your machine
video_path = 'C:/Users/dexte/Github-Repositories/multi-person-video-transcription/MultiSpeech/FaceDetector/videos/Example Video 1 - Jordan Peterson Confronts Australian Politician on Gender Politics and Quotas Q&A - Trim.mp4'
detect_faces_cnn(video_path=video_path)


In [None]:
# Run MTCNN on a single image, extract features for each face, and show it
all_features = []
image_path = r'C:\Users\dexte\Github-Repositories\multi-person-video-transcription\MultiSpeech\FaceDetector\images\d554ecb4a99486834ea873acbd42e91a.jpg'

# cv2.imread returns None instead of raising when the file is missing or
# unreadable; fail here with a clear message rather than inside cvtColor.
bgr_img = cv2.imread(image_path)
if bgr_img is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# OpenCV loads images as BGR; convert to RGB for detection and plotting
img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)
detector = MTCNN()
detections = detector.detect_faces(img)
all_features.append(extract_features_from_faces(detections, img))

plt.imshow(img)
plt.show()

In [None]:
# Draw the box and the five facial landmarks of every sufficiently confident
# detection on a copy of the image, then show the annotated copy.
min_conf = 0.9
marker_color = (0,155,255)
img_with_dets = img.copy()
for det in detections:
    if det['confidence'] >= min_conf:
        x, y, width, height = det['box']
        keypoints = det['keypoints']
        cv2.rectangle(img_with_dets, (x, y), (x + width, y + height), marker_color, 2)
        # Same landmark order as before: eyes, nose, then mouth corners
        for landmark in ('left_eye', 'right_eye', 'nose', 'mouth_left', 'mouth_right'):
            cv2.circle(img_with_dets, keypoints[landmark], 2, marker_color, 2)
plt.figure(figsize=(10, 10))
plt.imshow(img_with_dets)
plt.axis('off')