# To Use
First run all **Installs & Imports** cells

Then, for training, run all **Train** cells with the appropriate variables adjusted (see below).

For recognition, run all **Recognition Functions** cells, then either the "all videos" or "one video" cell under **Run on videos**

# Docs
`face_recognition` library docs: https://face-recognition.readthedocs.io/en/latest/face_recognition.html

# Installs & Imports

In [None]:
# Mount Google Drive at /content/drive; the project paths used below all live under this mount point
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# List the current directory, then change into the shared project folder on Drive.
# NOTE: the %cd path is relative, so it only resolves when the current directory is /content.
%ls
%cd "drive/My Drive/_AMPPD TEAM SHARED FOLDER/MGMs/Facial recognition"

[0m[01;34mdrive[0m/  [01;34msample_data[0m/
[Errno 2] No such file or directory: 'drive/My Drive/_AMPPD TEAM SHARED FOLDER/MGMs/Facial recognit'
/content


In [None]:
!pip install face_recognition
!pip install opencv-python
!cd "/drive/My Drive/_AMPPD TEAM SHARED FOLDER"

Collecting face_recognition
  Downloading https://files.pythonhosted.org/packages/1e/95/f6c9330f54ab07bfa032bf3715c12455a381083125d8880c43cbe76bb3d0/face_recognition-1.3.0-py2.py3-none-any.whl
Collecting face-recognition-models>=0.3.0
[?25l  Downloading https://files.pythonhosted.org/packages/cf/3b/4fd8c534f6c0d1b80ce0973d01331525538045084c73c153ee6df20224cf/face_recognition_models-0.3.0.tar.gz (100.1MB)
[K     |████████████████████████████████| 100.2MB 73kB/s 
Building wheels for collected packages: face-recognition-models
  Building wheel for face-recognition-models (setup.py) ... [?25l[?25hdone
  Created wheel for face-recognition-models: filename=face_recognition_models-0.3.0-py2.py3-none-any.whl size=100566173 sha256=c479271ddbcb3d30d086569ed077b2c242635d436c07e711a7aca11a99abd41a
  Stored in directory: /root/.cache/pip/wheels/d2/99/18/59c6c8f01e39810415c0e63f5bede7d83dfb0ffc039865465f
Successfully built face-recognition-models
Installing collected packages: face-recognition-m

In [None]:
import face_recognition
import cv2
import os 
import pickle
from tqdm.notebook import tqdm

%cd "/content/drive/My Drive/_AMPPD TEAM SHARED FOLDER/MGMs/Facial recognition"

/content/drive/My Drive/_AMPPD TEAM SHARED FOLDER/MGMs/Facial recognition


# Train

These cells handle training a model on the images provided for each name in `people`. Training images should be kept in a folder called `{name}Photos`, where `{name}` is the label provided in `people`.


In [None]:
from sklearn import svm
import os
import face_recognition

# Training data: one {"encoding": ..., "name": ...} record per usable training image
known_faces = []

# Labels
people = ["CharlieNelms", "HermanBWells"]

# Images
for name in people:
  # Each person's training photos live in a "{name}Photos" folder
  image_dir = f"/content/drive/My Drive/_AMPPD TEAM SHARED FOLDER/MGMs/Facial recognition/{name}Photos/"
  images = [f for f in os.listdir(image_dir) if os.path.isfile(image_dir + f)]
  for img in tqdm(images):
    path = image_dir + img
    face = face_recognition.load_image_file(path)
    face_bounding_boxes = face_recognition.face_locations(face)  # Find faces in the picture

    # Only images with exactly one face can be labelled unambiguously
    if len(face_bounding_boxes) != 1:
      print(img + " was skipped and can't be used for training")
      continue

    # Reuse the boxes found above so the detector does not run a second time on the same image
    face_enc = face_recognition.face_encodings(face, face_bounding_boxes)[0]
    # Add face encoding for current image with corresponding label (name) to the training data
    known_faces.append({"encoding": face_enc, "name": name})

HBox(children=(FloatProgress(value=0.0, max=9.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=42.0), HTML(value='')))

HermanWells_1947_01.jpg was skipped and can't be used for training
HermanWells_1956.jpg was skipped and can't be used for training
HermanWells_1974.jpg was skipped and can't be used for training
HermanWells_1986.jpg was skipped and can't be used for training
HermanWells_1960.jpg was skipped and can't be used for training



In [None]:
# Save model to file

import pickle

# Use a `with` block so the file is flushed and closed before any later cell reads model.p
with open("model.p", "wb") as model_file:
  pickle.dump(known_faces, model_file)

# Recognition Functions

These cells define functions that recognize faces in the video at `video_path` and write the resulting timestamps to `output_file`. Recognition uses the known faces stored in the saved model file `model.p`.

In [None]:
# UTILITY FUNCTIONS

def frame_to_time(frames, fps):
  """Turn a frame count into an "HH:MM:SS" string for a video running at `fps`.

  Fractional hours, minutes and seconds are truncated, not rounded.
  """
  hours = int(frames / (3600 * fps))
  minutes = int(frames / (60 * fps) % 60)
  secs = int(frames / fps % 60)
  return f"{hours:02d}:{minutes:02d}:{secs:02d}"

def time_to_frame(timecode, fps):
  """Return the frame offset of an "H:M:S" timecode for a video running at `fps`."""
  hours, minutes, secs = timecode.split(":")
  # Weight each field by the number of seconds it represents
  total_seconds = sum(
    int(field) * weight
    for field, weight in zip((hours, minutes, secs), (3600, 60, 1))
  )
  return total_seconds * fps

def time_to_seconds(timecode):
  """Return the total number of seconds represented by an "H:M:S" timecode."""
  hours, minutes, secs = timecode.split(":")
  total = int(hours) * 3600
  total += int(minutes) * 60
  total += int(secs)
  return total

def seconds_to_timecode(seconds):
  """Format a number of seconds as an "HH:MM:SS" timecode.

  Each field is computed with true division and then truncated by the
  integer format specifier.
  """
  fields = (seconds / 3600, (seconds / 60) % 60, seconds % 60)
  return ":".join("%02d" % field for field in fields)

In [None]:
# MAIN RECOGNITION FUNCTION 

from matplotlib import pyplot as plt
import pickle

def do_recognize(video_path, output_file, tolerance):
  """Find known faces in a video and write the matching timestamps to a TSV file.

  Only every nth frame is analysed, where n is the video's frame rate truncated
  to an integer (so roughly one frame per second). Each face found is compared
  with every encoding stored in "model.p" and labelled with the closest one,
  provided its distance is within `tolerance`.

  Args:
    video_path: Path of the video file to analyse.
    output_file: Path of the results file. One line is written per match, in
      the form "HH:MM:SS<TAB>name". The parent folder is created if missing.
    tolerance: Largest face distance that still counts as a match.
  """
  # Create the results folder up front: the file is only written after the whole
  # video has been processed, so a missing folder would throw that work away.
  output_dir = os.path.dirname(output_file)
  if output_dir:
    os.makedirs(output_dir, exist_ok=True)

  # Load Model
  # NOTE: unpickling can execute arbitrary code; only load a model.p produced by this notebook.
  with open("model.p", "rb") as model_file:
    model = pickle.load(model_file)
  known_faces = [face["encoding"] for face in model]
  known_names = [face["name"] for face in model]

  # Initialize results list
  results = []

  # Open the input movie file
  input_movie = cv2.VideoCapture(video_path)
  try:
    length = int(input_movie.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = input_movie.get(cv2.CAP_PROP_FPS)

    # Run recognition on every nth frame only.
    # Clamped to 1 so a frame rate below 1 fps cannot cause a modulo by zero.
    n = max(1, int(fps))

    for frame_number in tqdm(range(1, length + 1)):
      # Grab a single frame of video
      ret, frame = input_movie.read()
      # Quit when the input video file ends
      if not ret:
        break

      if frame_number % n != 0:
        continue

      # Convert the image from BGR color (which OpenCV uses) to RGB color (which
      # face_recognition uses). cvtColor returns a new contiguous array; a reversed
      # slice such as frame[:, :, ::-1] is a negative-stride view, which some dlib
      # builds refuse to accept.
      rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

      # Find all the faces and face encodings in the current frame of video
      face_locations = face_recognition.face_locations(rgb_frame)
      face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)

      for face_encoding in face_encodings:
        distances = face_recognition.face_distance(known_faces, face_encoding)
        if len(distances) == 0:
          continue
        # Label with the closest known face instead of the first one under the tolerance,
        # so the order of entries in the model cannot decide between two people.
        best_index = int(distances.argmin())
        if distances[best_index] <= tolerance:
          timecode = frame_to_time(frame_number, fps)
          results.append([timecode, known_names[best_index]])
  finally:
    # Release the capture even if recognition fails part-way through
    input_movie.release()
    cv2.destroyAllWindows()

  with open(output_file, "w") as f:
    for timecode, name in results:
      f.write(f"{timecode}\t{name}\n")

# Run on videos

These cells run the above-defined recognition functions. 


The first runs recognition on all the videos in the `video_dir` folder and outputs all the results to `output_dir`.

The second runs on a single video specified by `filename` and outputs to the specified `output` file.


In [None]:
# All videos 
import os

# Largest face distance that still counts as a match; forwarded to do_recognize
tolerance = 0.4
video_dir = "/content/drive/My Drive/_AMPPD TEAM SHARED FOLDER/MGMs/Facial recognition/CharlieNelmsVideos/"

# Results are grouped in one folder per tolerance value
output_dir = f"/content/drive/My Drive/_AMPPD TEAM SHARED FOLDER/MGMs/Facial recognition/Nelms Results/results-{tolerance}/"
# exist_ok makes the cell safe to re-run without a separate existence check
os.makedirs(output_dir, exist_ok=True)

# Sorted so the videos are processed in the same order on every run
for filename in sorted(os.listdir(video_dir)):
  if filename.endswith(".mp4"):
    print(filename)
    # Swap only the extension; str.replace would also rewrite a ".mp4" inside the name
    output = output_dir + os.path.splitext(filename)[0] + ".tsv"
    do_recognize(video_dir + filename, output, tolerance)

In [None]:
# Run on one video
# Largest face distance that still counts as a match; forwarded to do_recognize
tolerance = 0.4
filename = "/content/drive/My Drive/_AMPPD TEAM SHARED FOLDER/MGMs/Facial recognition/CharlieNelmsVideos/Repositioning HBCUs for the Future A Conversation with Foundations.mp4"
output = f"/content/drive/My Drive/_AMPPD TEAM SHARED FOLDER/MGMs/Facial recognition/Nelms Results/results-{tolerance}/Repositioning HBCUs.tsv"
# The results file is written only once the whole video has been processed, so make sure
# its folder exists first; otherwise the run fails at the very end.
os.makedirs(os.path.dirname(output), exist_ok=True)
do_recognize(filename, output, tolerance)

HBox(children=(FloatProgress(value=0.0, max=114082.0), HTML(value='')))




# Merge times

These cells merge adjacent timestamps within a certain `threshold`. For example, if a known face was seen at `0:05, 0:06, 0:08, 0:11`, merging with a threshold of 2 seconds would give `0:05, 0:11` as a result.

In [None]:
# Largest gap between two neighbouring detections that still gets merged into one
threshold = 2 #in seconds

In [None]:
%cd "/content/drive/My Drive/_AMPPD TEAM SHARED FOLDER/MGMs/Facial recognition"

# Input: one "HH:MM:SS" timecode per line
timecode_file = "results-0.5/ThingsRemembered.txt"
with open(timecode_file, "r") as f:
  # Skip blank lines (such as a trailing newline); they cannot be parsed as a timecode
  timecodes = [line.strip() for line in f if line.strip()]

detected_seconds = [time_to_seconds(t) for t in timecodes]

# Keep a timestamp only when it is more than `threshold` seconds after the detection
# just before it. The first timestamp is always kept, and the first pair is compared
# like every other pair.
seconds = [
  current
  for index, current in enumerate(detected_seconds)
  if index == 0 or current - detected_seconds[index - 1] > threshold
]

new_timecodes = [seconds_to_timecode(s) for s in seconds]
with open("results-0.5/ThingsRemembered-merged.txt", "w") as f:
  for t in new_timecodes:
    f.write('%s\n' % t)

/content/drive/My Drive/_AMPPD TEAM SHARED FOLDER/MGMs/Facial recognition


# Video output test

Testing video output for result visualization. This does not work.

In [None]:
from matplotlib import pyplot as plt

def do_recognize(video_path, output_file, tolerance=0.40):
  """Experimental variant of do_recognize kept for the video-output test.

  NOTE: running this cell replaces the do_recognize defined in the
  "Recognition Functions" section. This variant writes one "HH:MM:SS" timecode
  per line and does not record which person was matched.

  Only every nth frame is analysed, where n is the video's frame rate truncated
  to an integer. Writing an annotated output video is not implemented; the
  commented-out lines mark where the writer would be created and released.

  Args:
    video_path: Path of the video file to analyse.
    output_file: Path of the text file that receives one timecode per match.
    tolerance: Largest face distance that still counts as a match.
  """
  # Load the known encodings from the saved model instead of relying on a notebook
  # global; the global `known_faces` built by the training cell holds dicts, not encodings.
  # NOTE: unpickling can execute arbitrary code; only load a model.p produced by this notebook.
  with open("model.p", "rb") as model_file:
    known_faces = [face["encoding"] for face in pickle.load(model_file)]

  # Results
  results = []

  # Open the input movie file
  input_movie = cv2.VideoCapture(video_path)
  try:
    length = int(input_movie.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = input_movie.get(cv2.CAP_PROP_FPS)
    # Only needed by the output writer sketched below
    width = int(input_movie.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(input_movie.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Create an output movie file (make sure resolution/frame rate matches input video!)
    #fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    #output_movie = cv2.VideoWriter('output.mp4', fourcc, fps, (width, height))

    # Run recognition on every nth frame only.
    # Clamped to 1 so a frame rate below 1 fps cannot cause a modulo by zero.
    n = max(1, int(fps))

    for frame_number in tqdm(range(1, length + 1)):
      # Grab a single frame of video
      ret, frame = input_movie.read()
      # Quit when the input video file ends
      if not ret:
        break

      if frame_number % n != 0:
        # Skipped frames would be copied to the output video here
        # output_movie.write(frame)
        continue

      # Convert the image from BGR color (which OpenCV uses) to RGB color (which
      # face_recognition uses). cvtColor returns a contiguous array, unlike the
      # negative-stride view produced by frame[:, :, ::-1].
      rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

      # Find all the faces and face encodings in the current frame of video
      face_locations = face_recognition.face_locations(rgb_frame)
      face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)

      for face_encoding in face_encodings:
        # See if the face is a match for the known face(s)
        match = face_recognition.compare_faces(known_faces, face_encoding, tolerance=tolerance)
        if any(match):
          results.append(frame_to_time(frame_number, fps))
  finally:
    # Release the capture even if recognition fails part-way through
    input_movie.release()
    #output_movie.release()
    cv2.destroyAllWindows()

  # Write the collected timecodes
  with open(output_file, "w") as f:
    for timecode in results:
      f.write('%s\n' % timecode)

KeyboardInterrupt: ignored