<a href="https://colab.research.google.com/github/EjbejaranosAI/AI_trends_2023/blob/main/PoC/Demo_smile_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Smile detection PoC Sprint review

# Import libraries

In [7]:
!pip install -q mediapipe
!pip install -q streamlit
!npm install localtunnel

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m35.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m164.8/164.8 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m68.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for validators (setup.py) ... [?25l[?25hdone


# Load model and video

In [1]:
#predictions
import tensorflow as tf
import matplotlib.pyplot as plt
import time

import os
import cv2
import numpy as np
import mediapipe as mp
from google.colab.patches import cv2_imshow



#General parameters
path_video = "/content/video.mp4"
model = tf.keras.models.load_model("smile_model.h5")
threshold = 0.9


# Functions
def predict_smile(img,plot_image= False):
  face_detection = mp.solutions.face_detection.FaceDetection()
  face = face_detection.process(img)
  detections = face.detections

  for detection in detections:
      bbox = detection.location_data.relative_bounding_box
      h, w, _ = img.shape
      x, y, width, height = int(bbox.xmin * w), int(bbox.ymin * h), int(bbox.width * w), int(bbox.height * h)
      crop_face = img[y:y+height, x:x+width]
      cv2.rectangle(img, (x, y), (x + width, y + height), (0, 255, 0), 2)



  resized_face = cv2.resize(crop_face, (224, 224))

  prediction_smile = model.predict(resized_face.reshape(1, 224, 224, 3))
  prediction_smile_value = np.max(prediction_smile[0])
  prediction_smile_index = np.argmax(prediction_smile)
  print(f"Prediction index {prediction_smile_index}")
  print(f"Prediction value {prediction_smile_value}")

  if plot_image:
    print("Image after reshape")
    plt.imshow(cv2.cvtColor(resized_face, cv2.COLOR_BGR2RGB))
    plt.show()
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.show()
  # Index 0 = not smiling in prediction_smile_index
  if prediction_smile_index==0:
    print("---------------- Not smiling ----------------")
    prediction_value = 0
  else:

    if prediction_smile_value >= threshold:
      prediction_value = 1
      print("---------------- Smiling ----------------")
    else:
      prediction_value = 0
      print("---------------- Not smiling ----------------")

  return prediction_value





def extract_video_segments(video_path, time_length, contnue_results=True):
    tic = time.time()

    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    print(f"Total frames: {total_frames}")
    print(f"FPS: {fps}")
    try:
        video_length = int(total_frames)/int(fps)
        print(f"Video length: {video_length} seconds")
    except ZeroDivisionError:
        print("Could not get video length")

    # Calculate the frame indices for the start and end segments
    start_frames = int(fps * time_length)
    end_frames = int(total_frames - fps * time_length)
    init_pred = []
    final_pred = []

    video_start = []
    video_end = []
    for frame_num in range(int(total_frames)):
        ret, frame = cap.read()
        if not ret:
            break
        if frame_num < start_frames:
            init_pred.append(predict_smile(frame))
            video_start.append(frame)
        if frame_num > end_frames:
            final_pred.append(predict_smile(frame))
            video_end.append(frame)

    cap.release()

    # Save the extracted video segments
    output_path_start = os.path.join(os.path.dirname(video_path), "start_segment.mp4")
    output_path_end = os.path.join(os.path.dirname(video_path), "end_segment.mp4")

    save_video(output_path_start, video_start, fps)
    save_video(output_path_end, video_end, fps)

    if contnue_results:
      print("------------------ Continuous analysis ------------------\n\n")
      #Processing results to detect continuos positive predictions
      print(f"For the first {time_length} seconds of the video the result is: ")
      general_init_pred = is_continuous_ones(init_pred)
      print(f"\nFor the last {time_length} of the video the result is: ")
      general_final_pred = is_continuous_ones(final_pred)

    else:
      print("------------------ General analysis ------------------")
      # Processing results to detect smilling through the video
      print(f"For the first {time_length} seconds of the video the result is: ")
      general_init_pred = procesing_results(init_pred)

      print(f"\nFor the last {time_length} of the video the result is: ")
      general_final_pred = procesing_results(final_pred)


    toc = time.time()
    print(toc-tic)
    return init_pred, final_pred



def save_video(output_path, frames, fps):
    height, width, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    for frame in frames:
        out.write(frame)
    out.release()


def is_continuous_ones(arr):
    ones_count = 0
    for num in arr:
        if num == 1:
            ones_count += 1
            if ones_count >= 10:

                print("For this segment of the video IS smiling")
                general_prediction = 1
                return general_prediction
        else:
            #print("not smilling")
            general_prediction = 0
    if general_prediction ==0:
        print("For this segment of the video IS NOT smiling")
    return general_prediction


def procesing_results(results):

  start_len = len(results)

  smiles_sum_start = np.sum(start)
  if smiles_sum_start >= start_len/3:
    print("For this segment of the video is smiling :) ")
    general_prediction = 1
    return general_prediction
  else:
    print("For this segment of the video IS NOT smiling")
    general_prediction = 0
    return general_prediction





In [2]:
start, end = extract_video_segments(path_video,10)

Total frames: 1371.0
FPS: 24.0
Video length: 57.125 seconds
Prediction index 1
Prediction value 0.6849311590194702
---------------- Not smiling ----------------
Prediction index 1
Prediction value 0.743406355381012
---------------- Not smiling ----------------
Prediction index 0
Prediction value 0.547702431678772
---------------- Not smiling ----------------
Prediction index 1
Prediction value 0.7491965293884277
---------------- Not smiling ----------------
Prediction index 1
Prediction value 0.7085261344909668
---------------- Not smiling ----------------
Prediction index 1
Prediction value 0.7040334343910217
---------------- Not smiling ----------------
Prediction index 0
Prediction value 0.5570223331451416
---------------- Not smiling ----------------
Prediction index 1
Prediction value 0.8175230622291565
---------------- Not smiling ----------------
Prediction index 1
Prediction value 0.5957059860229492
---------------- Not smiling ----------------
Prediction index 1
Prediction val

In [3]:
import tensorflow as tf
import matplotlib.pyplot as plt
import time
import os
import cv2
import numpy as np
import mediapipe as mp
from google.colab.patches import cv2_imshow


class SmileDetectionAnalyzer:
    def __init__(self, model_path, threshold=0.9):
        self.model = tf.keras.models.load_model(model_path)
        self.threshold = threshold

    def predict_smile(self, img, plot_image=False):
        face_detection = mp.solutions.face_detection.FaceDetection()
        face = face_detection.process(img)
        detections = face.detections

        for detection in detections:
            bbox = detection.location_data.relative_bounding_box
            h, w, _ = img.shape
            x, y, width, height = int(bbox.xmin * w), int(bbox.ymin * h), int(bbox.width * w), int(bbox.height * h)
            crop_face = img[y:y + height, x:x + width]
            cv2.rectangle(img, (x, y), (x + width, y + height), (0, 255, 0), 2)

        resized_face = cv2.resize(crop_face, (224, 224))

        prediction_smile = self.model.predict(resized_face.reshape(1, 224, 224, 3))
        prediction_smile_value = np.max(prediction_smile[0])
        prediction_smile_index = np.argmax(prediction_smile)
        print(f"Prediction index {prediction_smile_index}")
        print(f"Prediction value {prediction_smile_value}")

        if plot_image:
            print("Image after reshape")
            plt.imshow(cv2.cvtColor(resized_face, cv2.COLOR_BGR2RGB))
            plt.show()
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            plt.show()

        if prediction_smile_index == 0:
            print("---------------- Not smiling ----------------")
            prediction_value = 0
        else:
            if prediction_smile_value >= self.threshold:
                prediction_value = 1
                print("---------------- Smiling ----------------")
            else:
                prediction_value = 0
                print("---------------- Not smiling ----------------")

        return prediction_value

    @staticmethod
    def save_video(output_path, frames, fps):
        height, width, _ = frames[0].shape
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        for frame in frames:
            out.write(frame)
        out.release()

    @staticmethod
    def is_continuous_ones(arr):
        ones_count = 0
        for num in arr:
            if num == 1:
                ones_count += 1
                if ones_count >= 10:
                    print("For this segment of the video IS smiling")
                    general_prediction = 1
                    return general_prediction
            else:
                general_prediction = 0
        if general_prediction == 0:
            print("For this segment of the video IS NOT smiling")
        return general_prediction

    @staticmethod
    def processing_results(results):
        start_len = len(results)
        smiles_sum_start = np.sum(results)
        if smiles_sum_start >= start_len / 3:
            print("For this segment of the video is smiling :) ")
            general_prediction = 1
        else:
            print("For this segment of the video IS NOT smiling")
            general_prediction = 0
        return general_prediction

    def extract_video_segments(self, video_path, time_length, continue_results=True):
        tic = time.time()

        cap = cv2.VideoCapture(video_path)
        fps = cap.get(cv2.CAP_PROP_FPS)
        total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        print(f"Total frames: {total_frames}")
        print(f"FPS: {fps}")
        try:
            video_length = int(total_frames) / int(fps)
            print(f"Video length: {video_length} seconds")
        except ZeroDivisionError:
            print("Could not get video length")

        start_frames = int(fps * time_length)
        end_frames = int(total_frames - fps * time_length)
        init_pred = []
        final_pred = []
        video_start = []
        video_end = []

        for frame_num in range(int(total_frames)):
            ret, frame = cap.read()
            if not ret:
                break
            if frame_num < start_frames:
                init_pred.append(self.predict_smile(frame))
                video_start.append(frame)
            if frame_num > end_frames:
                final_pred.append(self.predict_smile(frame))
                video_end.append(frame)

        cap.release()

        output_path_start = os.path.join(os.path.dirname(video_path), "start_segment.mp4")
        output_path_end = os.path.join(os.path.dirname(video_path), "end_segment.mp4")

        self.save_video(output_path_start, video_start, fps)
        self.save_video(output_path_end, video_end, fps)

        if continue_results:
            print("------------------ Continuous analysis ------------------\n\n")
            print(f"For the first {time_length} seconds of the video the result is: ")
            general_init_pred = self.is_continuous_ones(init_pred)
            print(f"\nFor the last {time_length} of the video the result is: ")
            general_final_pred = self.is_continuous_ones(final_pred)
        else:
            print("------------------ General analysis ------------------")
            print(f"For the first {time_length} seconds of the video the result is: ")
            general_init_pred = self.processing_results(init_pred)
            print(f"\nFor the last {time_length} of the video the result is: ")
            general_final_pred = self.processing_results(final_pred)

        toc = time.time()
        print(toc - tic)
        return init_pred, final_pred
analyzer = SmileDetectionAnalyzer(model_path="smile_model.h5", threshold=0.9)

init_pred, final_pred = analyzer.extract_video_segments(video_path="/content/video.mp4", time_length=10, continue_results=True)


Total frames: 1371.0
FPS: 24.0
Video length: 57.125 seconds
Prediction index 1
Prediction value 0.6849311590194702
---------------- Not smiling ----------------
Prediction index 1
Prediction value 0.743406355381012
---------------- Not smiling ----------------
Prediction index 0
Prediction value 0.547702431678772
---------------- Not smiling ----------------
Prediction index 1
Prediction value 0.7491965293884277
---------------- Not smiling ----------------
Prediction index 1
Prediction value 0.7085261344909668
---------------- Not smiling ----------------
Prediction index 1
Prediction value 0.7040334343910217
---------------- Not smiling ----------------
Prediction index 0
Prediction value 0.5570223331451416
---------------- Not smiling ----------------
Prediction index 1
Prediction value 0.8175230622291565
---------------- Not smiling ----------------
Prediction index 1
Prediction value 0.5957059860229492
---------------- Not smiling ----------------
Prediction index 1
Prediction val