# Video and Image Emotion Annotation

This notebook processes videos and images to detect faces and annotate them with recognized emotions, using MTCNN for face detection and HSEmotionRecognizer for emotion recognition.




## Setup
First, install the required libraries:

In [1]:
! pip install mtcnn hsemotion moviepy


Collecting mtcnn
  Downloading mtcnn-0.1.1-py3-none-any.whl (2.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting hsemotion
  Downloading hsemotion-0.3.0.tar.gz (8.0 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting timm (from hsemotion)
  Downloading timm-1.0.3-py3-none-any.whl (2.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m22.9 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->hsemotion)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->hsemotion)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->hsemotion)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting n

In [2]:
import os
import tempfile

from moviepy.editor import VideoFileClip, concatenate_videoclips
from mtcnn import MTCNN
from hsemotion.facial_emotions import HSEmotionRecognizer
import cv2
import numpy as np
from google.colab.patches import cv2_imshow  # Import cv2_imshow for Colab

## Initialize Detectors

Initialize the face detector and emotion recognizer:

In [3]:
# Shared, notebook-wide models: `detector` is used by detect_faces() and
# `recognizer` by classify_emotions().
EMOTION_MODEL_NAME = 'enet_b0_8_best_vgaf'
INFERENCE_DEVICE = 'cpu'

detector = MTCNN()
recognizer = HSEmotionRecognizer(
    model_name=EMOTION_MODEL_NAME,
    device=INFERENCE_DEVICE,
)

Downloading enet_b0_8_best_vgaf from https://github.com/HSE-asavchenko/face-emotion-recognition/blob/main/models/affectnet_emotions/enet_b0_8_best_vgaf.pt?raw=true
/root/.hsemotion/enet_b0_8_best_vgaf.pt Compose(
    Resize(size=(224, 224), interpolation=bilinear, max_size=None, antialias=True)
    ToTensor()
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
)


## Face Detection Function

Detect faces in a given frame:

In [None]:
def detect_faces(frame):
    """Run the notebook-level ``detector`` on ``frame``.

    The detector's result is handed back untouched. Callers in this notebook
    iterate over it and read each item's ``'box'`` entry.
    """
    return detector.detect_faces(frame)

## Annotation Function

Annotate the frame with detected faces and recognized emotions:

In [None]:
def annotate_frame(frame, faces):
    """Draw a bounding box and an emotion label on ``frame`` for each face.

    The drawing is done directly on ``frame``; nothing is returned.

    Args:
        frame: Image array indexed as ``frame[row, column]`` (its first two
            ``shape`` entries are taken as height and width).
        faces: Iterable of detections; each item provides a ``'box'`` entry
            holding ``(x, y, width, height)``.
    """
    frame_height, frame_width = frame.shape[:2]

    for face in faces:
        x, y, w, h = (int(value) for value in face['box'])

        # A detector may report a box that starts left of / above the frame.
        # A negative slice start would wrap around to the opposite edge and
        # yield an empty or wrong crop, so clip the box to the frame first.
        left, top = max(x, 0), max(y, 0)
        right, bottom = min(x + w, frame_width), min(y + h, frame_height)
        if right <= left or bottom <= top:
            continue  # no part of the box lies inside the frame

        face_image = frame[top:bottom, left:right]
        emotion = classify_emotions(face_image)

        cv2.rectangle(frame, (left, top), (right, bottom), (255, 0, 0), 2)
        # Keep the text baseline inside the image for faces touching the top edge.
        label_y = max(top - 10, 20)
        cv2.putText(frame, emotion, (left, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

## Emotion Classification Function

Classify emotions for the detected face:

In [4]:
def classify_emotions(face_image):
    """Return the emotion label for ``face_image`` from the notebook-level ``recognizer``.

    The first element of the recogniser's result is used as the label; an
    empty (falsy) result maps to ``'Unknown'``.
    """
    prediction = recognizer.predict_emotions(face_image)
    return prediction[0] if prediction else 'Unknown'



## Process Video Frames

Process video frames to detect and annotate faces with emotions:

In [15]:
def process_video_frames(video_path, temp_output_path):
    """Write an annotated, audio-less copy of ``video_path`` to ``temp_output_path``.

    Every frame is run through ``detect_faces`` and ``annotate_frame`` and then
    written with OpenCV using the ``mp4v`` codec at the source frame rate and size.

    Args:
        video_path: Path of the video to read.
        temp_output_path: Path of the annotated video to create.

    Raises:
        IOError: If the OpenCV writer cannot be opened for ``temp_output_path``.
    """
    video_clip = VideoFileClip(video_path)
    out = None
    try:
        fps = video_clip.fps
        width, height = (int(value) for value in video_clip.size)

        out = cv2.VideoWriter(
            temp_output_path,
            cv2.VideoWriter_fourcc(*'mp4v'),
            fps,
            (width, height),
        )
        if not out.isOpened():
            # Without this check every write() would be silently dropped.
            raise IOError(f"Unable to open video writer for '{temp_output_path}'")

        for frame in video_clip.iter_frames():
            faces = detect_faces(frame)
            annotate_frame(frame, faces)
            # Clip frames are RGB; OpenCV's writer expects BGR.
            out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    finally:
        # Release the writer and the reader even when a frame fails, so the
        # output file is finalised and the source file handle is not leaked.
        if out is not None:
            out.release()
        video_clip.close()

## Add Audio to Processed Video

Add the original audio back to the processed video:

In [16]:
def add_audio_to_video(original_video_path, processed_video_path, output_path):
    """Write ``processed_video_path`` to ``output_path`` with the original's audio track.

    Errors are reported with ``print`` rather than raised (best effort), and
    any clip that was opened is closed afterwards.

    Args:
        original_video_path: Video whose audio track is reused.
        processed_video_path: Video whose frames are kept.
        output_path: Destination file (H.264 video, AAC audio).
    """
    # Start as None so the cleanup below is safe even when opening a clip fails.
    original_clip = None
    processed_clip = None
    try:
        original_clip = VideoFileClip(original_video_path)
        processed_clip = VideoFileClip(processed_video_path)
        final_clip = processed_clip.set_audio(original_clip.audio)
        final_clip.write_videofile(output_path, codec='libx264', audio_codec='aac')
    except Exception as e:
        print(f"Error while combining with audio: {e}")
    finally:
        for clip in (original_clip, processed_clip):
            if clip is not None:
                clip.close()

## Process Video

Combine video frame processing and adding audio into a single function:

In [17]:
def process_video(video_path, output_path):
    """Annotate ``video_path`` with emotions and save it, with audio, to ``output_path``.

    The frames are first written to a uniquely named temporary ``.mp4`` file,
    then combined with the source audio. The temporary file is always deleted,
    whether or not the two steps succeed.

    Args:
        video_path: Path of the video to annotate.
        output_path: Path of the final annotated video.
    """
    # A unique temp file avoids clashes between runs and with files in the
    # working directory.
    fd, temp_output_path = tempfile.mkstemp(prefix='annotated_frames_', suffix='.mp4')
    os.close(fd)  # only the path is needed; the writers open the file themselves

    try:
        # Process video frames and save to the temporary file
        process_video_frames(video_path, temp_output_path)

        # Add audio to the processed video
        add_audio_to_video(video_path, temp_output_path, output_path)
    finally:
        if os.path.exists(temp_output_path):
            os.remove(temp_output_path)

## Process Image

Process images to detect and annotate faces with emotions:

In [18]:
# def process_image(image_path, output_path):
#     # Step 2: Read image
#     image = cv2.imread(image_path)
#     if image is None:
#         print(f"Error: Unable to read image at '{image_path}'")
#         return

#     # Step 3: Detect faces and annotate emotions
#     faces = detect_faces(image)
#     annotate_frame(image, faces)

#     # Step 4: Write annotated image to output path
#     cv2.imwrite(output_path, image)

In [7]:
def process_image(input_path, output_path):
    """Annotate an image with emotions and save it side by side with the original.

    The file written to ``output_path`` (and displayed in Colab) is the
    original image on the left and the annotated copy on the right.

    Args:
        input_path: Path of the image to read.
        output_path: Path of the combined image to write.
    """
    # Step 1: Read input image (OpenCV loads it in BGR channel order)
    original_bgr = cv2.imread(input_path)
    if original_bgr is None:
        print(f"Error: Unable to read image at '{input_path}'")
        return

    # Step 2: Detect faces and annotate emotions on an RGB copy.
    # The video pipeline passes RGB frames to these same helpers, so converting
    # here gives the models the same channel order on both paths and makes the
    # box colour match. The original image is left untouched.
    annotated_rgb = cv2.cvtColor(original_bgr, cv2.COLOR_BGR2RGB)
    faces = detect_faces(annotated_rgb)
    annotate_frame(annotated_rgb, faces)
    annotated_bgr = cv2.cvtColor(annotated_rgb, cv2.COLOR_RGB2BGR)

    # Step 3: Combine input and annotated images horizontally
    combined_image = cv2.hconcat([original_bgr, annotated_bgr])

    # Step 4: Save and display the combined image
    if not cv2.imwrite(output_path, combined_image):
        print(f"Error: Unable to write image to '{output_path}'")
    cv2_imshow(combined_image)  # Display combined image in Colab


# Try It with Your Own Data
Process the video or image based on the file extension:

In [None]:
if __name__ == "__main__":
    # Point these at your own video or image, and at the desired output file.
    input_path = '/content/jj.jpeg'
    output_path = '/content/out.jpeg'

    # Pick the pipeline from the (case-insensitive) file extension.
    video_suffixes = ('.mp4', '.avi', '.mov', '.mkv')
    image_suffixes = ('.jpg', '.jpeg', '.png')
    lowered_path = input_path.lower()

    if lowered_path.endswith(video_suffixes):
        process_video(input_path, output_path)
    elif lowered_path.endswith(image_suffixes):
        process_image(input_path, output_path)
    else:
        print("Unsupported file format. Please provide a video or image file.")


In [21]:
if __name__ == "__main__":
    # Point these at your own video or image, and at the desired output file.
    input_path = '/content/رياكشن عبلة كامل تبكي.mp4'
    output_path = '/content/رياكشن عبلة كامل تبكي output.mp4'

    # Classify the input by its (case-insensitive) file extension.
    normalized_path = input_path.lower()
    is_video = normalized_path.endswith(('.mp4', '.avi', '.mov', '.mkv'))
    is_image = normalized_path.endswith(('.jpg', '.jpeg', '.png'))

    if is_video:
        process_video(input_path, output_path)
    elif is_image:
        process_image(input_path, output_path)
    else:
        print("Unsupported file format. Please provide a video or image file.")






Moviepy - Building video /content/رياكشن عبلة كامل تبكي output.mp4.
MoviePy - Writing audio in رياكشن عبلة كامل تبكي outputTEMP_MPY_wvf_snd.mp4




MoviePy - Done.
Moviepy - Writing video /content/رياكشن عبلة كامل تبكي output.mp4





Moviepy - Done !
Moviepy - video ready /content/رياكشن عبلة كامل تبكي output.mp4
