In [1]:
# Set CUDA_VISIBLE_DEVICES for this kernel process so CUDA-based libraries only see GPU index 0.
# NOTE(review): the pipeline cell further down runs with device = "cpu" — confirm this setting is still needed.
%env CUDA_VISIBLE_DEVICES=0

env: CUDA_VISIBLE_DEVICES=0


In [3]:
# Use the %pip magic rather than !pip: it installs into the environment of the
# running kernel, whereas !pip runs whichever `pip` is first on the shell PATH.
# TODO(review): pin the emotiefflib version once the tested release is known.
%pip install emotiefflib



In [None]:
# Explicit %conda magic (same command the bare `conda ...` line resolves to via automagic).
%conda install tensorflow

Retrieving notices: done
Channels:
 - defaults
Platform: osx-64
Collecting package metadata (repodata.json): done
Solving environment: done

## Package Plan ##

  environment location: /Users/user/Downloads/anaconda3

  added / updated specs:
    - tensorflow


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    absl-py-2.1.0              |  py312hecd8cb5_0         247 KB
    astunparse-1.6.3           |             py_0          17 KB
    ca-certificates-2025.2.25  |       hecd8cb5_0         131 KB
    flatbuffers-24.3.25        |       h6d0c2b6_0         1.4 MB
    gast-0.5.3                 |     pyhd3eb1b0_0          21 KB
    giflib-5.2.2               |       h46256e1_0          80 KB
    google-pasta-0.2.0         |     pyhd3eb1b0_0          46 KB
    grpcio-1.62.2              |  py312hab2016f_0        1009 KB
    keras-3.6.0                |  py312hecd8cb5_0         2.9 MB
    ml_dt

In [5]:
# Probe for Google Colab by trying to import its helper package.
# Only ImportError is caught, so genuine failures while fetching or installing
# the requirements are reported instead of silently flipping IN_COLAB to False.
try:
    import google.colab  # noqa: F401  (imported only to probe the environment)
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    # urllib.request must be imported explicitly; it was previously missing, which
    # raised a NameError that the bare `except:` swallowed.
    import urllib.request

    urllib.request.urlretrieve("https://github.com/av-savchenko/EmotiEffLib/blob/main/docs/tutorials/python/requirements.txt?raw=true", "requirements.txt")
    %pip install -r requirements.txt

In [7]:
from typing import List
import numpy as np

# One MTCNN detector per device, built lazily on first use so that the model is
# not re-created for every processed frame.
_MTCNN_DETECTORS = {}


def recognize_faces(frame: np.ndarray, device: str, min_confidence: float = 0.9) -> List[np.ndarray]:
    """
    Detect faces in an image and return the face regions cropped from it.

    The frame is passed to an MTCNN face detector; every detection whose
    probability exceeds ``min_confidence`` is cropped out of ``frame``.
    Bounding boxes are clipped to the frame so that a box reaching past the
    image border (including negative coordinates) still yields the visible
    part of the face instead of a wrapped-around or empty slice.

    Note:
        ``MTCNN`` is looked up as a global name when the function is called,
        so it has to be imported before the first call.

    Args:
        frame (numpy.ndarray): Image indexed as ``frame[y, x, channel]``.
        device (str): The device to run the MTCNN face detection model on, e.g., 'cpu' or 'cuda'.
        min_confidence (float): Detections with a probability not greater than
            this value are discarded. Defaults to 0.9.

    Returns:
        list: One numpy array per accepted face, each a crop (view) of ``frame``.
        Empty when no face is detected.

    Example:
        faces = recognize_faces(frame_rgb, 'cpu')
        # faces contains the cropped face images detected in frame_rgb.
    """
    detector = _MTCNN_DETECTORS.get(device)
    if detector is None:
        detector = MTCNN(keep_all=False, post_process=False, min_face_size=40, device=device)
        _MTCNN_DETECTORS[device] = detector

    bounding_boxes, probs = detector.detect(frame, landmarks=False)
    # When nothing is found the detector reports no boxes and a single None probability.
    if bounding_boxes is None or probs is None or probs[0] is None:
        return []

    frame_height, frame_width = frame.shape[:2]
    facial_images = []
    for bbox in bounding_boxes[probs > min_confidence]:
        x1, y1, x2, y2 = bbox[0:4].astype(int)
        # Clip to the image: a negative start index would otherwise count from the far edge.
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, frame_width), min(y2, frame_height)
        if x2 <= x1 or y2 <= y1:
            continue  # box lies entirely outside the frame
        facial_images.append(frame[y1:y2, x1:x2, :])
    return facial_images

In [None]:
##############Combine emotion recognition with gpt and voice####################
import cv2
import tensorflow as tf
from PIL import Image
from openai import AzureOpenAI
from gtts import gTTS
import os
import playsound
import time

from typing import List
import matplotlib.pyplot as plt
from facenet_pytorch import MTCNN
from emotiefflib.facial_analysis import EmotiEffLibRecognizer, get_model_list

# Initialize OpenAI Azure client
# The API key is read from the AZURE_OPENAI_API_KEY environment variable so that
# the secret is never stored in (or shared with) the notebook file.
_azure_api_key = os.environ.get("AZURE_OPENAI_API_KEY")
if not _azure_api_key:
    raise RuntimeError(
        "Set the AZURE_OPENAI_API_KEY environment variable before running this cell."
    )

client = AzureOpenAI(
    api_key=_azure_api_key,
    api_version="2023-05-15",
    azure_endpoint="https://hkust.azure-api.net"
)

# Function to get a motivational quote based on emotion
def get_motivational_quote(emotion):
    """
    Ask the chat model for a motivational quote that matches an emotion.

    Args:
        emotion: Value interpolated at the end of the user prompt as the
            emotion to write a quote for.

    Returns:
        The text content of the first choice in the chat completion response.
    """
    system_message = {
        "role": "system",
        "content": "You are an expert of generating expressions according to human's emotions.",
    }
    # The prompt describes the game, gives two sample quotes and ends with the emotion.
    user_prompt = f"Our team has created a fitness game called Fitness Fugitive, where the mechanics are similar to Temple Run. During gameplay, the camera captures the player's emotions and body motions. Assume the camera can always detect emotions accurately using an algorithm to classify them as Angry, Disgusted, Fear, Happy, Sad, Surprise, or Neutral. When certain emotions are detected (e.g., Happy, Sad, Tired, Angry), generate encouraging and motivational quotes that align with the player's emotional state and the fitness theme of the game. The quotes should be positive, inspiring, and relevant to the gameplay. For example: If the player is angry, the quote could be: 'Channel that fire into your run! You're unstoppable!' If the player is tired, the quote could be: 'Every step counts! You're stronger than you think!' Now, generate a quote for the emotion without quotation mark: {emotion}."
    user_message = {"role": "user", "content": user_prompt}

    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[system_message, user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content


# Function to speak the quote
def speak_quote(quote):
    """
    Convert a quote to speech with gTTS and play it.

    The audio is written to ``quote.mp3`` in the current working directory,
    played with ``playsound`` and then deleted. The file is removed even when
    synthesis or playback raises, so a failure does not leave it behind.

    Args:
        quote: Text to speak. ``None`` or blank text is skipped, because there
            is nothing to synthesise.

    Raises:
        Whatever gTTS or playsound raise; the exception propagates after cleanup.
    """
    if not quote or not quote.strip():
        return

    filename = "quote.mp3"
    try:
        gTTS(text=quote, lang='en').save(filename)
        playsound.playsound(filename)
    finally:
        # The file may not exist if synthesis failed before anything was written.
        if os.path.exists(filename):
            os.remove(filename)

# Initialize variables
device = "cpu"  # Change to "cuda" if a GPU is available
model_name = get_model_list()[0]  # Load the first model from the available list
fer = EmotiEffLibRecognizer(engine="onnx", model_name=model_name, device=device)  # Initialize recognizer
frame_interval = 30  # Process every 30th frame
frame_count = 0  # Counter to track frames
last_emotions = []  # Labels from the most recently analysed frame, kept for the overlay

# Set desired resolution
desired_width = 960
desired_height = 720

# Initialize video capture
cap = cv2.VideoCapture(0)  # Use the default webcam

if not cap.isOpened():
    # Raise instead of calling exit(): inside a notebook exit() targets the kernel,
    # and the capture handle should be released before giving up.
    cap.release()
    raise RuntimeError("Error: Could not open webcam.")

try:
    # Create a real-time display window (inside try so the finally block always cleans it up)
    cv2.namedWindow("Emotion Recognition", cv2.WINDOW_NORMAL)

    while True:
        success, frame_bgr = cap.read()
        if not success:
            print("Error: Could not read frame from webcam.")
            break

        # Resize the frame to the desired resolution
        frame_bgr = cv2.resize(frame_bgr, (desired_width, desired_height))

        # Increment frame count
        frame_count += 1

        # Process every 30th frame
        if frame_count % frame_interval == 0:
            # Convert frame to RGB for processing
            frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)

            # Detect faces in the frame
            facial_images = recognize_faces(frame_rgb, device)

            emotions = []
            for face_img in facial_images:
                # Predict emotions for each detected face; the first returned
                # value is indexed to get the label for this single face image.
                predicted_labels, _ = fer.predict_emotions(face_img, logits=True)
                emotion = predicted_labels[0]
                emotions.append(emotion)

                # Display the detected face and emotion in Matplotlib (optional)
                fig = plt.figure(figsize=(3, 3))
                plt.axis('off')
                plt.imshow(face_img)
                plt.title(emotion)
                plt.show()
                plt.close(fig)  # Avoid accumulating open figures over a long session

                # Generate and speak a quote for the label (not the whole list).
                # Both calls run synchronously, so the video loop waits until they return.
                quote = get_motivational_quote(emotion)
                print(f"Emotion: {emotion}, Quote: {quote}")
                speak_quote(quote)  # Speak the generated quote

            # Remember the result (empty when no face was found) so the overlay
            # stays visible until the next analysed frame instead of flashing once.
            last_emotions = emotions

        # Annotate the current frame with the most recent emotions
        for idx, label in enumerate(last_emotions):
            cv2.putText(frame_bgr, f"{label}", (50, 50 + idx * 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

        # Display the video stream with annotations
        cv2.imshow("Emotion Recognition", frame_bgr)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            print("Exiting...")
            break

finally:
    # Release resources
    cap.release()
    cv2.destroyAllWindows()

2025-03-22 17:20:01.302714: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-03-22 17:20:22.522 python[43266:3195178] +[IMKClient subclass]: chose IMKClient_Modern
2025-03-22 17:20:22.522 python[43266:3195178] +[IMKInputSession subclass]: chose IMKInputSession_Modern
