In [1]:
!pip install deepface opencv-python-headless tqdm matplotlib mediapipe

Collecting deepface
  Downloading deepface-0.0.93-py3-none-any.whl.metadata (30 kB)
Collecting mediapipe
  Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting flask-cors>=4.0.1 (from deepface)
  Downloading flask_cors-6.0.1-py3-none-any.whl.metadata (5.3 kB)
Collecting mtcnn>=0.1.0 (from deepface)
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting retina-face>=0.0.1 (from deepface)
  Downloading retina_face-0.0.17-py3-none-any.whl.metadata (10 kB)
Collecting fire>=0.4.0 (from deepface)
  Downloading fire-0.7.0.tar.gz (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gunicorn>=20.1.0 (from deepface)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
INFO: pip is looking at multiple versions of mediapipe to determine which version is compatible with other requirem

In [17]:
from deepface import DeepFace
import cv2
from tqdm import tqdm
from collections import Counter, deque
import mediapipe as mp
import json

# Input video to analyse and destination of the annotated copy
input_video_path = './Video1.mp4'
output_video_path = './Saida_Video1.mp4'

# Initialise the MediaPipe Pose estimator (only Pose is set up here; no FaceMesh).
# static_image_mode=False makes MediaPipe treat successive frames as a video
# stream (per the MediaPipe Pose documentation).
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5)

# Rolling windows holding at most the 5 most recent per-frame votes for the
# "waving" and "dancing" heuristics; classify_activity appends to them.
hand_wave_history = deque(maxlen=5)
dancing_history = deque(maxlen=5)

def classify_activity(pose_landmarks):
    """Classify the activity visible in one frame from MediaPipe pose landmarks.

    The rules are evaluated in order and the first match wins:
    "deitado", "acenando", "dancando", "perfil", "sorriso/careta".

    Args:
        pose_landmarks: The ``pose_landmarks`` attribute of a MediaPipe Pose
            result (an object exposing a ``landmark`` sequence), or a falsy
            value when no pose was detected.

    Returns:
        str: One of the labels above, or "desconhecida" when no pose is given
        or no rule matches.

    Side effects:
        Appends one vote to the module-level ``hand_wave_history`` or
        ``dancing_history`` deque whenever the corresponding rule is evaluated.
    """
    # No pose detected in this frame.
    if not pose_landmarks:
        return "desconhecida"

    landmarks = pose_landmarks.landmark

    def is_visible(point):
        """Return True when the landmark exists and its visibility exceeds 0.5."""
        return point is not None and point.visibility > 0.5

    def get_safe(name):
        """Return the landmark called ``name``, or None if it cannot be looked up."""
        try:
            return landmarks[mp_pose.PoseLandmark[name]]
        except (KeyError, IndexError):
            # Unknown landmark name or a landmark list that is too short.
            return None

    # Body points actually used by the rules below.
    l_sh = get_safe("LEFT_SHOULDER")
    r_sh = get_safe("RIGHT_SHOULDER")
    l_wr = get_safe("LEFT_WRIST")
    r_wr = get_safe("RIGHT_WRIST")
    r_eye = get_safe("RIGHT_EYE")
    l_eye = get_safe("LEFT_EYE")
    r_elbow = get_safe("RIGHT_ELBOW")

    # 1. Lying down: right shoulder above the right eye, which is above the left eye
    #    (image y grows downwards).
    if is_visible(r_eye) and is_visible(r_sh) and is_visible(l_eye):
        if (r_sh.y < r_eye.y) and (r_eye.y < l_eye.y):
            return "deitado"

    # 2. Waving: left wrist visible, right wrist not visible.
    if not is_visible(r_wr) and is_visible(l_wr) and is_visible(l_sh):
        is_waving = l_wr.y < l_sh.y
        hand_wave_history.append(is_waving)
        # Require 3 positive votes in the rolling window to damp single-frame noise.
        if hand_wave_history.count(True) >= 3:
            return "acenando"

    # 3. Dancing: both wrists visible and the left wrist raised above the shoulder.
    if is_visible(r_wr) and is_visible(l_wr) and is_visible(l_sh):
        is_dancing = l_wr.y < l_sh.y
        dancing_history.append(is_dancing)
        if dancing_history.count(True) >= 3:
            return "dancando"

    # 4. Person in profile. Only the right shoulder is required to be visible; the
    #    other points are compared by coordinates alone, so guard against a point
    #    that could not be looked up instead of dereferencing None.
    if is_visible(r_sh):
        wrist_past_elbow = (
            r_wr is not None and r_elbow is not None and r_wr.x > r_elbow.x
        )
        right_shoulder_nearer = l_sh is not None and r_sh.z < l_sh.z
        if wrist_past_elbow or right_shoulder_nearer:
            return "perfil"

    # 5. Smile or grimace: fallback when both eyes are visible.
    if is_visible(r_eye) and is_visible(l_eye):
        return "sorriso/careta"

    return "desconhecida"


def detect_emotions(video_path, output_path, resize_factor=0.5, frame_skip=3,
                    report_path="relatorio_analise_video.json"):
    """Annotate a video with per-face emotions and a pose-based activity label.

    Every ``frame_skip``-th frame is analysed: the module-level MediaPipe
    ``pose`` estimator feeds ``classify_activity`` and ``DeepFace.analyze``
    estimates the emotion of each detected face. Analysed frames are drawn on;
    the remaining frames are copied to the output unchanged. A JSON summary is
    written once the whole video has been read.

    Args:
        video_path: Path of the video to read.
        output_path: Path of the annotated video to write (mp4v codec).
        resize_factor: Scale applied to every frame before analysis and writing.
        frame_skip: Analyse one frame out of this many; values below 1 are
            treated as 1.
        report_path: Destination of the JSON report.

    Returns:
        None. Prints an error and returns early if the input cannot be opened.
    """
    # A zero step would raise ZeroDivisionError in the modulo test below and a
    # negative one is meaningless, so fall back to analysing every frame.
    if frame_skip < 1:
        frame_skip = 1

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Erro ao abrir o vídeo.")
        return

    emotion_counter = Counter()
    activity_counter = Counter()
    analyzed_frame_count = 0
    frame_index = 0

    # NOTE(review): histories are keyed by the exact bounding box, so the same
    # face only shares a history across frames when its box is pixel-identical.
    # In practice the smoothing rarely kicks in and these dicts keep growing;
    # a real face tracker (or a coarser key) would be needed to fix that.
    face_emotion_history = {}
    previous_emotion = {}

    out = None
    try:
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep the fractional rate (e.g. 29.97) so the output does not drift;
        # fall back to 30 when the container reports nothing usable.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_size = (int(width * resize_factor), int(height * resize_factor))
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
        if not out.isOpened():
            # Keep going so the report is still produced, but say why the
            # output video will be missing.
            print("Erro ao criar o vídeo de saída.")

        # The frame count in the header is only a hint for the progress bar;
        # the loop itself runs until the decoder stops returning frames.
        with tqdm(total=total_frames if total_frames > 0 else None,
                  desc="Processando vídeo") as progress:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                progress.update(1)

                if resize_factor != 1.0:
                    frame = cv2.resize(frame, frame_size)

                frame_index += 1
                if frame_index % frame_skip != 0:
                    # Skipped frames are passed through without annotations.
                    out.write(frame)
                    continue

                # MediaPipe expects RGB, OpenCV delivers BGR.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results_pose = pose.process(rgb_frame)

                atividade = classify_activity(results_pose.pose_landmarks)

                analyzed_frame_count += 1
                activity_counter[atividade] += 1

                try:
                    result = DeepFace.analyze(
                        frame,
                        actions=['emotion'],
                        enforce_detection=False,
                        detector_backend='retinaface'
                    )
                except Exception as e:
                    print(f"[Frame {frame_index}] Erro ao analisar: {e}")
                    out.write(frame)
                    continue

                if isinstance(result, list) and result:
                    for face in result:
                        try:
                            # Clamp the reported box to the frame.
                            region = face.get('region', {})
                            x = max(0, region.get('x', 0))
                            y = max(0, region.get('y', 0))
                            w = min(region.get('w', 0), frame.shape[1] - x)
                            h = min(region.get('h', 0), frame.shape[0] - y)

                            dominant_emotion = face.get('dominant_emotion', None)
                            if not dominant_emotion:
                                # Nothing to count or label for this face.
                                continue
                            emotion_counter[dominant_emotion] += 1

                            face_id = (x, y, w, h)
                            history = face_emotion_history.setdefault(
                                face_id, deque(maxlen=10)
                            )
                            history.append(dominant_emotion)

                            most_common_emotion, count = Counter(history).most_common(1)[0]

                            # Switch label only once an emotion has 5 votes in
                            # the window; otherwise keep the previous label.
                            if count >= 5:
                                smoothed_emotion = most_common_emotion
                            else:
                                smoothed_emotion = previous_emotion.get(
                                    face_id, most_common_emotion
                                )
                            previous_emotion[face_id] = smoothed_emotion

                            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
                            # Put the text above the box unless that would
                            # leave the frame, then put it below.
                            text_y = y - 10 if y - 10 > 20 else y + h + 20
                            cv2.putText(frame, smoothed_emotion, (x, text_y),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)
                            cv2.putText(frame, atividade, (x, text_y + 25),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (36, 255, 12), 2)
                        except Exception as e:
                            print(f"Erro ao processar face: {e}")
                            continue

                out.write(frame)
    finally:
        # Always free the decoder/encoder, even when analysis raises.
        cap.release()
        if out is not None:
            out.release()

    # Build the automatic report.
    report = {
        "Total de frames analisados": analyzed_frame_count,
        "Total de anomalias (atividade 'desconhecida')": activity_counter["desconhecida"],
        "Distribuição de emoções": dict(emotion_counter),
        "Distribuição de atividades": dict(activity_counter)
    }

    with open(report_path, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=4, ensure_ascii=False)

    print(f"Relatório salvo em: {report_path}")

# Main execution: analyse the input video, writing the annotated copy and the JSON report
detect_emotions(input_video_path, output_video_path)


Processando vídeo: 100%|██████████| 3326/3326 [06:39<00:00,  8.33it/s]

Relatório salvo em: relatorio_analise_video.json



