In [2]:
import cv2
import mediapipe as mp
import numpy as np
import tqdm
import pandas as pd
from collections import deque

# Label inserted into the names of the generated output files.
Name = 'Bruna'

# Input video and the two artefacts produced from it (annotated video, Excel sheet).
VIDEO_INPUT_PATH = "/Users/borba/Desktop/bruna.mp4"
VIDEO_OUTPUT_PATH = f"/Users/borba/Desktop/video_tracked_{Name}.mp4"
EXCEL_OUTPUT_PATH = f"/Users/borba/Desktop/hand_tracking_data_{Name}.xlsx"

# Shortcuts to the MediaPipe Pose and Hands solution modules.
mp_pose, mp_hands = mp.solutions.pose, mp.solutions.hands

# Body-pose estimator (supplies the shoulder and elbow landmarks).
pose = mp_pose.Pose(
    model_complexity=2,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7,
)

# Hand estimator (supplies the wrist and MCP landmarks) for up to two hands.
hands = mp_hands.Hands(
    max_num_hands=2,
    min_detection_confidence=0.7,
    min_tracking_confidence=0.7,
)

# Moving-average smoothing of a tracked point
def smooth_points(history, new_point, maxlen=5):
    """Append *new_point* to *history* and return the averaged integer point.

    Args:
        history: Mutable sequence (normally a ``collections.deque`` created
            with its own ``maxlen``) holding the most recent raw points. It is
            modified in place.
        new_point: Sequence of coordinates (e.g. ``(x, y)``) or ``None``.
        maxlen: Unused; kept only for backward compatibility. The averaging
            window is whatever *history* itself retains.

    Returns:
        A tuple of ``int`` with the per-coordinate mean of everything in
        *history* (fractions are truncated toward zero by ``int``), or ``None``
        when *new_point* is ``None`` or empty, in which case *history* is left
        untouched.
    """
    # Explicit None/empty check instead of truthiness: ``if new_point:`` raises
    # "truth value of an array is ambiguous" when the point is a NumPy array.
    if new_point is None or len(new_point) == 0:
        return None

    history.append(new_point)
    smoothed_point = np.mean(history, axis=0)
    return tuple(int(value) for value in smoothed_point)

# Interactive calibration of the pixels-per-centimetre scale
def get_pixel_to_cm_ratio(frame, reference_cm=33):
    """Ask the user to click two points a known distance apart on *frame*.

    The frame is shown in an OpenCV window named "Frame". The first two left
    clicks are recorded and drawn; pressing ``q`` or ``Q`` closes the window.

    Args:
        frame: Image (NumPy array) to display. It is not modified; the
            markers are drawn on a copy.
        reference_cm: Real-world distance, in centimetres, between the two
            clicked points. Defaults to 33, the value previously hard-coded.

    Returns:
        The distance in pixels between the two clicks divided by
        *reference_cm* (pixels per centimetre).

    Raises:
        ValueError: If *reference_cm* is not positive, if fewer than two
            points were clicked, or if both clicks landed on the same pixel.
    """
    if reference_cm <= 0:
        raise ValueError("A distância de referência deve ser positiva.")

    print(f"Clique em dois pontos para definir uma distância de {reference_cm:g} cm. Pressione 'Q' para continuar o processamento.")

    canvas = frame.copy()  # draw on a copy so the caller's frame stays clean
    points = []

    def mouse_callback(event, x, y, flags, param):
        # Ignore everything except left clicks, and ignore clicks after the
        # second one so an accidental extra click cannot invalidate the run.
        if event != cv2.EVENT_LBUTTONDOWN or len(points) >= 2:
            return
        points.append((x, y))
        cv2.circle(canvas, (x, y), 5, (0, 255, 0), -1)
        if len(points) == 2:
            cv2.line(canvas, points[0], points[1], (0, 255, 0), 2)

    cv2.imshow("Frame", canvas)
    cv2.setMouseCallback("Frame", mouse_callback)

    try:
        while True:
            # Redraw every iteration so markers added by the callback show up.
            cv2.imshow("Frame", canvas)
            key = cv2.waitKey(1) & 0xFF
            # The prompt says 'Q', so accept both cases (Caps Lock / Shift).
            if key in (ord('q'), ord('Q')):
                break
    finally:
        cv2.destroyAllWindows()

    if len(points) != 2:
        raise ValueError("Você deve selecionar exatamente dois pontos.")

    pixel_distance = np.linalg.norm(np.array(points[0]) - np.array(points[1]))
    if pixel_distance == 0:
        # A zero ratio would be meaningless and break any later division.
        raise ValueError("Os dois pontos selecionados devem ser distintos.")
    return pixel_distance / reference_cm

# ---------------------------------------------------------------------------
# Main processing: track shoulder/elbow (Pose) and wrist/MCP (Hands) on every
# frame, write an annotated copy of the video and export the smoothed pixel
# coordinates to Excel.
# ---------------------------------------------------------------------------

# Column/joint order used for both the smoothing histories and the Excel sheet.
_JOINT_ORDER = [
    "MCP_R", "Wrist_R", "Elbow_R", "Shoulder_R",
    "MCP_L", "Wrist_L", "Elbow_L", "Shoulder_L",
]

# Pose landmarks exported directly from the Pose model.
_POSE_JOINTS = {
    "Shoulder_R": mp_pose.PoseLandmark.RIGHT_SHOULDER,
    "Elbow_R": mp_pose.PoseLandmark.RIGHT_ELBOW,
    "Shoulder_L": mp_pose.PoseLandmark.LEFT_SHOULDER,
    "Elbow_L": mp_pose.PoseLandmark.LEFT_ELBOW,
}

# Hand landmark indices: 0 is the wrist, 5 is the index-finger MCP joint.
_HAND_WRIST_INDEX = 0
_HAND_MCP_INDEX = 5

# Marker colour in OpenCV's BGR order. The original comment asked for red but
# used (255, 0, 0), which is blue in BGR.
_MARKER_COLOR_BGR = (0, 0, 255)

# NOTE(review): MediaPipe documents its handedness label as assuming a
# mirrored (selfie) image. The label is only used as a fallback when no pose
# is available; set this to True if the input video is mirrored — confirm.
_VIDEO_IS_MIRRORED = False


def _to_pixels(landmark):
    """Convert a normalized MediaPipe landmark to (x, y) pixel coordinates."""
    return (landmark.x * frame_width, landmark.y * frame_height)


def _side_from_label(label):
    """Map a MediaPipe handedness label ("Left"/"Right") to "L"/"R"."""
    is_right = (label == "Right")
    if not _VIDEO_IS_MIRRORED:
        is_right = not is_right
    return "R" if is_right else "L"


def _assign_hand_sides(hand_wrists, pose_wrists, fallback_sides):
    """Return the side ("R"/"L") for each of up to two detected hands.

    When pose wrists are available, hands are matched to the nearest pose
    wrist so the hand columns stay consistent with the elbow/shoulder columns
    of the same arm. Otherwise *fallback_sides* is used, forcing two hands
    onto different sides.
    """
    if not hand_wrists:
        return []

    if pose_wrists is None:
        sides = list(fallback_sides[:len(hand_wrists)])
        if len(sides) == 2 and sides[0] == sides[1]:
            sides[1] = "L" if sides[0] == "R" else "R"
        return sides

    def gap(hand_index, side):
        hx, hy = hand_wrists[hand_index]
        px, py = pose_wrists[side]
        return float(np.hypot(hx - px, hy - py))

    if len(hand_wrists) == 1:
        return ["R" if gap(0, "R") <= gap(0, "L") else "L"]

    # Two hands: pick the pairing with the smaller total distance.
    straight = gap(0, "R") + gap(1, "L")
    crossed = gap(0, "L") + gap(1, "R")
    return ["R", "L"] if straight <= crossed else ["L", "R"]


# Open the input video and fail early if it cannot be opened.
cap = cv2.VideoCapture(VIDEO_INPUT_PATH)
if not cap.isOpened():
    cap.release()
    raise ValueError("Não foi possível ler o vídeo.")

out = None
progress_bar = None
data = []  # one row per processed frame

try:
    # Video properties. The frame rate is kept as a float: truncating e.g.
    # 29.97 to 29 skews both the timestamps and the output playback speed.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:  # also catches NaN
        raise ValueError("Não foi possível determinar o FPS do vídeo.")
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Calibrate pixels/cm on the first frame, then rewind to the start.
    ret, first_frame = cap.read()
    if not ret:
        raise ValueError("Não foi possível ler o vídeo.")
    # NOTE(review): this ratio is computed but not applied anywhere below;
    # the exported coordinates are in pixels.
    pixel_to_cm_ratio = get_pixel_to_cm_ratio(first_frame)
    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

    # Create the output video only after the input has been validated, so a
    # failed calibration does not leave an empty file behind.
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(VIDEO_OUTPUT_PATH, fourcc, fps, (frame_width, frame_height))

    # Per-joint history of recent raw points for the moving average.
    history = {joint: deque(maxlen=5) for joint in _JOINT_ORDER}

    progress_bar = tqdm.tqdm(
        total=total_frames if total_frames > 0 else None,
        desc="Processando Vídeo",
        unit="frame",
    )

    frame_count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # MediaPipe expects RGB

        # Time associated with this frame (frame numbers are 1-based).
        timestamp = frame_count / fps

        # Smoothed pixel position of every joint for this frame (None = not detected).
        points = {joint: None for joint in _JOINT_ORDER}

        # --- Pose: shoulders and elbows -------------------------------------
        pose_results = pose.process(image_rgb)
        pose_wrists = None
        if pose_results.pose_landmarks:
            landmarks = pose_results.pose_landmarks.landmark
            for joint, landmark_id in _POSE_JOINTS.items():
                points[joint] = smooth_points(history[joint], _to_pixels(landmarks[landmark_id]))
            # Pose wrists are only used to decide which arm each hand belongs to.
            pose_wrists = {
                "R": _to_pixels(landmarks[mp_pose.PoseLandmark.RIGHT_WRIST]),
                "L": _to_pixels(landmarks[mp_pose.PoseLandmark.LEFT_WRIST]),
            }

        # --- Hands: wrist and index-finger MCP ------------------------------
        hands_results = hands.process(image_rgb)
        if hands_results.multi_hand_landmarks:
            detected_hands = list(hands_results.multi_hand_landmarks)[:2]
            handedness = hands_results.multi_handedness or []

            # Detection order is arbitrary, so the index must not decide the
            # side. The classifier label is only a fallback; where it is
            # missing, fall back to the old index-based rule.
            fallback_sides = []
            for index in range(len(detected_hands)):
                if index < len(handedness):
                    fallback_sides.append(_side_from_label(handedness[index].classification[0].label))
                else:
                    fallback_sides.append("R" if index == 0 else "L")

            raw_wrists = [_to_pixels(hand.landmark[_HAND_WRIST_INDEX]) for hand in detected_hands]
            sides = _assign_hand_sides(raw_wrists, pose_wrists, fallback_sides)

            for hand_landmarks, hand_type in zip(detected_hands, sides):
                points[f"Wrist_{hand_type}"] = smooth_points(
                    history[f"Wrist_{hand_type}"],
                    _to_pixels(hand_landmarks.landmark[_HAND_WRIST_INDEX]),
                )
                points[f"MCP_{hand_type}"] = smooth_points(
                    history[f"MCP_{hand_type}"],
                    _to_pixels(hand_landmarks.landmark[_HAND_MCP_INDEX]),
                )

        # --- Record the row and annotate the frame --------------------------
        row = [frame_count, timestamp]
        for joint in _JOINT_ORDER:
            point = points[joint]
            row.extend(point if point is not None else (None, None))
            if point is not None:
                cv2.circle(frame, point, 8, _MARKER_COLOR_BGR, -1)
        data.append(row)

        out.write(frame)
        progress_bar.update(1)
finally:
    # Always release the video handles and the progress bar, even on error.
    cap.release()
    if out is not None:
        out.release()
    if progress_bar is not None:
        progress_bar.close()

# Save the collected data to an Excel file.
columns = ["Frame", "Time"] + [f"{joint}_{axis}" for joint in _JOINT_ORDER for axis in ("X", "Y")]
df = pd.DataFrame(data, columns=columns)
df.to_excel(EXCEL_OUTPUT_PATH, index=False)

print(f"Processo concluído! Vídeo salvo como {VIDEO_OUTPUT_PATH} e dados salvos como {EXCEL_OUTPUT_PATH}.")


I0000 00:00:1738090927.826997 5615202 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M1
I0000 00:00:1738090927.839689 5615202 gl_context.cc:357] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M1
W0000 00:00:1738090927.877034 5623458 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1738090927.890451 5623458 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1738090927.973226 5623455 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Clique em dois pontos para definir uma distância de 33 cm. Pressione 'Q' para continuar o processamento.


W0000 00:00:1738090928.086210 5623455 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
Processando Vídeo: 100%|██████████| 2335/2335 [04:18<00:00,  9.02frame/s]


Processo concluído! Vídeo salvo como /Users/borba/Desktop/video_tracked_Bruna.mp4 e dados salvos como /Users/borba/Desktop/hand_tracking_data_Bruna.xlsx.
