In [2]:
import cv2
import mediapipe as mp
import os

In [12]:
def initialize_mediapipe():
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils
    hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5, min_tracking_confidence=0.5)
    return mp_hands, mp_drawing, hands

def procesar_frame(frame, hands):
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image.flags.writeable = False
    results = hands.process(image)
    return results

def calcular_diferencia(puntos_previos,puntos_actuales, handedness_prev, handedness_actual):
    if puntos_previos is None or puntos_actuales is None:
            return 10.0
    if len(puntos_previos) != len(puntos_actuales):
        return 10.0

    num_hands_previas = len(puntos_previos)
    num_hands_actuales = len(puntos_actuales)

    if num_hands_previas != num_hands_actuales:
        return 10.0

    total_diff = 0.0
    if len(puntos_previos) == len(puntos_actuales) and len(puntos_actuales) == 2:
        if handedness_prev[0].classification[0].label != handedness_actual[0].classification[0].label:
            puntos_actuales = puntos_actuales[::-1]
    for i in range(num_hands_previas):
        hand_prev = puntos_previos[i].landmark
        hand_actual = puntos_actuales[i].landmark

        diff = sum([
            abs(lm1.x - lm2.x) + abs(lm1.y - lm2.y) + abs(lm1.z - lm2.z)
            for lm1, lm2 in zip(hand_prev, hand_actual)
        ])
        total_diff += diff
    total_diff/=21
    if num_hands_previas >= 2:
        total_diff /= num_hands_previas
    return float(total_diff)
def extractFrames(video):
    ret = 1
    while ret:
        ret, frame = video.read()
        if not ret:
            break
        yield frame
    video.release()

def extractKeyFrames(video, min_frame_interval=1):
    mp_hands, mp_drawing, hands = initialize_mediapipe()
    puntos_previos = []
    handedness_prev = []
    key_frames = []
    frame_count = 0
    thresholdsInKF=[]
    thresholdsNKF=[]
    adjust_th=True

    for frame in extractFrames(video):
        if len(key_frames)==0:
            results = procesar_frame(frame,hands)
            if results[0].multi_hand_landmarks:
                handedness_prev = results[0].multi_handedness if results[0].multi_handedness else []
                puntos_previos=results[0].multi_hand_landmarks
        elif frame_count - key_frames[-1][1] > min_frame_interval:
            results = procesar_frame(frame,hands)
            if results[0].multi_hand_landmarks:
                puntos_actuales = results[0].multi_hand_landmarks
                handedness_actual = results[0].multi_handedness if results[0].multi_handedness else []
                diff = calcular_diferencia(puntos_actuales=puntos_actuales,puntos_previos=puntos_previos,handedness_actual=handedness_actual,handedness_prev=handedness_prev)
                print(diff)

In [4]:
# def initialize_mediapipe():
#     mp_hands = mp.solutions.hands
#     mp_drawing = mp.solutions.drawing_utils
#     hands = mp_hands.Hands(static_image_mode=False, max_num_hands=2, min_detection_confidence=0.5, min_tracking_confidence=0.5)
#     return mp_hands, mp_drawing, hands

# def procesar_frame(frame, hands):
#     image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
#     image.flags.writeable = False
#     results = hands.process(image)
#     return results

# def calcular_diferencia(puntos_previos,puntos_actuales, handedness_prev, handedness_actual):
#     if puntos_previos is None or puntos_actuales is None:
#         return 10.0
    
#     # Comprobamos si alguna de las listas está vacía o si las dos listas tienen diferentes longitudes
#     if len(puntos_previos) != len(puntos_actuales):
#         return 10.0

#     num_hands_previas = len(puntos_previos)
#     num_hands_actuales = len(puntos_actuales)

#     if num_hands_previas != num_hands_actuales:
#         return 10.0

#     total_diff = 0.0
#     if len(puntos_previos) == len(puntos_actuales) and len(puntos_actuales) == 2:
#         if handedness_prev[0].classification[0].label != handedness_actual[0].classification[0].label:
#             puntos_actuales = puntos_actuales[::-1]
#     for i in range(num_hands_previas):
#         hand_prev = puntos_previos[i].landmark
#         hand_actual = puntos_actuales[i].landmark

#         diff = sum([
#             abs(lm1.x - lm2.x) + abs(lm1.y - lm2.y) + abs(lm1.z - lm2.z)
#             for lm1, lm2 in zip(hand_prev, hand_actual)
#         ])
#         total_diff += diff
#     print("Total dif", total_diff)
#     if num_hands_previas >= 2:
#         print(total_diff)
#         total_diff /= num_hands_previas
#     print(total_diff)
#     return float(total_diff)

# def extract_key_frames(video, threshold=4.4, min_frame_interval=3):
#     mp_hands, mp_drawing, hands = initialize_mediapipe()
#     puntos_previos = []
#     handedness_prev = []
#     key_frames = []
#     frame_count = 0
#     print("Iniciando la extracción de frames clave")

#     while video.isOpened():
#         ret, frame = video.read()
#         if not ret:
#             print("Fin del video o error al leer frame")
#             break
        
#         frame_count += 1
#         if len(key_frames)==0 or frame_count - key_frames[-1][1] > min_frame_interval:
#             results = procesar_frame(frame, hands)
            
#             if results.multi_hand_landmarks:
#                 puntos_actuales = results.multi_hand_landmarks
#                 handedness_actual = results.multi_handedness if results.multi_handedness else []
#                 diff = calcular_diferencia(puntos_previos, puntos_actuales, handedness_prev, handedness_actual)

#                 if diff > threshold:
#                     key_frames.append((frame, frame_count))
#                     puntos_previos = puntos_actuales
#                     handedness_prev = handedness_actual
#             else:
#                 puntos_previos = None

#     video.release()
#     return [frame for frame, _ in key_frames]

# def save_key_frames(key_frames, output_folder):
#     for idx, key_frame in enumerate(key_frames):
#         cv2.imwrite(f'{output_folder}/key_frame_{idx}.png', key_frame)

In [13]:
video_path = 'C:\\Users\\48113164\\Documents\\GitHub\\SignAI-IA.dev\\AI-ML_Development\\Resources\\videoprueba1.mp4'
video = cv2.VideoCapture(video_path)
if not os.path.isfile(video_path):
    print(f"Error: El archivo {video_path} no existe.")
elif not video.isOpened():
    print("Error: No se pudo abrir el video.")
else:
    key_frames = extractKeyFrames(video)



In [1]:
import numpy as np

# Definir los parámetros
min_threshold = 0.05
max_threshold = 0.4
mid_size = 0.15
k = 15.5 # Ajusta para cambiar la sensibilidad

def compute_threshold(hands_media_size):
    if hands_media_size <= 0.01:
        min_threshold = 0.045
        max_threshold = 0.09
        max_size = 0.01
        min_size = 0.003
    elif hands_media_size<=0.1:
        min_threshold=0.09
        max_threshold=0.175
        max_size=0.1
        min_size=0.01
    elif hands_media_size<=0.15:
        min_threshold=0.09
        max_threshold=0.
    elif hands_media_size<=0.2:
        min_threshold=0.175
        max_threshold=0.25
        max_size=0.2
        min_size=0.1
    else:
        min_threshold=0.25
        max_threshold=0.4
        max_size=0.4
        min_size=0.2
    # threshold = min_threshold + (max_threshold - min_threshold) / (1 + np.exp(-k * (hands_media_size - mid_size)))
    # return threshold
    return min_threshold +hands_media_size*(max_threshold-min_threshold)/(max_size-min_size)
    if hands_media_size <=0.005:
        return 0.04 +(hands_media_size)*(0.08-0.04)/(0.005)
    if hands_media_size <= 0.01:
        return 0.08 + (hands_media_size - 0.005) * (0.1 - 0.08) / (0.01 - 0.005)
    elif hands_media_size <= 0.015:
        return 0.1 + (hands_media_size - 0.01) * (0.125 - 0.1) / (0.015 - 0.01)
    elif hands_media_size <= 0.02:
        return 0.15 + (hands_media_size - 0.015) * (0.15 - 0.125) / (0.02 - 0.015)
    elif hands_media_size <= 0.25:
        return 0.2 + (hands_media_size - 0.02) * (0.275 - 0.225) / (0.2 - 0.02)
    else:
        return 0.25 + (hands_media_size - 0.2) * (0.4 - 0.25) / (0.3 - 0.2)

# Ejemplo de uso
for value in [0.003,0.007,0.009,0.015,0.02,0.025,0.03,0.05,0.06,0.07,0.08,0.1,0.15,0.2,0.25,0.3,0.35]:
    hands_media_size = value
    threshold = compute_threshold(hands_media_size)
    print(f'Threshold para tamaño de manos {hands_media_size}: {threshold}')

Threshold para tamaño de manos 0.003: 0.06428571428571428
Threshold para tamaño de manos 0.007: 0.09
Threshold para tamaño de manos 0.009: 0.10285714285714284
Threshold para tamaño de manos 0.015: 0.10416666666666666
Threshold para tamaño de manos 0.02: 0.10888888888888888
Threshold para tamaño de manos 0.025: 0.11361111111111111
Threshold para tamaño de manos 0.03: 0.11833333333333332
Threshold para tamaño de manos 0.05: 0.1372222222222222
Threshold para tamaño de manos 0.06: 0.14666666666666667
Threshold para tamaño de manos 0.07: 0.1561111111111111
Threshold para tamaño de manos 0.08: 0.16555555555555554
Threshold para tamaño de manos 0.1: 0.1844444444444444
Threshold para tamaño de manos 0.15: 0.2875
Threshold para tamaño de manos 0.2: 0.325
Threshold para tamaño de manos 0.25: 0.4375
Threshold para tamaño de manos 0.3: 0.475
Threshold para tamaño de manos 0.35: 0.5125


In [33]:
import numpy as np
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt

# Datos de entrada (tamaños y umbrales deseados)
tamaños = np.array([0.005, 0.012, 0.02, 0.03, 0.05, 0.1,0.16])
umbrales = np.array([0.08, 0.1, 0.13,0.14,0.155, 0.18, 0.225])

# Definir la función exponencial
def func_exp(t, a, b, c):
    return a * np.exp(b * t) + c

# Ajustar la función a los datos
popt, _ = curve_fit(func_exp, tamaños, umbrales)

# Parámetros ajustados
a, b, c = popt
print(f"Parámetros ajustados: a = {a}, b = {b}, c = {c}")

# Graficar los datos y la función ajustada
t_values = np.linspace(0, 0.3, 100)
umbral_values = func_exp(t_values, *popt)

plt.figure(figsize=(10, 6))
plt.plot(tamaños, umbrales, 'o', label='Datos')
plt.plot(t_values, umbral_values, '-', label='Función ajustada')
plt.xlabel('Tamaño del recuadro')
plt.ylabel('Umbral')
plt.title('Ajuste de la función exponencial')
plt.legend()
plt.grid(True)
plt.show()

RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 800.

In [30]:
def calcularTH(t):
    a=5.5
    b = 0.14
    c = -5.4
    threshold=a * np.exp(b * t) + c
    return threshold
calcularTH(0.08)

np.float64(0.16194625146474095)