In [2]:
import numpy as np
import matplotlib.pyplot as plt
import cv2
from glob import glob 
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import pickle
from skimage import feature
import os
import pandas as pd
from collections import Counter
from itertools import chain
from sklearn.decomposition import KernelPCA
from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision


In [3]:
file_path = 'labels/projectLabelsRaw.csv'
labels = pd.read_csv(file_path)
root = 'Videos'

file_names = sorted(glob(root+"/*"), key=lambda x: int(os.path.splitext(os.path.basename(x))[0]))
labels.head()

Unnamed: 0,annotation_id,annotator,created_at,id,lead_time,tricks,updated_at,url
0,1,1,2024-10-10T05:22:48.696876Z,1,51.152,"[{""start"":0.00703674832962138,""end"":4.07075890...",2024-10-10T05:22:48.696894Z,/data/upload/1/53888dc4-img-6368_2160p_25fps_1...
1,2,1,2024-10-10T05:24:24.742398Z,2,95.846,"[{""start"":0.7558730512249444,""end"":1.329608017...",2024-10-10T05:24:24.742417Z,/data/upload/1/0247b9d2-img-6369_1080p_25fps_1...
2,3,1,2024-10-10T05:25:53.957437Z,3,89.028,"[{""start"":5.656628619153675,""end"":6.4680066815...",2024-10-10T05:25:53.957457Z,/data/upload/1/a0686400-img-6370_1080p_25fps_1...
3,4,1,2024-10-10T05:26:43.707072Z,4,49.552,"[{""start"":1.9139955456570157,""end"":2.923768930...",2024-10-10T05:26:43.707090Z,/data/upload/1/91c7ef02-img-6371_1080p_25fps_1...
4,6,1,2024-10-14T02:26:48.192917Z,5,171.526,"[{""start"":0.6705745967741936,""end"":2.826729838...",2024-10-14T02:26:48.192935Z,/data/upload/1/96a0206d-1-hecho-con-clipchamp_...


In [4]:
file_names

['Videos/1.webm',
 'Videos/2.webm',
 'Videos/3.webm',
 'Videos/4.webm',
 'Videos/5.webm',
 'Videos/6.webm',
 'Videos/7.webm',
 'Videos/8.webm',
 'Videos/9.webm',
 'Videos/10.webm',
 'Videos/11.webm',
 'Videos/12.webm',
 'Videos/13.webm',
 'Videos/14.webm',
 'Videos/15.webm',
 'Videos/16.webm',
 'Videos/17.mp4',
 'Videos/18.mp4',
 'Videos/19.mp4',
 'Videos/20.mp4',
 'Videos/21.mp4',
 'Videos/22.mp4',
 'Videos/23.mp4',
 'Videos/24.mp4',
 'Videos/25.mp4',
 'Videos/27.mp4',
 'Videos/28.mp4',
 'Videos/29.mp4',
 'Videos/30.mp4',
 'Videos/33.mp4',
 'Videos/34.mp4',
 'Videos/35.mp4',
 'Videos/36.mp4',
 'Videos/37.webm',
 'Videos/38.webm',
 'Videos/39.webm',
 'Videos/40.webm',
 'Videos/41.webm',
 'Videos/42.webm',
 'Videos/43.webm',
 'Videos/44.webm',
 'Videos/45.webm',
 'Videos/46.webm',
 'Videos/47.webm',
 'Videos/48.webm',
 'Videos/49.webm',
 'Videos/50.webm',
 'Videos/51.webm']

In [5]:
fps = 30


# Función para convertir segundos a frames
def convert_to_frames(tricks, fps):
    tricks_dicts = eval(tricks)
    for trick in tricks_dicts:
        trick['start_frame'] = int(trick['start'] * fps)
        trick['end_frame'] = int(trick['end'] * fps)
    return tricks_dicts

# Aplicamos la conversión a todas las filas
labels['tricks_in_frames'] = labels['tricks'].apply(lambda x: convert_to_frames(x, fps))

# Mostramos las primeras filas con las etiquetas convertidas a frames
labels[['tricks', 'tricks_in_frames']].head()

Unnamed: 0,tricks,tricks_in_frames
0,"[{""start"":0.00703674832962138,""end"":4.07075890...","[{'start': 0.00703674832962138, 'end': 4.07075..."
1,"[{""start"":0.7558730512249444,""end"":1.329608017...","[{'start': 0.7558730512249444, 'end': 1.329608..."
2,"[{""start"":5.656628619153675,""end"":6.4680066815...","[{'start': 5.656628619153675, 'end': 6.4680066..."
3,"[{""start"":1.9139955456570157,""end"":2.923768930...","[{'start': 1.9139955456570157, 'end': 2.923768..."
4,"[{""start"":0.6705745967741936,""end"":2.826729838...","[{'start': 0.6705745967741936, 'end': 2.826729..."


In [6]:
labels.to_csv("labels/UpdatedLables.csv", index=False)

In [7]:
labelsNew = list()
labels_by_frames = list()
last_end = 0
trfr = labels['tricks_in_frames']

for n in range(len(trfr)):
    file_name = file_names[n]
    video = cv2.VideoCapture(file_name)  
    numframes = 0
    while(video.isOpened()):
        ret, f = video.read()
        numframes+=1
        if not ret:
            break
    f = 0
    video.release()
    tricks = trfr[n]
    labels_by_frames.append([])
    true_len = numframes
    for trick in range(len(tricks)):
        start = tricks[trick]['start_frame']
        end = tricks[trick]['end_frame']
        labelsl = tricks[trick]['labels']
        
        if(start > last_end + 1):
            labels_by_frames[n].append({
            'start_frame': last_end +1,
            'end_frame': start,
            'labels': None
            })

        labels_by_frames[n].append({
            'start_frame': start,
            'end_frame': end,
            'labels': labelsl
        })

        if(trick == len(tricks)-1 and end < true_len):

            labels_by_frames[n].append({
                'start_frame': end,
                'end_frame': true_len,
                'labels': None
            })
        last_end = end




In [8]:
print(labels_by_frames[0])

[{'start_frame': 0, 'end_frame': 122, 'labels': ['walking_to_camera']}, {'start_frame': 122, 'end_frame': 189, 'labels': ['walking_away']}, {'start_frame': 189, 'end_frame': 190, 'labels': None}]


In [9]:
max_frames = []
for n in range(len(labels_by_frames)):
   a = []
   max_frames.append(max([trick['end_frame'] for trick in labels_by_frames[n]]))

frames_labels = []
# Inicializamos una lista donde cada índice representa un frame
for max_frame in max_frames:
   frames_labels.append([None] * (max_frame + 1)) # +1 para incluir el último frame

# Rellenamos los frames con sus etiquetas según los rangos de start_frame y end_frame
for n in range(len(frames_labels)):
   for idx, trick in enumerate(labels_by_frames[n]):
         for frame in range(trick['start_frame'], trick['end_frame'] + 1):
            frames_labels[n][frame] = trick['labels']


frames_labels[0][:10]

[['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera']]

In [10]:

groups = []
labels = []
dic_clases = {}
file_tipe = ""

for n in frames_labels:
    print(len(n))


191
249
313
193
462
315
301
319
325
307
310
295
307
323
287
329
345
349
287
420
332
170
306
302
145
327
341
335
308
340
597
408
393
380
297
359
314
762
334
417
510
342
627
322
435
574
342
674


In [11]:
def draw_landmarks_on_image(rgb_image, detection_result):
  pose_landmarks_list = detection_result.pose_landmarks
  annotated_image = np.copy(rgb_image)


  for idx in range(len(pose_landmarks_list)):
    pose_landmarks = pose_landmarks_list[idx]


    pose_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    pose_landmarks_proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in pose_landmarks
    ])
    solutions.drawing_utils.draw_ladnmarks(
      annotated_image,
      pose_landmarks_proto,
      solutions.pose.POSE_CONNECTIONS,
      solutions.drawing_styles.get_default_pose_landmarks_style())
  return annotated_image

In [15]:
complete_frame_landmarks = list()
for h  in tqdm(range(len(file_names)-1)):
    groups = []
    labels = []
    file_name = file_names[h]
    video = cv2.VideoCapture(file_name)  
    frames = []
    while(video.isOpened()):
        ret, f = video.read()
        if(f is not None):
            #f = cv2.resize(f,(500,500)) 
            frames.append(f)
        else:
            video.release()
            break
    
    base_options = python.BaseOptions(model_asset_path='pose_landmarker_lite.task')
    options = vision.PoseLandmarkerOptions(
        base_options=base_options,
        output_segmentation_masks=True)
    mp_pose = mp.solutions.pose
    detector = mp_pose.Pose(model_complexity=0)

    for frame in frames:
        if frame.shape[-1] == 3:  
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        else:
            frame_rgb = frame  
        

        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame_rgb)


        detection_result = detector.process(frame_rgb)

        if detection_result.pose_landmarks:
            
            frame_landmarks = list()
            for i, landmark in enumerate(detection_result.pose_landmarks.landmark):
                
                frame_data = dict()
                #landmark = landmarkl[n]
                x = landmark.x
                y = landmark.y
                z = landmark.z
                vis = landmark.visibility
                frame_data[f'landmark_{i}_x'] = x
                frame_data[f'landmark_{i}_y'] = y
                frame_data[f'landmark_{i}_z'] = z
                frame_data[f'landmark_{i}_vis'] = vis
                frame_landmarks.append(frame_data.copy())
            complete_frame_landmarks.append(frame_landmarks.copy())
            
            # Añadir el diccionario a la lista
            
        
        
        
        #annotated_image = draw_landmarks_on_image(frame_rgb, detection_result)
        
        #cv2.imshow("Pose Detection", cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))

        #if cv2.waitKey(1) & 0xFF == ord('q'):
           # break
    
    
'''
    for n in range(len(complete_frame_landmarks )-5):

            group = []
            group_labels = []
            for i in range(n, n+5):
                group.append(complete_frame_landmarks[i])
                group_labels.append(frames_labels[h][i])
            

            flattened_labels = list(chain.from_iterable(
                label if label is not None else  [None]  
                for label in group_labels
            ))

            if flattened_labels:
                most_common_label = Counter(flattened_labels).most_common(1)[0][0]
            else:
                most_common_label = None 
            
            groups.append(group)
            labels.append(most_common_label)
    
    '''


  0%|          | 0/47 [00:00<?, ?it/s]I0000 00:00:1731788663.731151   26573 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1731788663.731946   33563 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.2), renderer: AMD Radeon RX 6650 XT (navi23, LLVM 15.0.7, DRM 3.42, 5.15.0-125-generic)
W0000 00:00:1731788663.791666   33549 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1731788663.810056   33559 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
  2%|▏         | 1/47 [00:06<04:55,  6.41s/it]I0000 00:00:1731788669.060285   26573 gl_context_egl.cc:85] Successfully initialized EGL. Major : 1 Minor: 5
I0000 00:00:1731788669.061053   33623 gl_context.cc:357] GL version: 3.2 (OpenGL ES 3.2 Mesa 23.2.1-1ubuntu3.1~22.04.2), renderer

'\n    for n in range(len(complete_frame_landmarks )-5):\n\n            group = []\n            group_labels = []\n            for i in range(n, n+5):\n                group.append(complete_frame_landmarks[i])\n                group_labels.append(frames_labels[h][i])\n            \n\n            flattened_labels = list(chain.from_iterable(\n                label if label is not None else  [None]  \n                for label in group_labels\n            ))\n\n            if flattened_labels:\n                most_common_label = Counter(flattened_labels).most_common(1)[0][0]\n            else:\n                most_common_label = None \n            \n            groups.append(group)\n            labels.append(most_common_label)\n    \n    '

In [16]:
organized_frames=list()
for n in complete_frame_landmarks:
    thing = dict()
    for land  in n:
        for title in land:
            thing[title] = land[title]
    organized_frames.append(thing.copy())

organized_frames

[{'landmark_0_x': 0.5043125152587891,
  'landmark_0_y': 0.3745996356010437,
  'landmark_0_z': -0.1616305261850357,
  'landmark_0_vis': 0.9999620914459229,
  'landmark_1_x': 0.5143917798995972,
  'landmark_1_y': 0.3730083703994751,
  'landmark_1_z': -0.1548425406217575,
  'landmark_1_vis': 0.9999382495880127,
  'landmark_2_x': 0.5179400444030762,
  'landmark_2_y': 0.3765362501144409,
  'landmark_2_z': -0.15489940345287323,
  'landmark_2_vis': 0.9999212026596069,
  'landmark_3_x': 0.5199494361877441,
  'landmark_3_y': 0.37940508127212524,
  'landmark_3_z': -0.1549660861492157,
  'landmark_3_vis': 0.9999303817749023,
  'landmark_4_x': 0.5097709894180298,
  'landmark_4_y': 0.36631596088409424,
  'landmark_4_z': -0.14650264382362366,
  'landmark_4_vis': 0.9999165534973145,
  'landmark_5_x': 0.5093128681182861,
  'landmark_5_y': 0.3644658923149109,
  'landmark_5_z': -0.14660684764385223,
  'landmark_5_vis': 0.999883770942688,
  'landmark_6_x': 0.5065686702728271,
  'landmark_6_y': 0.36222088

In [17]:
print(frames_labels[0][0])

['walking_to_camera']


In [18]:
flattened_labels =list()

for n in frames_labels:
    for i in n:
        flattened_labels.append(i)

flattened_labels

[['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_camera'],
 ['walking_to_ca

In [19]:

for n in range(len(organized_frames)):
    organized_frames[n]['labels'] = flattened_labels[n]
    organized_frames[n]['frame'] = n

for n in organized_frames[0]:
    print(n)

landmark_0_x
landmark_0_y
landmark_0_z
landmark_0_vis
landmark_1_x
landmark_1_y
landmark_1_z
landmark_1_vis
landmark_2_x
landmark_2_y
landmark_2_z
landmark_2_vis
landmark_3_x
landmark_3_y
landmark_3_z
landmark_3_vis
landmark_4_x
landmark_4_y
landmark_4_z
landmark_4_vis
landmark_5_x
landmark_5_y
landmark_5_z
landmark_5_vis
landmark_6_x
landmark_6_y
landmark_6_z
landmark_6_vis
landmark_7_x
landmark_7_y
landmark_7_z
landmark_7_vis
landmark_8_x
landmark_8_y
landmark_8_z
landmark_8_vis
landmark_9_x
landmark_9_y
landmark_9_z
landmark_9_vis
landmark_10_x
landmark_10_y
landmark_10_z
landmark_10_vis
landmark_11_x
landmark_11_y
landmark_11_z
landmark_11_vis
landmark_12_x
landmark_12_y
landmark_12_z
landmark_12_vis
landmark_13_x
landmark_13_y
landmark_13_z
landmark_13_vis
landmark_14_x
landmark_14_y
landmark_14_z
landmark_14_vis
landmark_15_x
landmark_15_y
landmark_15_z
landmark_15_vis
landmark_16_x
landmark_16_y
landmark_16_z
landmark_16_vis
landmark_17_x
landmark_17_y
landmark_17_z
landmark_17_

In [20]:
df = pd.DataFrame(organized_frames)


In [None]:
df.to_csv('datos.csv', index=False)

: 