# Experimentos para el preprocesamiento de los datos y arquitectura del modelo.
En este cuaderno se va a realizar una exploración del dataset de las anotaciones de esqueletos 2D y también se experimentará con distintas arquitecturas.

In [None]:
# Create a DataFrame summary and a PyTorch Dataset for UCF101 2D skeletons
# This cell loads the pickle, creates a short DataFrame for easier inspection,
# and defines UCFSkeletonDataset which returns a chosen person's keypoints as a tensor.
import pickle
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset

# Load the pickle file
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# point this at a dataset file obtained from a trusted source.
ucfdataset = 'Dataset/2d-skels/ucf101_2d.pkl'
with open(ucfdataset, 'rb') as f:
    ucf_data = pickle.load(f)

# Fall back to an empty list when the pickle has no 'annotations' entry
annotations = ucf_data.get('annotations', [])

# Build a compact DataFrame that summarizes each annotation for quick inspection
rows = []
for ann in annotations:
    kp = ann.get('keypoint')
    kp_shape = None
    if kp is not None:
        try:
            # np.shape reads the shape of an existing ndarray without copying
            # it (np.array(kp) would duplicate every keypoint array).
            kp_shape = np.shape(kp)
        except (TypeError, ValueError):
            # Best effort: keep None when the shape cannot be determined
            kp_shape = None
    label = ann.get('label')
    rows.append({
        'frame_dir': ann.get('frame_dir'),
        'total_frames': ann.get('total_frames'),
        'img_shape': ann.get('img_shape'),
        'original_shape': ann.get('original_shape'),
        'label': None if label is None else int(label),
        'keypoint_shape': kp_shape,
        # True only when the key is present and its value is not None
        'has_keypoint_score': ann.get('keypoint_score') is not None,
    })

df_ann = pd.DataFrame(rows)

# Define a Dataset wrapper
class UCFSkeletonDataset(Dataset):
    """Dataset over the ``annotations`` list loaded from ucf101_2d.pkl.

    Each annotation must provide ``keypoint`` as a 4-D array-like, indexed
    here as (M persons, T frames, V joints, C coords), and may provide
    ``keypoint_score`` indexed as (M, T, V). One person is returned per
    sample: when several persons are present, scores are available and
    ``select_best_by_score`` is true, the person with the highest mean score
    is chosen; otherwise person 0 is used.

    Args:
        annotations_list: sequence of annotation dicts.
        select_best_by_score: pick the person with the highest mean
            keypoint score instead of always taking person 0.
        dtype: torch dtype of the returned 'keypoint' and 'score' tensors.

    Each item is a dict with:
      - 'keypoint': tensor of ``dtype`` shaped (T, V, C)
      - 'label': int, or None when the annotation has no label
      - 'frame_dir', 'total_frames': copied from the annotation
      - 'score' (only when ``keypoint_score`` is present): tensor of
        ``dtype`` shaped (T, V)

    Raises:
        ValueError: from ``__getitem__`` when ``keypoint`` is not 4-D.
    """
    def __init__(self, annotations_list, select_best_by_score=True, dtype=torch.float32):
        self.annotations = annotations_list
        self.select_best_by_score = select_best_by_score
        self.dtype = dtype

    def __len__(self):
        return len(self.annotations)

    def _to_tensor(self, array):
        """Copy ``array`` into a new tensor of ``self.dtype``."""
        # Convert in NumPy first so any numeric input dtype is accepted; use
        # float64 as the intermediate only when it was requested, so that no
        # precision is lost on the way. astype always copies, which keeps the
        # returned tensor independent from the stored annotation.
        np_dtype = np.float64 if self.dtype == torch.float64 else np.float32
        return torch.from_numpy(array.astype(np_dtype)).to(self.dtype)

    def __getitem__(self, idx):
        ann = self.annotations[idx]

        # asarray does not copy data that is already an ndarray; the single
        # copy happens in _to_tensor, on the selected person only.
        kp = np.asarray(ann.get('keypoint'))  # M x T x V x C
        if kp.ndim != 4:
            raise ValueError(f"Unexpected keypoint shape for idx={idx}: {kp.shape}")

        raw_score = ann.get('keypoint_score', None)
        scores = None if raw_score is None else np.asarray(raw_score)  # M x T x V

        # choose person: default to the first one, otherwise the one whose
        # score averaged over time and keypoints is highest
        person_idx = 0
        if kp.shape[0] > 1 and scores is not None and self.select_best_by_score:
            person_idx = int(scores.mean(axis=(1, 2)).argmax())

        label = ann.get('label')
        sample = {
            'keypoint': self._to_tensor(kp[person_idx]),  # (T, V, C)
            'label': None if label is None else int(label),
            'frame_dir': ann.get('frame_dir'),
            'total_frames': ann.get('total_frames')
        }

        if scores is not None:
            sample['score'] = self._to_tensor(scores[person_idx])  # (T, V)

        return sample

# Wrap the loaded annotations in the Dataset
ucf_dataset = UCFSkeletonDataset(annotations)

# Sanity checks: report sizes, preview the summary table, inspect one sample
print(f"Loaded {len(annotations)} annotations. df_ann shape: {df_ann.shape}")
print(df_ann.head())
if len(ucf_dataset) == 0:
    print('Dataset is empty.')
else:
    # Show the structure of the first sample only
    s = ucf_dataset[0]
    print('\nExample sample keys:', list(s))
    print('keypoint tensor shape (T, V, C):', s['keypoint'].shape)
    if 'score' in s:
        print('score tensor shape (T, V):', s['score'].shape)
    print('label:', s['label'])


Loaded 13320 annotations. df_ann shape: (13320, 7)
                  frame_dir  total_frames   img_shape original_shape  label  \
0  v_ApplyEyeMakeup_g08_c01           119  (256, 340)     (256, 340)      0   
1  v_ApplyEyeMakeup_g08_c02           116  (256, 340)     (256, 340)      0   
2  v_ApplyEyeMakeup_g08_c03           145  (256, 340)     (256, 340)      0   
3  v_ApplyEyeMakeup_g08_c04           223  (256, 340)     (256, 340)      0   
4  v_ApplyEyeMakeup_g08_c05           275  (256, 340)     (256, 340)      0   

    keypoint_shape  has_keypoint_score  
0  (1, 119, 17, 2)                True  
1  (1, 116, 17, 2)                True  
2  (1, 145, 17, 2)                True  
3  (2, 223, 17, 2)                True  
4  (1, 275, 17, 2)                True  

Example sample keys: ['keypoint', 'label', 'frame_dir', 'total_frames', 'score']
keypoint tensor shape (T, V, C): torch.Size([119, 17, 2])
score tensor shape (T, V): torch.Size([119, 17])
label: 0
