<a href="https://colab.research.google.com/github/LeoDinga/DL_Project/blob/main/extract_keypoints.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install mediapipe opencv-python tqdm --upgrade
import os
# os.kill(os.getpid(), 9)  # Reinicia o runtime para aplicar as mudanças

Collecting mediapipe
  Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.7 kB)
Collecting numpy<2 (from mediapipe)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.2-py3-none-any.whl.metadata (1.6 kB)
Downloading mediapipe-0.10.21-cp311-cp311-manylinux_2_28_x86_64.whl (35.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.6/35.6 MB[0m [31m18.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m73.

In [2]:

!git clone https://github.com/LeoDinga/DL_Project.git
%cd DL_Project


Cloning into 'DL_Project'...
remote: Enumerating objects: 285, done.[K
remote: Counting objects: 100% (155/155), done.[K
remote: Compressing objects: 100% (149/149), done.[K
remote: Total 285 (delta 65), reused 12 (delta 5), pack-reused 130 (from 1)[K
Receiving objects: 100% (285/285), 34.77 MiB | 24.04 MiB/s, done.
Resolving deltas: 100% (140/140), done.
/content/DL_Project


In [3]:
import mediapipe as mp
import cv2
import numpy as np
from tqdm import tqdm
import os
def convert_video_to_npy(video_path, resize_shape=(224, 224)):
    cap = cv2.VideoCapture(video_path)
    frames = []
    if not cap.isOpened():
        raise ValueError(f"Error opening video file: {video_path}")

    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_resized = cv2.resize(frame, resize_shape)
        frames.append(frame_resized)

    cap.release()
    return np.array(frames)

def create_npy_from_videos_flat(src_dir, npy_dir):
    os.makedirs(npy_dir, exist_ok=True)

    for video_file in os.listdir(src_dir):
        if not video_file.lower().endswith((".mp4", ".avi")):
            continue

        video_path = os.path.join(src_dir, video_file)
        output_path = os.path.join(npy_dir, video_file.replace(".mp4", ".npy").replace(".avi", ".npy"))

        try:
            print(f"Processing: {video_file}")
            frames_array = convert_video_to_npy(video_path)
            np.save(output_path, frames_array)
        except Exception as e:
            print(f"Error processing {video_file}: {e}")


def pad_or_truncate_keypoints(keypoints, target_length=120):
    num_frames = keypoints.shape[0]
    if num_frames < target_length:
        padding = np.zeros((target_length - num_frames, keypoints.shape[1], keypoints.shape[2]))
        return np.concatenate((keypoints, padding), axis=0)
    else:
        return keypoints[:target_length]

def create_npy_from_videos(src_dir, npy_dir):
    os.makedirs(npy_dir, exist_ok=True)
    for action in os.listdir(src_dir):
        action_path = os.path.join(src_dir, action)
        if not os.path.isdir(action_path):
            continue
        dest_action_path = os.path.join(npy_dir, action)
        os.makedirs(dest_action_path, exist_ok=True)
        for video_file in os.listdir(action_path):
            if file.endswith((".avi", ".mp4")):
                video_path = os.path.join(action_path, video_file)
                output_path = os.path.join(dest_action_path, video_file.replace(".avi", ".npy"))
                try:
                    frames_array = convert_video_to_npy(video_path)
                    np.save(output_path, frames_array)
                except Exception as e:
                    print(f"Error processing {video_file}: {e}")

def extract_keypoints_from_npy(npy_dir, save_path="our_keypoints.npz"):
    mp_pose = mp.solutions.pose
    pose = mp_pose.Pose(static_image_mode=True)
    all_keypoints = {}

    for action in tqdm(os.listdir(npy_dir), desc="Processing actions"):
        action_path = os.path.join(npy_dir, action)
        if not os.path.isdir(action_path):
            continue

        all_keypoints[action] = {}
        for video_file in os.listdir(action_path):
            if not video_file.endswith(".npy"):
                continue
            video_path = os.path.join(action_path, video_file)
            try:
                sample = np.load(video_path)
                if sample.ndim != 4 or sample.shape[-1] != 3:
                    continue
                sample = sample.astype(np.uint8)

                video_keypoints = []
                for frame in sample[:]:
                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    results = pose.process(frame_rgb)
                    if results.pose_landmarks:
                        keypoints = [[lm.x, lm.y, lm.z] for lm in results.pose_landmarks.landmark]
                    else:
                        keypoints = np.zeros((33, 3)).tolist()
                    video_keypoints.append(keypoints)

                if video_keypoints:
                    kp_array = np.array(video_keypoints)
                    kp_array = pad_or_truncate_keypoints(kp_array, target_length=120)
                    all_keypoints[action][video_file] = kp_array
            except Exception as e:
                print(f"Error with {video_file}: {e}")

    # Transformar em dicionário plano para salvar com np.savez_compressed
    flat_dict = {}
    for action, videos in all_keypoints.items():
        for video_file, arr in videos.items():
            key = f"{action}__{video_file.replace('.npy', '')}"
            flat_dict[key] = arr

    np.savez_compressed(save_path, **flat_dict)
    print(f"Keypoints saved to {save_path}")

def extract_keypoints_from_npy_flat(npy_dir, save_path="our_keypoints.npz"):

  mp_pose = mp.solutions.pose
  pose = mp_pose.Pose(
      static_image_mode=False,
      min_detection_confidence=0.3,
      min_tracking_confidence=0.3
  )

  all_keypoints = {}

  for video_file in tqdm(os.listdir(npy_dir), desc="Processing videos"):
      if not video_file.endswith(".npy"):
          continue
      video_path = os.path.join(npy_dir, video_file)
      try:
          sample = np.load(video_path)
          if sample.ndim != 4 or sample.shape[-1] != 3:
              print(f"Skipping {video_file}, unexpected shape {sample.shape}")
              continue
          sample = sample.astype(np.uint8)

          video_keypoints = []
          for frame in sample:
              frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
              results = pose.process(frame_rgb)
              if results.pose_landmarks:
                  keypoints = [[lm.x, lm.y, lm.z] for lm in results.pose_landmarks.landmark]
              else:
                  keypoints = np.zeros((33, 3)).tolist()
              video_keypoints.append(keypoints)

          if video_keypoints:
              kp_array = np.array(video_keypoints)
              kp_array = pad_or_truncate_keypoints(kp_array, target_length=120)
              kp_array = np.expand_dims(kp_array, axis=1)
              key = video_file.replace(".npy", "")
              all_keypoints[key] = kp_array

      except Exception as e:
          print(f"Error processing {video_file}: {e}")

  np.savez_compressed(save_path, **all_keypoints)
  print(f"Keypoints saved to {save_path}")


# if __name__ == "__main__":
#     # Clone dataset se necessário
#     if not os.path.exists("dataset"):
#         os.system("git clone --filter=blob:none --no-checkout https://github.com/THETIS-dataset/dataset.git")
#         os.chdir("dataset")
#         os.system("git sparse-checkout init --cone")
#         os.system("git sparse-checkout set VIDEO_RGB")
#         os.system("git checkout")
#         os.chdir("..")

src_dir = "/content/DL_Project/our_videos"
npy_dir = "npy_videos"

create_npy_from_videos_flat(src_dir, npy_dir)
extract_keypoints_from_npy_flat(npy_dir, save_path="our_keypoints.npz")




Processing: VID-20250526-WA0012.mp4
Processing: VID-20250526-WA0002.mp4
Processing: VID-20250526-WA0011.mp4
Processing: VID-20250526-WA0005.mp4
Processing: VID-20250526-WA0008.mp4
Processing: VID-20250526-WA0010.mp4
Processing: VID-20250526-WA0009.mp4
Processing: VID-20250526-WA0004.mp4
Processing: VID-20250526-WA0007.mp4
Processing: VID-20250526-WA0001.mp4


Processing videos: 100%|██████████| 10/10 [00:52<00:00,  5.21s/it]

Keypoints saved to our_keypoints.npz





In [9]:
import numpy as np

# Carrega o ficheiro .npz
data = np.load("our_keypoints.npz")

# Lista todas as chaves (nome de cada item salvo)
print("Chaves no ficheiro npz:")
print(data.files)

# Examina uma chave específica (por exemplo, a primeira)
first_key = data.files[0]
print(f"\nShape dos dados da primeira chave '{first_key}':")
print(data[first_key].shape)

print()

print(data[first_key][0])


Chaves no ficheiro npz:
['VID-20250526-WA0002', 'VID-20250526-WA0012', 'VID-20250526-WA0007', 'VID-20250526-WA0010', 'VID-20250526-WA0005', 'VID-20250526-WA0011', 'VID-20250526-WA0004', 'VID-20250526-WA0009', 'VID-20250526-WA0008', 'VID-20250526-WA0001']

Shape dos dados da primeira chave 'VID-20250526-WA0002':
(120, 1, 33, 3)

[[[ 0.42431399  0.34511876 -0.16242044]
  [ 0.41703218  0.33559465 -0.14940841]
  [ 0.41620025  0.33553138 -0.14947633]
  [ 0.41518393  0.33548069 -0.14952862]
  [ 0.41586271  0.33641452 -0.18359487]
  [ 0.41428122  0.33694866 -0.18359771]
  [ 0.41250199  0.33753109 -0.18367767]
  [ 0.4041788   0.34174848 -0.07311164]
  [ 0.40090039  0.34496692 -0.22657399]
  [ 0.42284408  0.35328954 -0.12938932]
  [ 0.42063469  0.35384107 -0.17369001]
  [ 0.40761331  0.38469881  0.04837427]
  [ 0.39827815  0.38883004 -0.28051117]
  [ 0.45567179  0.42478576  0.08975907]
  [ 0.45202741  0.4388012  -0.33356792]
  [ 0.51837987  0.43843904 -0.00121642]
  [ 0.52236485  0.4428288  -0.

In [5]:
video_root = "/content/DL_Project/our_videos"
print("Contents of root video folder:")
print(os.listdir(video_root))

Contents of root video folder:
['test_labels.txt', 'VID-20250526-WA0012.mp4', 'VID-20250526-WA0002.mp4', 'VID-20250526-WA0011.mp4', 'VID-20250526-WA0005.mp4', 'VID-20250526-WA0008.mp4', 'VID-20250526-WA0010.mp4', 'VID-20250526-WA0009.mp4', 'VID-20250526-WA0004.mp4', 'VID-20250526-WA0007.mp4', 'VID-20250526-WA0001.mp4']


In [7]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Load keypoints
keypoints_path = "/content/our_keypoints.npz"
data = np.load(keypoints_path)

# Rebuild dictionary
all_keypoints = {}
for key in data.files:
    action, video_file = key.split("__", 1)
    if action not in all_keypoints:
        all_keypoints[action] = {}
    all_keypoints[action][video_file] = data[key]

# Select one example
first_action = list(all_keypoints.keys())[0]
first_video = list(all_keypoints[first_action].keys())[0]
frame_idx = 0

points = all_keypoints[first_action][first_video][frame_idx]  # shape: (num_joints, 3)

# === Define anatomical connections and colors ===
anatomical_connections = {
    'head': [
        (0, 1), (1, 2), (2, 3),
        (0, 4), (4, 5), (5, 6),
        (3, 7), (6, 8),
        (0, 9), (9, 10)
    ],
    'left_arm': [(11, 13), (13, 15), (15, 17), (15, 19), (15, 21)],
    'right_arm': [(12, 14), (14, 16), (16, 18), (16, 20), (16, 22)],
    'torso': [(11, 12), (23, 24), (11, 23), (12, 24)],
    'left_leg': [(23, 25), (25, 27), (27, 29), (29, 31)],
    'right_leg': [(24, 26), (26, 28), (28, 30), (30, 32)],
}

colors = {
    'head': 'gray',
    'left_arm': 'red',
    'right_arm': 'blue',
    'torso': 'orange',
    'left_leg': 'green',
    'right_leg': 'purple',
}

# === Plotting ===
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(points[:, 0], points[:, 1], points[:, 2], c='black', s=20)

for part, connections in anatomical_connections.items():
    for i, j in connections:
        if i < len(points) and j < len(points):  # ensure valid index
            ax.plot(
                [points[i, 0], points[j, 0]],
                [points[i, 1], points[j, 1]],
                [points[i, 2], points[j, 2]],
                color=colors[part], linewidth=2
            )

ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.set_title(f"3D Skeleton - Frame {frame_idx} - {first_video}")
ax.view_init(elev=20, azim=-70)
plt.tight_layout()
plt.show()

ValueError: not enough values to unpack (expected 2, got 1)

In [10]:
import numpy as np
import pickle

ntu_joints_in_mediapipe = [
    0, 11, 12, 13, 14, 15, 16,
    23, 24, 25, 26, 27, 28,
    5, 2, 7, 8,
    17, 18, 19, 20, 21, 22,
    29, 30
]

def filter_to_ntu_joints(keypoints): # keypoints (T, 33, C)
    return keypoints[:, ntu_joints_in_mediapipe, :]

def prepare_stgcn_data(dataset, label_map):
    data_list = []
    for i, (keypoints, action_name) in enumerate(dataset):
        keypoints = np.array(keypoints)  # (T, 33, C)
        keypoints = filter_to_ntu_joints(keypoints)  # (T, 25, C)
        num_frames, num_joints, channels = keypoints.shape
        keypoints = keypoints[np.newaxis, ...]  # add person dim: (M=1, T, V, C)

        sample = {
            'frame_dir': f'sample_{i}',
            'label': label_map[action_name],
            'img_shape': None,
            'total_frames': num_frames,
            'keypoint': keypoints,
        }
        data_list.append(sample)
    return data_list

# Load your data
data_npz = np.load("our_keypoints.npz")
dataset = []

for key in data_npz.files:
    keypoints = data_npz[key]
    if keypoints.ndim == 4 and keypoints.shape[1] == 1:
        keypoints = np.squeeze(keypoints, axis=1)
    dataset.append((keypoints, "unknown"))

label_map = {"unknown": 0}
test_data = prepare_stgcn_data(dataset, label_map)

with open("our_test_labels.pkl", "wb") as f:
    pickle.dump(test_data, f)

print(f"Created our_test_labels.pkl with {len(test_data)} samples.")


Created our_test_labels.pkl with 10 samples.


In [16]:
#novo
import os
import numpy as np
import pickle

ntu_joints_in_mediapipe = [
    0, 11, 12, 13, 14, 15, 16,
    23, 24, 25, 26, 27, 28,
    5, 2, 7, 8,
    17, 18, 19, 20, 21, 22,
    29, 30
]

def filter_to_ntu_joints(keypoints):
    return keypoints[:, ntu_joints_in_mediapipe, :]

def prepare_stgcn_data(dataset):
    data_list = []
    for i, (keypoints, label) in enumerate(dataset):
        keypoints = np.array(keypoints)
        keypoints = filter_to_ntu_joints(keypoints)
        num_frames, num_joints, channels = keypoints.shape
        keypoints = keypoints[np.newaxis, ...]  # add person dim: (M=1, T, V, C)

        sample = {
            'frame_dir': f'sample_{i}',
            'label': int(label),  # usa o label verdadeiro como inteiro
            'img_shape': None,
            'total_frames': num_frames,
            'keypoint': keypoints,
        }
        data_list.append(sample)
    return data_list

# === Etapa 1: Ler labels do ficheiro ===
label_file_path = "test_labels.txt"
video_to_label = {}

with open(label_file_path, "r") as f:
    for line in f:
        video_name, label = line.strip().split()
        video_name = os.path.splitext(video_name)[0]  # remove .mp4
        video_to_label[video_name] = label  # mantém como string (ou converte já aqui para int)

# === Etapa 2: Carregar dados ===
data_npz = np.load("our_keypoints.npz")
dataset = []

for key in data_npz.files:
    video_name = key  # já está sem .mp4
    if video_name in video_to_label:
        keypoints = data_npz[key]
        if keypoints.ndim == 4 and keypoints.shape[1] == 1:
            keypoints = np.squeeze(keypoints, axis=1)
        dataset.append((keypoints, video_to_label[video_name]))
    else:
        print(f"Aviso: {video_name} não tem label, será ignorado.")

# === Etapa 3: Preparar e guardar ===
test_data = prepare_stgcn_data(dataset)

with open("our_test_labels.pkl", "wb") as f:
    pickle.dump(test_data, f)

print(f"Created our_test_labels.pkl with {len(test_data)} samples.")


Created our_test_labels.pkl with 10 samples.


testess

In [17]:
import pickle

with open("our_test_labels.pkl", "rb") as f:
    data = pickle.load(f)

print(f"Total de amostras: {len(data)}")


Total de amostras: 10


In [18]:
# Mostrar as primeiras 3 amostras
for i, sample in enumerate(data[:3]):
    print(f"\n--- Sample {i} ---")
    print("Frame dir:", sample['frame_dir'])
    print("Label:", sample['label'])
    print("Total frames:", sample['total_frames'])
    print("Keypoint shape:", sample['keypoint'].shape)  # Deve ser (1, T, 25, 3)



--- Sample 0 ---
Frame dir: sample_0
Label: 1
Total frames: 120
Keypoint shape: (1, 120, 25, 3)

--- Sample 1 ---
Frame dir: sample_1
Label: 1
Total frames: 120
Keypoint shape: (1, 120, 25, 3)

--- Sample 2 ---
Frame dir: sample_2
Label: 4
Total frames: 120
Keypoint shape: (1, 120, 25, 3)


In [19]:
labels = [sample['label'] for sample in data]
print("Labels únicas encontradas:", sorted(set(labels)))


Labels únicas encontradas: [1, 4, 5, 6]
