In [None]:
import os
import cv2

# Paths
input_folder = './fall_videos/ur_videos'
output_folder = './fall_videos/processed'

# cv2.VideoWriter does not create missing directories; without this the writer
# cannot open its output file and no video is saved.
os.makedirs(output_folder, exist_ok=True)

# Crop every .mp4 in the input folder to its right half and save it under the
# same file name in the output folder. Sorted so runs are reproducible.
for filename in sorted(os.listdir(input_folder)):
    if not filename.endswith('.mp4'):
        continue

    input_path = os.path.join(input_folder, filename)
    output_path = os.path.join(output_folder, filename)

    cap = cv2.VideoCapture(input_path)
    if not cap.isOpened():
        # Unreadable/corrupt file: report it instead of writing an empty output.
        print(f"Skipping {filename}: could not open video")
        cap.release()
        continue

    out = None
    try:
        # Get original properties
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Output: half the width (right side). The crop start is derived from
        # the output width so the slice is exactly `out_width` columns wide even
        # when the source width is odd (the writer expects frames that match
        # the size it was created with).
        out_width = width // 2
        crop_start = width - out_width

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (out_width, height))
        if not out.isOpened():
            print(f"Skipping {filename}: could not open writer for {output_path}")
            continue

        print(f"Processing {filename}...")

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Crop right half: frame[:, 320:] if original width = 640
            right_half = frame[:, crop_start:]

            out.write(right_half)
    finally:
        # Always release the handles, even if reading/writing raised.
        cap.release()
        if out is not None:
            out.release()

    print(f"Saved to {output_path}")

In [None]:
from ultralytics import YOLO
import pandas as pd
import os
import cv2

# GENERATE keypoints here
# Pose model used to produce per-frame keypoints for every video below.
model = YOLO("yolo11n-pose.pt")  # load an official model

# Folder whose .mp4 files are run through the model.
# Alternative input kept for reference: './fall_videos/test'
# input_folder = './fall_videos/test'
input_folder = './fall_videos/processed'

# Accumulates one row per kept frame: the flattened keypoint values followed
# by a tag string identifying the video and frame.
all_keypoints = []

def is_none_or_empty(x):
    """Return True when ``x`` is ``None`` or has a length of zero.

    ``x`` must support ``len()`` whenever it is not ``None``.
    """
    if x is None:
        return True
    return len(x) == 0

# Run pose tracking on every .mp4 in `input_folder` and collect, per frame, the
# keypoints of the single most confident class-0 detection. Files are visited
# in sorted order so the CSV row order is reproducible across runs.
for filename in sorted(os.listdir(input_folder)):
    if not filename.endswith('.mp4'):
        continue

    full_path = os.path.join(input_folder, filename)
    print("Processing {}".format(full_path))

    # Video name without extension; constant for all frames of this video.
    base_name = os.path.splitext(filename)[0]

    # stream=True yields results frame by frame instead of building a list of
    # every frame's result in memory, which matters for long videos.
    results = model.track(source=full_path, stream=True)

    for frame_number, result in enumerate(results):  # each is a frame

        boxes = result.boxes  # Detections
        keypoints = result.keypoints  # If using a pose model

        if is_none_or_empty(boxes) or is_none_or_empty(keypoints):
            continue

        best_index = None
        best_conf = 0.0

        # inside a frame can have multiple people but only take the highest confidence
        for i in range(len(boxes)):
            conf = float(boxes.conf[i])
            cls = int(boxes.cls[i])
            if cls == 0 and conf > best_conf:
                best_conf = conf
                best_index = i

        # No class-0 detection with positive confidence in this frame. Indexing
        # with None would not fail: it adds an axis and would flatten *all*
        # detections into one over-long row, so skip the frame instead.
        if best_index is None:
            continue

        # keypoints.data is a torch.Tensor of shape (num_detections, num_keypoints, 3)
        keypoints_tensor = keypoints.data[best_index]

        keypoints_np = keypoints_tensor.cpu().detach().numpy()

        # get the name like the format of openpose so we can compare later
        frame_str = f"{frame_number:012d}"  # zero-pad to 12 digits
        tag = f"{base_name}_{frame_str}_keypoints"

        # Row layout: flattened keypoint values, then the tag as the last column.
        flat = keypoints_np.flatten().tolist()

        flat_with_tag = flat + [tag]
        all_keypoints.append(flat_with_tag)

# Write all rows to a header-less, index-less CSV.
df = pd.DataFrame(all_keypoints)
df.to_csv('ur_keypoints_yolo.csv', index=False, header=False)