In [3]:
import cv2
import numpy as np
import json
import os

def round_to_3decimals(value):
    """Return ``value`` rounded to three decimal places with built-in ``round``."""
    return round(value, ndigits=3)

def extract_keypoints(frame, net):
    """Run the pose network on one frame and return 18 keypoints.

    Args:
        frame: Image array. Its ``shape[0]`` is used as the height and
            ``shape[1]`` as the width; the array itself is passed to
            ``cv2.dnn.blobFromImage``.
        net: Network object that receives the blob through ``setInput`` and
            is evaluated with ``forward``.

    Returns:
        A list with 18 entries, one per output channel 0..17. Each entry is
        ``[x, y, prob]``, where ``prob`` is the maximum of that channel's
        probability map and ``x``/``y`` are the location of that maximum
        rescaled from map coordinates to frame coordinates. Channels whose
        maximum is not above 0.1 yield ``[0, 0, 0]``.
    """
    img_h = frame.shape[0]
    img_w = frame.shape[1]

    # Scale pixels by 1/255 and resize to the 368x368 network input.
    blob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (368, 368), (0, 0, 0), swapRB=False, crop=False)
    net.setInput(blob)
    prob_maps = net.forward()
    map_h, map_w = prob_maps.shape[2], prob_maps.shape[3]

    keypoints = []
    for part in range(18):
        # Only the maximum and its location are needed; the minimum is ignored.
        _, peak, _, (col, row) = cv2.minMaxLoc(prob_maps[0, part, :, :])
        if peak > 0.1:
            keypoints.append([(img_w * col) / map_w, (img_h * row) / map_h, peak])
        else:
            keypoints.append([0, 0, 0])
    return keypoints

def draw_skeleton(frame, keypoints):
    """Draw keypoints and the limbs connecting them onto ``frame``.

    Args:
        frame: Image passed to ``cv2.circle`` / ``cv2.line``; it is drawn on
            directly and then returned.
        keypoints: Sequence of ``(x, y, prob)`` triples, indexed by the part
            numbers used in the limb table below (0..17).

    Returns:
        The same ``frame`` object that was passed in.
    """
    # Joints: a filled dot for every keypoint scoring above 0.1.
    for x, y, prob in keypoints:
        if prob > 0.1:
            cv2.circle(frame, (int(x), int(y)), 5, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)

    limbs = (
        (1, 2), (1, 5), (2, 3), (3, 4), (5, 6), (6, 7), (1, 8), (8, 9), (9, 10),
        (1, 11), (11, 12), (12, 13), (1, 0), (0, 14), (14, 16), (0, 15), (15, 17),
    )
    # Limbs: a segment is drawn only when both of its end joints score above 0.1.
    for start, end in limbs:
        if not (keypoints[start][2] > 0.1 and keypoints[end][2] > 0.1):
            continue
        tail = keypoints[start]
        head = keypoints[end]
        cv2.line(frame, (int(tail[0]), int(tail[1])), (int(head[0]), int(head[1])), (0, 255, 255), 2)
    return frame

def _normalize_pose(flat_xy, epsilon=1e-7):
    """Min-max scale a flat ``[x0, y0, x1, y1, ...]`` list per axis, rounded to 3 decimals."""
    pose = np.asarray(flat_xy, dtype=float).reshape(-1, 2)
    lowest = pose.min(axis=0)
    # epsilon avoids a division by zero when all points share a coordinate.
    span = np.maximum(pose.max(axis=0) - lowest, epsilon)
    return np.round((pose - lowest) / span, 3).flatten().tolist()


def process_video(video_path, output_path, output_raw_path, net, show=True):
    """Extract per-frame keypoints from a video and save them as two JSON files.

    Every frame read from ``video_path`` is passed to ``extract_keypoints``.
    Two records are collected per frame:

    * normalized: ``pose`` holds the x/y pairs min-max scaled per axis within
      the frame's skeleton and rounded to 3 decimals; ``score`` holds the
      confidences rounded to 3 decimals. Written to ``output_path``.
    * raw: ``pose`` holds the flattened ``x, y, prob`` triples exactly as
      returned by ``extract_keypoints``; ``score`` holds the unrounded
      confidences. Written to ``output_raw_path``.

    Both files have the layout
    ``{"data": [{"frame_index": int, "skeleton": [{"pose": [...], "score": [...]}]}]}``.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        output_path: Destination of the normalized JSON file.
        output_raw_path: Destination of the raw JSON file.
        net: Network object forwarded to ``extract_keypoints``.
        show: When true (the default), each frame is displayed with its
            skeleton and pressing ``q`` stops processing early; the frames
            handled so far are still written. Pass ``False`` to run without
            opening any window.

    Raises:
        OSError: If the video cannot be opened, or if an output directory or
            file cannot be created.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        # Fail loudly instead of silently writing two empty JSON files.
        raise OSError(f"Could not open video: {video_path}")

    data, raw_data = [], []
    frame_count = 0
    try:
        # Create the output folders before the long-running loop so that a
        # missing directory cannot discard the results at write time.
        for target in (output_path, output_raw_path):
            parent = os.path.dirname(target)
            if parent:
                os.makedirs(parent, exist_ok=True)

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            keypoints = extract_keypoints(frame, net)

            if show:
                # Draw on a copy so the frame read from the video stays untouched.
                cv2.imshow('Skeleton', draw_skeleton(frame.copy(), keypoints))
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            scores = [kp[2] for kp in keypoints]

            # NOTE(review): extract_keypoints reports undetected joints as
            # [0, 0, 0]; those zeros take part in the min/max below, so a
            # missing joint anchors the normalization at the image origin.
            # Kept as-is to preserve the existing output format.
            data.append({
                "frame_index": frame_count,
                "skeleton": [{
                    "pose": _normalize_pose([c for kp in keypoints for c in kp[:2]]),
                    "score": [round_to_3decimals(s) for s in scores],
                }],
            })
            raw_data.append({
                "frame_index": frame_count,
                "skeleton": [{
                    "pose": [v for kp in keypoints for v in kp],
                    "score": scores,
                }],
            })

            frame_count += 1
            if frame_count % 100 == 0:
                print(f"Processed {frame_count}/{total_frames} frames")
    finally:
        # Release the capture (and any preview window) even if a frame fails.
        cap.release()
        if show:
            cv2.destroyAllWindows()

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump({"data": data}, f, indent=4)

    with open(output_raw_path, 'w', encoding='utf-8') as f:
        json.dump({"data": raw_data}, f, indent=4)

    print(f"Normalized JSON data saved to {output_path}")
    print(f"Raw JSON data saved to {output_raw_path}")

# --- Driver: load the pose network and process one video -------------------
# Network definition (.prototxt) and trained weights (.caffemodel).
# NOTE(review): these are absolute paths on one specific Windows machine;
# adjust them before running elsewhere.
protoFile = "C:\\Users\\sh\\PycharmProjects\\home-action-recognition\\openpose\\pose_deploy_linevec.prototxt"
weightsFile = "C:\\Users\\sh\\PycharmProjects\\home-action-recognition\\openpose\\pose_iter_440000.caffemodel"

# Load the Caffe model and request the CUDA backend/target for inference.
# NOTE(review): presumably requires an OpenCV build with CUDA support — confirm.
net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

# Input video and the two JSON outputs (normalized and raw), given relative
# to the current working directory.
video_path = "../long_video/long (2).mp4"
output_path = "../data/Kinetics/kinetics-skeleton/long (2).json"
output_raw_path = "../data/Kinetics/kinetics-skeleton-raw/long (2)_raw.json"
process_video(video_path, output_path, output_raw_path, net)

Processed 100/4332 frames
Processed 200/4332 frames
Processed 300/4332 frames
Processed 400/4332 frames
Processed 500/4332 frames
Processed 600/4332 frames
Processed 700/4332 frames
Processed 800/4332 frames
Processed 900/4332 frames
Processed 1000/4332 frames
Processed 1100/4332 frames
Processed 1200/4332 frames
Processed 1300/4332 frames
Processed 1400/4332 frames
Processed 1500/4332 frames
Processed 1600/4332 frames
Processed 1700/4332 frames
Processed 1800/4332 frames
Processed 1900/4332 frames
Processed 2000/4332 frames
Processed 2100/4332 frames
Processed 2200/4332 frames
Processed 2300/4332 frames
Processed 2400/4332 frames
Processed 2500/4332 frames
Processed 2600/4332 frames
Processed 2700/4332 frames
Processed 2800/4332 frames
Processed 2900/4332 frames
Processed 3000/4332 frames
Processed 3100/4332 frames
Processed 3200/4332 frames
Processed 3300/4332 frames
Processed 3400/4332 frames
Processed 3500/4332 frames
Processed 3600/4332 frames
Processed 3700/4332 frames
Processed 