# In [None]:
import numpy as np
import os
import json
import cv2
from sklearn.model_selection import train_test_split

# Joint names listed in index order. A joint's index is its position in the
# keypoint lists produced by extract_keypoints() and read by draw_skeleton().
_KEYPOINT_NAMES = (
    "Nose", "Neck", "RShoulder", "RElbow", "RWrist",
    "LShoulder", "LElbow", "LWrist", "RHip",
    "RKnee", "RAnkle", "LHip", "LKnee", "LAnkle",
    "REye", "LEye", "REar", "LEar",
)

# Joint name -> index lookup (18 entries, numbered 0..17).
BODY_PARTS = {name: index for index, name in enumerate(_KEYPOINT_NAMES)}

# Pairs of joint names (keys of BODY_PARTS) that draw_skeleton() connects
# with a line segment. Order is kept exactly as the drawing order.
POSE_PAIRS = [
    # shoulders and arms
    ("Neck", "RShoulder"),
    ("Neck", "LShoulder"),
    ("RShoulder", "RElbow"),
    ("RElbow", "RWrist"),
    ("LShoulder", "LElbow"),
    ("LElbow", "LWrist"),
    # hips and legs
    ("Neck", "RHip"),
    ("RHip", "RKnee"),
    ("RKnee", "RAnkle"),
    ("Neck", "LHip"),
    ("LHip", "LKnee"),
    ("LKnee", "LAnkle"),
    # head
    ("Neck", "Nose"),
    ("Nose", "REye"),
    ("REye", "REar"),
    ("Nose", "LEye"),
    ("LEye", "LEar"),
]

# Locations of the OpenPose Caffe network definition (.prototxt) and weights
# (.caffemodel). NOTE(review): these are absolute, machine-specific Windows
# paths; they must be adjusted before running on another machine.
protoFile = "C:\\Users\\sh\\PycharmProjects\\home-action-recognition\\openpose\\pose_deploy_linevec.prototxt"
weightsFile = "C:\\Users\\sh\\PycharmProjects\\home-action-recognition\\openpose\\pose_iter_440000.caffemodel"

# The network is loaded once, at import time, and shared through the
# module-level name `net`, which extract_keypoints() reads.
net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)
# Request the CUDA backend/target for inference.
# NOTE(review): presumably needs an OpenCV build with CUDA support - confirm
# on the target machine.
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

def extract_keypoints(frame):
    """Estimate 18 body keypoints for a single image with the module-level ``net``.

    The frame is converted to a 368x368 blob scaled by 1/255 and pushed
    through the network. For each of the first 18 output channels the
    location of the maximum value is taken as the joint position and scaled
    from map coordinates to frame coordinates.

    Args:
        frame: Image array; ``shape[0]`` is used as height and ``shape[1]``
            as width.

    Returns:
        A list of 18 ``[x, y, prob]`` lists. Joints whose peak value is not
        above 0.1 are reported as ``[0, 0, 0]``.
    """
    frame_h, frame_w = frame.shape[0], frame.shape[1]

    blob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (368, 368), (0, 0, 0), swapRB=False, crop=False)
    net.setInput(blob)
    heatmaps = net.forward()

    map_h, map_w = heatmaps.shape[2], heatmaps.shape[3]

    def locate(channel):
        # Only the maximum and its location matter; the minimum is discarded.
        _, peak, _, (col, row) = cv2.minMaxLoc(heatmaps[0, channel, :, :])
        if peak > 0.1:
            # Rescale from map grid coordinates to frame pixel coordinates.
            return [(frame_w * col) / map_w, (frame_h * row) / map_h, peak]
        return [0, 0, 0]

    return [locate(channel) for channel in range(18)]

def draw_skeleton(frame, keypoints):
    """Draw detected joints and the limbs between them onto ``frame``.

    Args:
        frame: Image to draw on; it is modified in place.
        keypoints: Sequence of ``(x, y, prob)`` triples indexed by the values
            of ``BODY_PARTS``.

    Returns:
        The same ``frame`` object that was passed in.
    """
    colour = (0, 255, 255)

    # One filled dot per joint whose confidence is above 0.1.
    # NOTE(review): cv2.FILLED is passed as lineType here although it reads
    # like a thickness constant; kept as-is to preserve rendering - confirm.
    for x, y, prob in keypoints:
        if prob > 0.1:
            cv2.circle(frame, (int(x), int(y)), 5, colour, thickness=-1, lineType=cv2.FILLED)

    # A limb is drawn only when both of its end joints are confident enough.
    for name_a, name_b in POSE_PAIRS:
        idx_a, idx_b = BODY_PARTS[name_a], BODY_PARTS[name_b]
        joint_a = keypoints[idx_a]
        joint_b = keypoints[idx_b]
        if joint_a[2] > 0.1 and joint_b[2] > 0.1:
            start = (int(joint_a[0]), int(joint_a[1]))
            end = (int(joint_b[0]), int(joint_b[1]))
            cv2.line(frame, start, end, colour, 2)

    return frame

def process_video(video_path, max_frame=150, label_index=None, label=None, show=True):
    """Run pose estimation on the leading frames of a video.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        max_frame: Maximum number of frames to read from the video.
        label_index: Optional value stored under ``"label_index"`` in both
            returned dicts.
        label: Optional value stored under ``"label"`` in both returned dicts.
        show: When true (the default, matching the previous behaviour) every
            processed frame is displayed in a ``'Skeleton'`` window and
            pressing ``q`` stops processing early; the frame being shown at
            that moment is not recorded. Pass ``False`` for unattended or
            headless runs.

    Returns:
        ``(result, raw_result)``. Both are dicts whose ``"data"`` entry is a
        list with one item per recorded frame, each holding ``"frame_index"``
        and a one-element ``"skeleton"`` list. In ``result`` the ``"pose"``
        is the flat list ``[x0, y0, x1, y1, ...]``; in ``raw_result`` it is
        the flat list ``[x0, y0, p0, x1, y1, p1, ...]``. Both carry the
        per-joint ``"score"`` list. If the video cannot be read, ``"data"``
        is empty.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    raw_frames = []

    if not cap.isOpened():
        # Keep the best-effort behaviour (empty result) but make it visible.
        print(f"Warning: could not open video {video_path}")

    # try/finally guarantees the capture handle (and preview window) are
    # released even when pose extraction or drawing raises.
    try:
        for frame_idx in range(max_frame):
            ret, frame = cap.read()
            if not ret:
                break

            keypoints = extract_keypoints(frame)

            if show:
                # Draw on a copy so the original frame stays untouched.
                preview = draw_skeleton(frame.copy(), keypoints)
                cv2.imshow('Skeleton', preview)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

            scores = [joint[2] for joint in keypoints]

            frames.append({
                "frame_index": frame_idx,
                "skeleton": [{
                    "pose": [value for joint in keypoints for value in joint[:2]],
                    "score": scores,
                }]
            })

            raw_frames.append({
                "frame_index": frame_idx,
                "skeleton": [{
                    "pose": [value for joint in keypoints for value in joint],
                    # Separate list object so the two results never alias.
                    "score": list(scores),
                }]
            })
    finally:
        cap.release()
        if show:
            cv2.destroyAllWindows()

    result = {"data": frames}
    raw_result = {"data": raw_frames}
    if label_index is not None:
        result["label_index"] = label_index
        raw_result["label_index"] = label_index
    if label is not None:
        result["label"] = label
        raw_result["label"] = label

    return result, raw_result

def normalize_poses(data):
    """Min-max normalise every skeleton's coordinates and round poses and scores.

    For each skeleton the flat ``"pose"`` list is read as ``(x, y)`` pairs
    and each axis is rescaled by that skeleton's own minimum and range,
    then rounded to 3 decimals. Scores are rounded to 3 decimals as well.

    NOTE(review): every pair takes part in the min/max, including joints
    stored as ``(0, 0)`` placeholders - confirm this is intended.

    Args:
        data: Dict with a ``"data"`` list of frames, each holding a
            ``"skeleton"`` list of dicts with ``"pose"`` and ``"score"``.

    Returns:
        The same ``data`` object, modified in place.
    """
    # Lower bound for the per-axis range so a degenerate axis
    # (max == min) does not cause a division by zero.
    tiny = 1e-7

    skeletons = (sk for frame in data['data'] for sk in frame['skeleton'])
    for sk in skeletons:
        coords = np.array(sk['pose']).reshape(-1, 2)

        lowest = coords.min(axis=0)
        span = np.maximum(coords.max(axis=0) - lowest, tiny)

        sk['pose'] = np.round((coords - lowest) / span, 3).flatten().tolist()
        sk['score'] = [round(value, 3) for value in sk['score']]

    return data

def gendata(data_path, out_path, output_raw_path, label_path, val_split=0.2):
    """Convert per-class video folders into skeleton JSON files plus label files.

    For every class folder under ``data_path`` the videos are split into
    train/val, each video is processed with ``process_video`` and written as:

    * ``<out_path>/kinetics_<split>/<video_id>.json`` (normalised poses)
    * ``<output_raw_path>/kinetics_<split>_raw/<video_id>.json`` (raw poses)

    Finally ``<label_path>/kinetics_<split>_label.json`` is written for both
    splits.

    NOTE(review): ``video_id`` is the file name without extension, so videos
    with the same name in different class folders overwrite each other -
    confirm that names are unique across classes.

    Args:
        data_path: Directory containing one sub-directory per class.
        out_path: Root directory for the normalised skeleton files.
        output_raw_path: Root directory for the raw skeleton files.
        label_path: Directory that receives the two label JSON files.
        val_split: Fraction of each class assigned to the validation split.
    """
    classes = ['walk_0', 'stand_1', 'sit_2', 'armExe_3', 'lieDown_4', 'fall_5']
    all_labels = {'train': {}, 'val': {}}

    for class_name in classes:
        class_path = os.path.join(data_path, class_name)
        # os.listdir returns entries in arbitrary order; sorting makes the
        # seeded split below reproducible across machines and runs.
        video_files = sorted(os.listdir(class_path))

        train_files, _ = train_test_split(video_files, test_size=val_split, random_state=42)
        # Set membership keeps the per-video split lookup O(1).
        train_set = set(train_files)

        # Class folders are named "<label>_<index>"; parse once per class.
        name_parts = class_name.split('_')
        label = name_parts[0]
        label_index = int(name_parts[1])

        for video_file in video_files:
            video_path = os.path.join(class_path, video_file)
            print(f"Processing {video_path}")

            video_data, raw_video_data = process_video(video_path, label_index=label_index, label=label)
            video_data = normalize_poses(video_data)

            video_id = os.path.splitext(video_file)[0]
            dataset = 'train' if video_file in train_set else 'val'

            video_json_path = os.path.join(out_path, f"kinetics_{dataset}", f"{video_id}.json")
            raw_video_json_path = os.path.join(output_raw_path, f"kinetics_{dataset}_raw", f"{video_id}.json")

            os.makedirs(os.path.dirname(video_json_path), exist_ok=True)
            os.makedirs(os.path.dirname(raw_video_json_path), exist_ok=True)

            with open(video_json_path, 'w') as f:
                json.dump(video_data, f, indent=4)

            with open(raw_video_json_path, 'w') as f:
                json.dump(raw_video_data, f, indent=4)

            all_labels[dataset][video_id] = {
                "has_skeleton": True,
                "label": label,
                "label_index": label_index
            }

    # Label files go to label_path (previously this argument was ignored and
    # out_path was always used).
    if label_path:
        os.makedirs(label_path, exist_ok=True)
    for dataset, labels in all_labels.items():
        with open(os.path.join(label_path, f"kinetics_{dataset}_label.json"), 'w') as f:
            json.dump(labels, f, indent=4)

if __name__ == "__main__":
    # Input videos (one sub-folder per class) and the two output roots.
    data_path = "C:\\Users\\sh\\PycharmProjects\\home-action-recognition\\videos"
    out_path = "../data/Kinetics/kinetics-skeleton"
    output_raw_path = "../data/Kinetics/kinetics-skeleton-raw"

    # Make sure every split directory exists before processing starts.
    for root_dir, split_dir in (
        (out_path, "kinetics_train"),
        (out_path, "kinetics_val"),
        (output_raw_path, "kinetics_train_raw"),
        (output_raw_path, "kinetics_val_raw"),
    ):
        os.makedirs(os.path.join(root_dir, split_dir), exist_ok=True)

    # out_path is also passed as the label_path argument.
    gendata(data_path, out_path, output_raw_path, out_path)

    print("Data processing and JSON saving completed for individual videos and labels.")