### json 파일 다시 추출 
* 기존 10프레임 단위로 추출한 json 대신 6프레임으로 추출
* 클래스, bbox 좌표 등 모두 추가

In [2]:
from ultralytics import YOLO
import cv2
import os
import json
import gc
import mediapipe as mp
from collections import defaultdict

In [6]:
# Load the YOLO detection model from the trained weights file.
model_path = 'D:\\project\\prjvenv\\runs\\detect\\human_fall_s30\\weights\\best.pt'
model = YOLO(model_path)

# MediaPipe pose estimator shared by every video processed in this notebook.
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.3)

# Pose landmark indices to keep. Per the MediaPipe Pose landmark numbering:
# 0 nose, 11/12 shoulders, 15/16 wrists, 23/24 hips, 25/26 knees, 27/28 ankles.
DESIRED_LANDMARKS = [0, 11, 12, 15, 16, 23, 24, 25, 26, 27, 28]

# Input / output folder locations.
video_directory = 'D:\\human_fall\\re_video\\validation'
output_folder = 'D:\\human_fall\\re_landmark\\val_addition_yolobbox_json_6'
sensor_folder = 'D:\\human_fall\\re_landmark\\val_sensor_json'

# Create the output folder when it does not exist yet; exist_ok avoids the
# separate "check, then create" step.
os.makedirs(output_folder, exist_ok=True)

def adjust_bbox(bbox, scale_factor, frame_shape):
    """Scale a bounding box about its centre and clip it to the frame.

    Args:
        bbox: Sequence ``(x1, y1, x2, y2)`` of corner coordinates.
        scale_factor: Multiplier applied to both the width and the height.
        frame_shape: Sequence whose first two items are the frame height and
            the frame width, in that order.

    Returns:
        ``[x1, y1, x2, y2]`` as integers. The top-left corner is clamped to be
        at least 0; the bottom-right corner is clamped to the frame width and
        height.
    """
    left, top, right, bottom = bbox
    frame_h, frame_w = frame_shape[0], frame_shape[1]

    mid_x = (left + right) / 2
    mid_y = (top + bottom) / 2

    # Half of the scaled extent on each axis.
    half_w = (right - left) * scale_factor / 2
    half_h = (bottom - top) * scale_factor / 2

    return [
        max(0, int(mid_x - half_w)),
        max(0, int(mid_y - half_h)),
        min(int(mid_x + half_w), frame_w),
        min(int(mid_y + half_h), frame_h),
    ]

def process_video(video_path, frame_stride=6, conf_threshold=0.5, bbox_scale=1.2):
    """Extract pose landmarks from every ``frame_stride``-th frame of a video.

    Each sampled frame is run through the module-level YOLO ``model``; the
    first detected box that also yields MediaPipe pose landmarks is recorded.

    Args:
        video_path: Path of the video file to open with OpenCV.
        frame_stride: Sample one frame out of this many (default 6).
        conf_threshold: Confidence threshold passed to the YOLO model.
        bbox_scale: Factor by which the detected box is enlarged before the
            crop is handed to MediaPipe (default 1.2, i.e. 20% larger).

    Returns:
        ``None`` when the video cannot be opened; otherwise a mapping from
        ``"frame_<index>"`` to a dict holding the selected landmarks, the
        detected class name, the detection confidence and the original
        (un-scaled) bounding box. The mapping is empty when nothing was found.

    Raises:
        ValueError: If ``frame_stride`` is smaller than 1.
    """
    if frame_stride < 1:
        raise ValueError("frame_stride must be a positive integer")

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return None

    pose_data = defaultdict(dict)
    frame_count = 0
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                break

            # Only every frame_stride-th frame is analysed.
            if frame_count % frame_stride == 0:
                frame_entry = _pose_entry_for_frame(frame, conf_threshold, bbox_scale)
                if frame_entry is not None:
                    pose_data[f"frame_{frame_count}"] = frame_entry

            frame_count += 1
            if frame_count % 100 == 0:
                # The reported frame count can be 0 for some inputs; avoid
                # dividing by it when printing progress.
                if total_frames > 0:
                    print(f"Processed {frame_count}/{total_frames} frames ({frame_count/total_frames*100:.2f}%)")
                else:
                    print(f"Processed {frame_count} frames")
    finally:
        # Release the capture even if detection or pose estimation raised.
        cap.release()

    return pose_data


def _pose_entry_for_frame(frame, conf_threshold, bbox_scale):
    """Return the landmark record for the first box with a pose, else None."""
    # NOTE(review): the shared `pose` object is created with
    # static_image_mode=False while it receives independent crops here;
    # confirm that this is the intended MediaPipe mode.
    results = model(frame, conf=conf_threshold)

    for r in results:
        for box in r.boxes:
            class_name = model.names[int(box.cls)]

            # Original bounding box corners.
            x1, y1, x2, y2 = map(int, box.xyxy[0])

            # Enlarge the box before cropping the person out of the frame.
            adjusted_bbox = adjust_bbox([x1, y1, x2, y2], scale_factor=bbox_scale, frame_shape=frame.shape)
            person_image = frame[adjusted_bbox[1]:adjusted_bbox[3], adjusted_bbox[0]:adjusted_bbox[2]]

            # A degenerate box gives an empty crop, which cvtColor rejects.
            if person_image.size == 0:
                continue

            results_pose = pose.process(cv2.cvtColor(person_image, cv2.COLOR_BGR2RGB))
            if not results_pose.pose_landmarks:
                # No pose in this box: try the next detected box.
                continue

            frame_landmarks = {}
            for idx, landmark in enumerate(results_pose.pose_landmarks.landmark):
                if idx in DESIRED_LANDMARKS:
                    # Convert crop-relative coordinates into coordinates
                    # relative to the full frame.
                    global_x = (adjusted_bbox[0] + landmark.x * person_image.shape[1]) / frame.shape[1]
                    global_y = (adjusted_bbox[1] + landmark.y * person_image.shape[0]) / frame.shape[0]
                    frame_landmarks[f"landmark_{idx}"] = {
                        "x": float(global_x),
                        "y": float(global_y),
                    }
            frame_landmarks["class"] = class_name
            frame_landmarks["confidence"] = float(box.conf.item())
            frame_landmarks["bbox"] = {
                'x1': float(x1), 'y1': float(y1), 'x2': float(x2), 'y2': float(y2)
            }
            # Only the first person with a detected pose is recorded.
            return frame_landmarks

    return None

def addition_sensordata_json(sensor_folder, output_folder):
    """Copy the ``sensordata`` section of each sensor JSON into its pose JSON.

    For every ``.json`` file directly inside ``output_folder``, the file with
    the same name in ``sensor_folder`` is read and the value under its
    ``'sensordata'`` key (an empty dict when the key is absent) is stored
    under ``'sensordata'`` in the output file, which is rewritten in place.
    Output files without a matching sensor file are reported and left as is.

    Args:
        sensor_folder: Folder holding the sensor JSON files.
        output_folder: Folder holding the pose JSON files to update.
    """
    for json_name in os.listdir(output_folder):
        if not json_name.endswith('.json'):
            continue

        target_path = os.path.join(output_folder, json_name)
        sensor_path = os.path.join(sensor_folder, json_name)

        if not os.path.exists(sensor_path):
            print(f'센서 데이터 파일이 없어서 건너뜀: {json_name}')
            continue

        with open(sensor_path, 'r', encoding='utf-8') as sensor_file:
            sensor_block = json.load(sensor_file).get('sensordata', {})

        with open(target_path, 'r', encoding='utf-8') as target_file:
            merged = json.load(target_file)

        merged['sensordata'] = sensor_block

        with open(target_path, 'w', encoding='utf-8') as target_file:
            json.dump(merged, target_file, ensure_ascii=False, indent=2)

        print(f'센서 데이터 추가 완료: {json_name}')

def re_classify_json(file_path):
    """Relabel every frame of a pose JSON from its fall start/end frames.

    The file is expected to hold a ``'pose_data'`` mapping keyed by
    ``"frame_<index>"`` and, optionally, a ``'sensordata'`` mapping with
    ``'fall_start_frame'`` and ``'fall_end_frame'``.

    Labelling rules:
        * either fall frame equal to 0: every frame becomes ``'Normal'``;
        * otherwise frames before the start are ``'Normal'``, frames from the
          start to the end (inclusive) are ``'Danger'`` and later frames are
          ``'Fall'``.

    When ``'sensordata'`` or either fall-frame key is missing there is nothing
    to derive labels from, so the file is left untouched. (Previously the
    missing values defaulted to -1, which labelled every frame ``'Fall'``.)

    Args:
        file_path: Path of the JSON file; it is rewritten in place.

    Raises:
        KeyError: If the file has no ``'pose_data'`` entry.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    pose_data = data['pose_data']
    sensordata = data.get('sensordata', {})
    fall_start_frame = sensordata.get('fall_start_frame')
    fall_end_frame = sensordata.get('fall_end_frame')

    # No fall annotation available: keep the existing labels instead of
    # guessing.
    if fall_start_frame is None or fall_end_frame is None:
        return

    if fall_start_frame == 0 or fall_end_frame == 0:
        for frame_data in pose_data.values():
            frame_data['class'] = 'Normal'
    else:
        for frame_key, frame_data in pose_data.items():
            # Keys look like "frame_<index>"; the index is the frame number.
            frame = int(frame_key.split('_')[1])
            if frame < fall_start_frame:
                new_class = 'Normal'
            elif frame <= fall_end_frame:
                new_class = 'Danger'
            else:
                new_class = 'Fall'

            frame_data['class'] = new_class

    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

def addition_classes(folder):
    """Run ``re_classify_json`` on every ``.json`` file directly in *folder*.

    Prints one confirmation line per processed file.
    """
    json_names = (name for name in os.listdir(folder) if name.endswith('.json'))
    for json_name in json_names:
        re_classify_json(os.path.join(folder, json_name))
        print(f'클래스 재분류 완료: {json_name}')

# Main entry point
def main():
    """Run the full pipeline: pose extraction, sensor merge, relabelling.

    1. Walk ``video_directory`` and write one pose JSON per ``.mp4`` video
       into ``output_folder``.
    2. Merge the matching sensor data into each pose JSON.
    3. Relabel every frame from the merged fall start/end frames.
    """
    # 1. Process videos and create the JSON files.
    for root, _dirs, files in os.walk(video_directory):
        for file in files:
            # Match the extension case-insensitively so ".MP4" is not missed.
            if not file.lower().endswith(".mp4"):
                continue

            video_path = os.path.join(root, file)
            print(f"Processing video: {video_path}")
            video_pose_data = process_video(video_path)

            if video_pose_data:
                json_filename = os.path.splitext(file)[0] + '.json'
                json_path = os.path.join(output_folder, json_filename)

                # Explicit UTF-8 to match how the later stages read the file.
                with open(json_path, 'w', encoding='utf-8') as f:
                    json.dump({
                        "video_path": video_path,
                        "pose_data": video_pose_data
                    }, f, indent=4)

                print(f'JSON 생성 완료: {json_filename}')
            else:
                # process_video returned None or an empty mapping; report it
                # instead of skipping the video silently.
                print(f'Skipped (video not opened or no pose detected): {video_path}')

            gc.collect()

    # 2. Add the sensor data.
    addition_sensordata_json(sensor_folder, output_folder)

    # 3. Relabel the classes.
    addition_classes(output_folder)

    print("모든 처리 완료")

if __name__ == "__main__":
    main()

Processing video: D:\human_fall\re_video\validation\N\00582_H_D_N_C1.mp4

0: 384x640 1 Non_Fall, 65.0ms
Speed: 2.0ms preprocess, 65.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Non_Fall, 59.6ms
Speed: 2.0ms preprocess, 59.6ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Non_Fall, 61.8ms
Speed: 1.0ms preprocess, 61.8ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Non_Fall, 57.0ms
Speed: 1.0ms preprocess, 57.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Non_Fall, 64.0ms
Speed: 2.0ms preprocess, 64.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Non_Fall, 59.0ms
Speed: 1.0ms preprocess, 59.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Non_Fall, 62.0ms
Speed: 1.0ms preprocess, 62.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Non_Fall, 60.0ms
