In [2]:
import numpy as np
import torch
from torch import nn
from scipy.spatial import distance
import cv2
from ultralytics import YOLO
import mediapipe as mp

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


### yolo, GRU 동시 실행

In [14]:
# 랜드마크 인덱스 정의 (예: 코, 왼쪽 어깨, 오른쪽 어깨 등)
LANDMARKS = [0, 11, 12, 15, 16, 23, 24, 25, 26, 27, 28]  # 총 11개 랜드마크

# YOLO 모델 로드
yolo_model = YOLO('D:\\project\\prjvenv\\runs\\detect\\human_fall_s30\\weights\\best.pt')

# GRU 모델 정의 및 로드 
class GRUModel(torch.nn.Module):
    def __init__(self, input_size=27):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size = 64
        self.num_layers = num_layers = 2
        self.gru = nn.GRU(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True,
                          dropout=0.5)
        self.fc = nn.Linear(hidden_size, 3)  # output_size를 직접 지정합니다.
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.gru(x, h0)
        out = self.dropout(out[:, -1, :])
        out = self.fc(out)
        return out

# GRU 모델 로드
input_size = len(LANDMARKS) * 2 + 5   # 랜드마크 (22) + 바운딩박스 정보 (4) + 속도 (1) + 클래스 (1) 
gru_model = GRUModel(input_size=input_size)  
gru_model.load_state_dict(torch.load('D:\\project\\prjvenv\\GRU\\best_GRU_model_2.pt', map_location=torch.device('cpu')))
gru_model.eval()

# 클래스 이름 정의
class_names = {0: 'Normal', 1: 'Fall', 2: 'Danger'}

def calculate_head_upper_body_speed(keypoints, prev_keypoints):
    h = np.array([keypoints[0, 0], keypoints[0, 1]])   # 머리 좌표
    l = np.array([keypoints[11, 0], keypoints[11, 1]])  # 왼쪽 어깨 좌표
    r = np.array([keypoints[12, 0], keypoints[12, 1]])  # 오른쪽 어깨 좌표

    # 이전 프레임의 좌표
    prev_h = np.array([prev_keypoints[0, 0], prev_keypoints[0, 1]])
    prev_l = np.array([prev_keypoints[11, 0], prev_keypoints[11, 1]])
    prev_r = np.array([prev_keypoints[12, 0], prev_keypoints[12, 1]])

    # 현재 프레임과 이전 프레임의 상체 중심 계산
    center_new = (h + l + r) / 3
    center_prev = (prev_h + prev_l + prev_r) / 3

    # 유클리드 거리 계산 (속도)
    speed = distance.euclidean(center_new, center_prev)
    return speed

def process_landmarks(landmarks): 
    selected_landmarks = landmarks[LANDMARKS]   # 지정된 랜드마크 선택 
    return selected_landmarks[:, :2].flatten()   # (x,y) 좌표 반환

def detect_fall(landmarks, prev_landmarks):
    speed = calculate_head_upper_body_speed(landmarks, prev_landmarks) if prev_landmarks is not None else 0
    processed_landmarks = process_landmarks(landmarks)

    # 비디오 프레임을 YOLO 입력 크기로 리사이즈
    resized_frame = cv2.resize(frame, (640, 640))
    
    # YOLO를 사용하여 바운딩 박스 예측
    results = yolo_model(resized_frame)
   
    # YOLO 예측 결과에서 바운딩 박스 정보 가져오기 
    bbox_info=results[0].boxes.xyxy.cpu().numpy() if results and len(results[0].boxes) > 0 else None
    
    if bbox_info is None or len(bbox_info) == 0:
       print("No bounding boxes detected.")
       return None , None

    # 첫 번째 바운딩 박스 정보 가져오기 (여러 개가 있을 경우 첫 번째만 사용)
    x1 , y1 , x2 , y2=bbox_info[0]  
    
    # 바운딩 박스 좌표를 원본 프레임에 맞게 변환 (640x640에서 원본 크기로)
    original_width = frame.shape[1]
    original_height = frame.shape[0]
    x1, y1, x2, y2 = bbox_info[0]
    
    x1 *= original_width / 640.0
    x2 *= original_width / 640.0
    y1 *= original_height / 640.0
    y2 *= original_height / 640.0
    print(f"Transformed coordinates: {(x1, y1, x2, y2)}")

    bbox_width=x2 - x1 
    bbox_height=y2 - y1  
   
    bbox_ratio=bbox_width / bbox_height if bbox_height !=0 else float('inf')
   
    # 클래스 결정 
    if bbox_ratio <=1.3:
       bbox_class=0   # Normal 
    elif bbox_ratio <=1.7:
       bbox_class=2   # Danger 
    else:
       bbox_class=1   # Fall 

    # 입력 데이터 구성 
    input_data=np.concatenate([processed_landmarks,
                               [bbox_width,
                                bbox_height,
                                bbox_ratio,
                                speed,
                                bbox_class]])

    print(f"Input data length: {len(input_data)}, expected length: {input_size}")

    if len(input_data) != input_size:
       print(f"Warning: input_data length is {len(input_data)}, expected {input_size}")
       return None , None
    
    input_tensor=torch.FloatTensor(input_data).unsqueeze(0).unsqueeze(0)

    with torch.no_grad():
       output=gru_model(input_tensor)

    probabilities=torch.softmax(output , dim=1).cpu().numpy()[0]  
    predicted_class=torch.argmax(output).item()
    
    return predicted_class , probabilities

# 비디오 파일 경로 지정 및 열기 
video_path="D:\\human_fall\\re_video\\training\\Y\\01452_O_B_FY_C8.mp4"
cap=cv2.VideoCapture(video_path)

# 비디오 속성 가져오기 
width=int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height=int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps=cap.get(cv2.CAP_PROP_FPS)

# 출력 비디오 설정 
fourcc=cv2.VideoWriter_fourcc(*'mp4v')
out_path='data_video_test_outputY_GRU_yolo.mp4'
out=cv2.VideoWriter(out_path,fourcc,fps,(width,height))

prev_landmarks=None

# 프레임 처리 루프 
while cap.isOpened():
    ret , frame=cap.read()
    
    if not ret:
       break

    rgb_frame=cv2.cvtColor(frame , cv2.COLOR_BGR2RGB)
    results_pose=pose.process(rgb_frame)

    if results_pose.pose_landmarks:
        
        landmarks=np.array([[lm.x * width , lm.y * height , lm.z] for lm in results_pose.pose_landmarks.landmark])
       
        if prev_landmarks is not None: 
            result=detect_fall(landmarks , prev_landmarks)
            if result is not None:  
                label , probs=result 
                print(f"Predicted Class: {label}, Probabilities: {probs}")  
            else:
                print("Detection failed.")
        else: 
            label=None 

        prev_landmarks=landmarks 

        # 바운딩 박스와 라벨 그리기 
        if label is not None and bbox_info is not None and len(bbox_info) > 0:
            x1 , y1 , x2 , y2=bbox_info[0]   
            color=(0 ,255 ,0) if label==0 else ((255 ,255 ,0) if label==2 else (255 ,0 ,0)) 
            cv2.rectangle(frame , (int(x1), int(y1)), (int(x2), int(y2)), color ,2)
            class_name=class_names[label] if label is not None else 'Unknown'
            cv2.putText(frame , f'GRU: {class_name}' , (int(x1) , int(y1) -10) , cv2.FONT_HERSHEY_SIMPLEX ,0.7 , color ,2)
            print("YOLO results:", results[0].boxes.xyxy.cpu().numpy())
            print("Classes:", results[0].boxes.cls.cpu().numpy())
            print("Confidences:", results[0].boxes.conf.cpu().numpy())
        # 랜드마크 표시 
        mp_drawing.draw_landmarks(frame , results_pose.pose_landmarks , mp_pose.POSE_CONNECTIONS)

    # 프레임 저장 및 출력 
    resized_frame=cv2.resize(frame,(1920, 1080))
    out.write(resized_frame) 
    cv2.imshow('Fall Detection', resized_frame) 
    if cv2.waitKey(1) & 0xFF==ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()


0: 640x640 1 Fall, 6.0ms
Speed: 3.0ms preprocess, 6.0ms inference, 71.8ms postprocess per image at shape (1, 3, 640, 640)
Transformed coordinates: (2124.3585205078125, 799.4786682128906, 2524.6387939453125, 1467.958969116211)
Input data length: 27, expected length: 27
Predicted Class: 2, Probabilities: [   0.027933     0.17902     0.79305]


NameError: name 'bbox_info' is not defined

### GRU 모델만 사용
* bbox의 비율을 기준으로 클래스 분류

In [6]:
# 바운딩 박스 계산 및 그리기 함수
def calculate_and_draw_bbox(frame, landmarks):
    x_coordinates = landmarks[:, 0]
    y_coordinates = landmarks[:, 1]
    
    x1 = max(0, int(np.min(x_coordinates)))
    y1 = max(0, int(np.min(y_coordinates)))
    x2 = min(frame.shape[1], int(np.max(x_coordinates)))
    y2 = min(frame.shape[0], int(np.max(y_coordinates)))
    
    bbox_width = x2 - x1
    bbox_height = y2 - y1
    
    # 높이가 0일 경우 비율을 무한대로 설정
    bbox_ratio = bbox_width / bbox_height if bbox_height != 0 else float('inf')
    
    # 바운딩 박스 클래스 결정
    bbox_class = 0
    if bbox_ratio < 0.5:
        bbox_class = 0  # Normal
    elif 0.5 <= bbox_ratio < 0.7:
        bbox_class = 2  # Danger
    else:
        bbox_class = 1  # Fall
    
    # 바운딩 박스를 조금 더 넓게 조정 (각 방향으로 50픽셀 추가)
    padding = 50
    x1 = max(0, x1 - padding)
    y1 = max(0, y1 - padding)
    x2 = min(frame.shape[1], x2 + padding)
    y2 = min(frame.shape[0], y2 + padding)
    
    return (x1, y1, x2, y2), bbox_width, bbox_height, bbox_ratio, bbox_class

def calculate_head_upper_body_speed(keypoints, prev_keypoints):
    h = np.array([keypoints[0, 0], keypoints[0, 1]])  # 머리 좌표
    l = np.array([keypoints[11, 0], keypoints[11, 1]])  # 왼쪽 어깨 좌표
    r = np.array([keypoints[12, 0], keypoints[12, 1]])  # 오른쪽 어깨 좌표

    # 이전 프레임의 좌표
    prev_h = np.array([prev_keypoints[0, 0], prev_keypoints[0, 1]])
    prev_l = np.array([prev_keypoints[11, 0], prev_keypoints[11, 1]])
    prev_r = np.array([prev_keypoints[12, 0], prev_keypoints[12, 1]])

    # 현재 프레임과 이전 프레임의 상체 중심
    center_new = (h + l + r) / 3
    center_prev = (prev_h + prev_l + prev_r) / 3

    # 유클리드 거리 계산 (속도)
    speed = distance.euclidean(center_new, center_prev)
    return speed

# MediaPipe 초기화
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils

LANDMARKS = [0, 11, 12, 15, 16, 23, 24, 25, 26, 27, 28]

def process_landmarks(landmarks): 
    selected_landmarks = landmarks[LANDMARKS]
    return selected_landmarks[:, :2].flatten()

# GRU 모델 정의
class GRUModel(torch.nn.Module):
    def __init__(self, input_size, hidden_size=64, num_layers=2, output_size=3, dropout=0.5):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.gru(x, h0)
        out = self.dropout(out[:, -1, :])
        out = self.fc(out)
        return out

# GRU 모델 로드
input_size = 27  
hidden_size = 64
num_layers = 2
output_size = 3
dropout = 0.5    

gru_model = GRUModel(input_size, hidden_size, num_layers, output_size, dropout)
gru_model.load_state_dict(torch.load('D:\\project\\prjvenv\\GRU\\best_GRU_model.pt', map_location=torch.device('cpu')))
gru_model.eval()

# 클래스 이름 정의
class_names = {0: 'Normal', 1: 'Fall', 2: 'Danger'}

# 낙상 감지 함수
def detect_fall(landmarks, prev_landmarks, frame):
    speed = calculate_head_upper_body_speed(landmarks, prev_landmarks) if prev_landmarks is not None else 0
    processed_landmarks = process_landmarks(landmarks)
    
    bbox_info = calculate_and_draw_bbox(frame, landmarks)
    
    if bbox_info is None:
        return None
    
    bbox_width, bbox_height, bbox_ratio, bbox_class = bbox_info[1:4] + (bbox_info[4],)
    
    print(f"Processed landmarks length: {len(processed_landmarks)}")
    print(f"BBox width: {bbox_width}, height: {bbox_height}, ratio: {bbox_ratio}, speed: {speed}")   

    # processed_landmarks와 함께 바운딩 박스 좌표 및 속도 정보 추가
    input_data = np.concatenate([processed_landmarks,
                                  [bbox_width,
                                   bbox_height,
                                   bbox_ratio,
                                   speed,
                                   bbox_class]])
    
    if len(input_data) != input_size:
        print(f"Warning: input_data length is {len(input_data)}, expected {input_size}")
        return None
    
    input_tensor = torch.FloatTensor(input_data).unsqueeze(0).unsqueeze(0)
    
    with torch.no_grad():
        output = gru_model(input_tensor)

    probabilities = torch.softmax(output, dim=1).numpy()[0]
    predicted_class = torch.argmax(output).item()
    
    return predicted_class, probabilities

# 비디오 파일 경로 지정
video_path = "D:\\human_fall\\re_video\\validation\\N\\02327_H_A_N_C6.mp4"
# 비디오 파일 열기
cap = cv2.VideoCapture(video_path)

# 비디오 속성 가져오기
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# 출력 비디오 설정
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out_path='data_video_test_outputY_GRU_only.mp4'
out= cv2.VideoWriter(out_path,fourcc,fps,(width,height))

prev_landmarks=None

# 프레임 처리 루프
while cap.isOpened():
    ret , frame= cap.read()
    if not ret:
        break

    # MediaPipe로 포즈 추정 
    rgb_frame=cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results_pose=pose.process(rgb_frame)

    if results_pose.pose_landmarks:
       landmarks=np.array([[lm.x * width , lm.y * height , lm.z] for lm in results_pose.pose_landmarks.landmark])
       
       # 바운딩 박스 계산 및 그리기 
       bbox_info=calculate_and_draw_bbox(frame , landmarks)

       if prev_landmarks is not None: 
           label , probs=detect_fall(landmarks , prev_landmarks , frame)
           print(f"Predicted Class: {label}, Probabilities: {probs}")  
       else: 
           label=None 

       prev_landmarks=landmarks 

       # 바운딩 박스와 라벨 그리기 
       x1 , y1 , x2 , y2=bbox_info[0]   
       color=(0 ,255 ,0) if label==0 else ((0, 255, 255) if label==1 else (255, 0, 0)) 
       cv2.rectangle(frame , (x1 , y1) , (x2 , y2) , color ,2)
       # 클래스 이름을 사용하여 텍스트 표시
       class_name = class_names[label] if label is not None else 'Unknown'
       cv2.putText(frame , f'GRU: {label}' , (x1 , y1 -10) , cv2.FONT_HERSHEY_SIMPLEX ,0.7 , color ,2)

       # 랜드마크 표시 
       mp_drawing.draw_landmarks(frame , results_pose.pose_landmarks , mp_pose.POSE_CONNECTIONS)

    # 프레임 저장 및 출력 
    resized_frame=cv2.resize(frame,(1920 ,1080))  
    out.write(resized_frame) 
    cv2.imshow('Fall Detection' , resized_frame) 
    if cv2.waitKey(1) & 0xFF==ord('q'):
       break

cap.release()
out.release()
cv2.destroyAllWindows()



Processed landmarks length: 22
BBox width: 439, height: 358, ratio: 1.2262569832402235, speed: 22.330371340889418
Predicted Class: 1, Probabilities: [   0.038052     0.73953     0.22242]
Processed landmarks length: 22
BBox width: 441, height: 394, ratio: 1.119289340101523, speed: 7.060977690259665
Predicted Class: 1, Probabilities: [   0.037483     0.74019     0.22233]
Processed landmarks length: 22
BBox width: 484, height: 417, ratio: 1.160671462829736, speed: 56.794296189125006
Predicted Class: 1, Probabilities: [   0.023751     0.77615      0.2001]
Processed landmarks length: 22
BBox width: 389, height: 371, ratio: 1.0485175202156334, speed: 38.520641438712275
Predicted Class: 1, Probabilities: [   0.038052     0.73953     0.22242]
Processed landmarks length: 22
BBox width: 486, height: 395, ratio: 1.230379746835443, speed: 18.204313168576896
Predicted Class: 1, Probabilities: [   0.023676     0.75962     0.21671]
Processed landmarks length: 22
BBox width: 567, height: 413, ratio: 1

### GRU 모델만 사용
* 속도 기반으로 초기 클래스 결정 후 bbox의 비율로 클래스 조정

In [5]:
# MediaPipe 초기화
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils

# 랜드마크 인덱스 정의 (예: 코, 왼쪽 어깨, 오른쪽 어깨 등)
LANDMARKS = [0, 11, 12, 15, 16, 23, 24, 25, 26, 27, 28]  # 총 11개 랜드마크

# Threshold 값 정의
threshold_normal = 10.5   # 일반 상태로 간주되는 속도 임계값
threshold_danger = 15.5   # 위험 상태로 간주되는 속도 임계값

def calculate_head_upper_body_speed(keypoints, prev_keypoints):
    h = np.array([keypoints[0, 0], keypoints[0, 1]])   # 머리 좌표
    l = np.array([keypoints[11, 0], keypoints[11, 1]])  # 왼쪽 어깨 좌표
    r = np.array([keypoints[12, 0], keypoints[12, 1]])  # 오른쪽 어깨 좌표

    # 이전 프레임의 좌표가 없는 경우 속도는 0으로 설정
    if prev_keypoints is None:
        return 0.0

    prev_h = np.array([prev_keypoints[0, 0], prev_keypoints[0, 1]])
    prev_l = np.array([prev_keypoints[11, 0], prev_keypoints[11, 1]])
    prev_r = np.array([prev_keypoints[12, 0], prev_keypoints[12, 1]])

    # 현재 프레임과 이전 프레임의 상체 중심 계산
    center_new = (h + l + r) / 3
    center_prev = (prev_h + prev_l + prev_r) / 3

    # 유클리드 거리 계산 (속도)
    speed = distance.euclidean(center_new, center_prev)
    return speed

# GRU 모델 정의
class GRUModel(torch.nn.Module):
    def __init__(self, input_size=27):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size = 64
        self.num_layers = num_layers = 2
        self.gru = nn.GRU(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True,
                          dropout=0.5)
        self.fc = nn.Linear(hidden_size, 3)  
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.gru(x, h0)
        out = self.dropout(out[:, -1, :])
        out = self.fc(out)
        return out

# GRU 모델 로드
input_size = 27
gru_model = GRUModel(input_size=input_size)  
gru_model.load_state_dict(torch.load('D:\\project\\prjvenv\\GRU\\GRU_pts\\4. mediapipe, sensordata, bbox_ratio, speed\\mediapipe_sensordata_bbox_ratio_speed_except_normalization.pt', map_location=torch.device('cuda')))
gru_model.eval()
gru_model = gru_model.to(device)

# 클래스 이름 정의
class_names = {0: 'Normal', 1: 'Fall', 2: 'Danger'}

def process_landmarks(landmarks, bbox, speed):
    selected_landmarks = landmarks[LANDMARKS]   # 지정된 랜드마크 선택
    landmark_features = selected_landmarks[:, :2].flatten()  # (x,y) 좌표
    bbox_features = np.array(bbox).flatten()  # 바운딩 박스 좌표
    speed_feature = np.array([speed])  # 속도
    
    # 모든 특성을 결합
    features = np.concatenate([landmark_features, bbox_features, speed_feature])
    
    return features  # 총 27개의 특성 (22 + 4 + 1)

def calculate_and_draw_bbox(frame, landmarks):
    x_coordinates = landmarks[:, 0]
    y_coordinates = landmarks[:, 1]
    
    x1 = max(0, int(np.min(x_coordinates)))
    y1 = max(0, int(np.min(y_coordinates)))
    x2 = min(frame.shape[1], int(np.max(x_coordinates)))
    y2 = min(frame.shape[0], int(np.max(y_coordinates)))
    
    bbox_width = x2 - x1
    bbox_height = y2 - y1
    
    # 바운딩 박스를 조금 더 넓게 조정 (각 방향으로 패딩 추가)
    padding = 100
    x1 = max(0, x1 - padding)
    y1 = max(0, y1 - padding)
    x2 = min(frame.shape[1], x2 + padding)
    y2 = min(frame.shape[0], y2 + padding)

    return (x1, y1, x2, y2), bbox_width, bbox_height

# 낙상 감지 함수
def detect_fall(frame, landmarks, prev_landmarks, fall_frame_counter):
    global determine_fall, gru_model
    
    if determine_fall:
        return 1, fall_frame_counter
    
    speed = calculate_head_upper_body_speed(landmarks, prev_landmarks)
    bbox, bbox_width, bbox_height = calculate_and_draw_bbox(frame, landmarks)
    bbox_ratio = bbox_width / bbox_height if bbox_height != 0 else float('inf')
    
    processed_landmarks = process_landmarks(landmarks, bbox, speed)
    
    # GRU 모델 입력을 위한 데이터 준비
    input_data = torch.FloatTensor(processed_landmarks).unsqueeze(0).unsqueeze(0).to(device)
    
    # GRU 모델을 통한 예측
    with torch.no_grad():
        output = gru_model(input_data)
        _, predicted = torch.max(output.data, 1)
        bbox_class = predicted.item()
        print(f"GRU Raw Output: {output.cpu().numpy()}, Predicted Class: {bbox_class}")
    
    # 후처리: 속도와 bbox_ratio를 기반으로 예측 결과 보정
    if speed < threshold_normal and bbox_ratio < 0.6:
        bbox_class = 0  # Normal
    elif speed >= threshold_danger or bbox_ratio > 1.0 :
        bbox_class = 1  # Fall
    else:
        bbox_class = 2  # Danger
            
    print(f"Speed: {speed}, bbox_ratio: {bbox_ratio}")
    print(f"Final predicted class after post-processing: {bbox_class}")
    
    # Fall_counter 업데이트
    if bbox_class == 1:
        fall_frame_counter += 1
        if fall_frame_counter >= 20:
            determine_fall = True
    else:
        fall_frame_counter = 0

    return bbox_class, fall_frame_counter

# 비디오 파일 경로 지정 및 열기 
video_path = "D:\\041.낙상사고 위험동작 영상-센서 쌍 데이터\\3.개방데이터\\1.데이터\\Validation\\01.원천데이터\\VS\\영상\\N\\N\\01591_L_F_N_C8\\01591_L_F_N_C8.mp4"
cap = cv2.VideoCapture(video_path)

# 비디오 속성 가져오기 
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# 출력 비디오 설정 
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out_path = 'C:\\Users\\user\\Desktop\\prj_sample_vid\\testsample.mp4'
out = cv2.VideoWriter(out_path, fourcc, fps, (width, height))

# 프레임 처리 루프 
fall_frame_counter = 0
determine_fall = False
prev_landmarks = None

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results_pose = pose.process(rgb_frame)

    if results_pose.pose_landmarks:
        landmarks = np.array([[lm.x * width, lm.y * height, lm.z] for lm in results_pose.pose_landmarks.landmark])
       
        label, fall_frame_counter = detect_fall(frame, landmarks, prev_landmarks, fall_frame_counter)  
        print(f"Predicted Class: {label}")  

        # 바운딩 박스와 라벨 그리기 
        bbox, _, _ = calculate_and_draw_bbox(frame, landmarks)
        color = (0, 255, 0) if label == 0 else ((0, 255, 255) if label == 2 else (0, 0, 255)) 
        cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[2], bbox[3]), color, 4)
        class_name = class_names[label] if label is not None else 'Unknown'
        cv2.putText(frame, f'{class_name}', (bbox[0], bbox[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 2, color, 3)

        if determine_fall: 
            text = "FALL"
            font = cv2.FONT_HERSHEY_DUPLEX
            font_scale = 3
            color = (255, 255, 255)  # 텍스트 색상 (흰색)
            thickness = 6
            box_color = (0, 0, 255)  # 박스 색상 (빨간색)

            # 텍스트 크기 계산
            text_size = cv2.getTextSize(text, font, font_scale, thickness)[0]

            # 중앙 좌표 계산
            frame_height, frame_width = frame.shape[:2]
            text_x = (frame_width - text_size[0]) // 2
            text_y = (frame_height + text_size[1]) // 2

            # 박스 좌표 계산 (텍스트 배경)
            box_x1 = 0  # 좌측 여백
            box_y1 = text_y - text_size[1] - 30  # 상단 여백
            box_x2 = frame_width + 30  # 우측 여백
            box_y2 = text_y + 30  # 하단 여백

            # 속이 채워진 박스 그리기
            cv2.rectangle(frame, (box_x1, box_y1), (box_x2, box_y2), box_color, -1)
            # 텍스트 추가
            cv2.putText(frame, text, (text_x, text_y), font, font_scale, color, thickness)
                        
        # 랜드마크 표시 
        mp_drawing.draw_landmarks(frame, results_pose.pose_landmarks, mp_pose.POSE_CONNECTIONS)

        prev_landmarks = landmarks

    # 프레임 저장 및 출력 
    resized_frame = cv2.resize(frame, (1920, 1080))  
    out.write(frame) 
    cv2.imshow('Fall Detection', resized_frame) 
    if cv2.waitKey(1) & 0xFF == ord('q'):
         break

cap.release()
out.release()
cv2.destroyAllWindows()

RuntimeError: Error(s) in loading state_dict for GRUModel:
	size mismatch for gru.weight_ih_l0: copying a param with shape torch.Size([192, 28]) from checkpoint, the shape in current model is torch.Size([192, 27]).

In [3]:
# MediaPipe 초기화
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils

# 랜드마크 인덱스 정의 
LANDMARKS = [0, 11, 12, 15, 16, 23, 24, 25, 26, 27, 28]  # 총 11개 랜드마크

# GRU 모델 정의
class GRUModel(torch.nn.Module):
    def __init__(self, input_size=27):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size = 64
        self.num_layers = num_layers = 2
        self.gru = nn.GRU(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True,
                          dropout=0.5)
        self.fc = nn.Linear(hidden_size, 3)  
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.gru(x, h0)
        out = self.dropout(out[:, -1, :])
        out = self.fc(out)
        return out

# GRU 모델 로드
input_size = 28
gru_model = GRUModel(input_size=input_size)  
gru_model.load_state_dict(torch.load('D:\\project\\prjvenv\\final_GRU_training.pt', map_location=torch.device('cpu')))
gru_model.eval()

# GPU 사용 가능 시 GPU로 모델 이동
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
gru_model = gru_model.to(device)

# 클래스 이름 정의
class_names = {0: 'Normal', 1: 'Fall', 2: 'Danger'}

def process_landmarks(landmarks, bbox_ratio, bbox_coords):
    selected_landmarks = landmarks[LANDMARKS]   # 지정된 랜드마크 선택
    landmark_features = selected_landmarks[:, :2].flatten()  # (x,y) 좌표
    
    # 바운딩 박스 좌표
    bbox_coord_feature = np.array(bbox_coords)
    
    # 바운딩 박스 비율 및 속도 정보 추가
    bbox_feature = np.array([bbox_ratio])  # 바운딩 박스 비율
    speed_feature = np.array([0])  # 속도 

    # 모든 특성을 결합하여 총 28개 특성 생성
    features = np.concatenate([landmark_features, bbox_feature, bbox_coord_feature, speed_feature])
    return features

def calculate_and_draw_bbox(frame, landmarks):
    x_coordinates = landmarks[:, 0]
    y_coordinates = landmarks[:, 1]
    
    x1 = max(0, int(np.min(x_coordinates)))
    y1 = max(0, int(np.min(y_coordinates)))
    x2 = min(frame.shape[1], int(np.max(x_coordinates)))
    y2 = min(frame.shape[0], int(np.max(y_coordinates)))

    # 바운딩 박스를 조금 더 넓게 조정 (각 방향으로 패딩 추가)
    padding = 50
    x1 = max(0, x1 - padding)
    y1 = max(0, y1 - padding)
    x2 = min(frame.shape[1], x2 + padding)
    y2 = min(frame.shape[0], y2 + padding)

    # 바운딩 박스 비율 계산
    bbox_width = x2 - x1
    bbox_height = y2 - y1
    bbox_ratio = bbox_width / bbox_height if bbox_height != 0 else float('inf')  # 높이가 0일 경우 무한대로 설정

    return (x1, y1), (x2, y2), bbox_ratio

def calculate_head_upper_body_speed(keypoints, prev_keypoints):
    h = np.array([keypoints[0, 0], keypoints[0, 1]])   # 머리 좌표
    l = np.array([keypoints[11, 0], keypoints[11, 1]])  # 왼쪽 어깨 좌표
    r = np.array([keypoints[12, 0], keypoints[12, 1]])  # 오른쪽 어깨 좌표

    # 이전 프레임의 좌표가 없는 경우 속도는 0으로 설정
    if prev_keypoints is None:
        return 0.0

    prev_h = np.array([prev_keypoints[0, 0], prev_keypoints[0, 1]])
    prev_l = np.array([prev_keypoints[11, 0], prev_keypoints[11, 1]])
    prev_r = np.array([prev_keypoints[12, 0], prev_keypoints[12, 1]])

    # 현재 프레임과 이전 프레임의 상체 중심 계산
    center_new = (h + l + r) / 3
    center_prev = (prev_h + prev_l + prev_r) / 3

    # 유클리드 거리 계산 (속도)
    speed = distance.euclidean(center_new, center_prev)
    return speed

# 낙상 감지 함수
def detect_fall(frame, landmarks, prev_landmarks):
    # 바운딩 박스 및 속도 정보 계산
    bbox_coords_top_left, bbox_coords_bottom_right, bbox_ratio = calculate_and_draw_bbox(frame, landmarks)
    # 바운딩 박스 좌표 (x1, y1, x2, y2) 포함
    bbox_coords = [bbox_coords_top_left[0], bbox_coords_top_left[1],
                   bbox_coords_bottom_right[0], bbox_coords_bottom_right[1]]
    processed_landmarks = process_landmarks(landmarks, bbox_ratio, bbox_coords)    
    speed = calculate_head_upper_body_speed(landmarks, prev_landmarks)

    # GRU 모델 입력을 위한 데이터 준비
    input_data = torch.FloatTensor(processed_landmarks).unsqueeze(0).unsqueeze(0).to(device)
    
    # GRU 모델을 통한 예측
    with torch.no_grad():
        output = gru_model(input_data)
        _, predicted_class_index = torch.max(output.data, 1)

    return predicted_class_index.item(), bbox_coords_top_left, bbox_coords_bottom_right, speed, bbox_ratio

# 비디오 파일 경로 지정 및 열기 
video_path = "D:\\project\\data\\src\\mp4\\00007_H_A_FY_C1.mp4"
cap = cv2.VideoCapture(video_path)

# 비디오 속성 가져오기 
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# 출력 비디오 설정 
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out_path='GRU_model_output.mp4'
out=cv2.VideoWriter(out_path,fourcc,fps,(width,height))

prev_landmarks=None

while cap.isOpened():
    ret , frame=cap.read()
    
    if not ret:
        break

    rgb_frame=cv2.cvtColor(frame , cv2.COLOR_BGR2RGB)
    results_pose=pose.process(rgb_frame)

    if results_pose.pose_landmarks:
        landmarks=np.array([[lm.x * width, lm.y * height] for lm in results_pose.pose_landmarks.landmark])
       
        label, bbox_top_left, bbox_bottom_right, speed, bbox_ratio = detect_fall(frame, landmarks, prev_landmarks)  
        
        print(f"Predicted Class: {label}, Speed: {speed}, BBox Ratio: {bbox_ratio}")  

        # 바운딩 박스와 라벨 그리기 
        color=(0, 255, 0) if label==0 else ((0, 255, 255) if label==2 else (0, 0, 255)) 
        cv2.rectangle(frame, bbox_top_left, bbox_bottom_right, color, 2)
        
        class_name=class_names[label]
        cv2.putText(frame, f'GRU: {class_name}', (bbox_top_left[0], bbox_top_left[1] -10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

        # 랜드마크 표시 
        mp_drawing.draw_landmarks(frame, results_pose.pose_landmarks, mp_pose.POSE_CONNECTIONS)

        prev_landmarks=landmarks

    # 프레임 저장 및 출력 
    resized_frame=cv2.resize(frame,(1920 ,1080))  
    out.write(frame) 
    cv2.imshow('Fall Detection', resized_frame) 
   
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()