### mediapipe의 랜드마크, bbox의 좌표, bbox의 ratio, speed를 학습시켰을 때의 비디오 테스트
* input_size = 28
* sequence_length = 3

In [5]:
import cv2
import mediapipe as mp
import numpy as np
import torch
import torch.nn as nn
from scipy.spatial import distance

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [15]:
# MediaPipe Pose 초기화
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils

In [16]:
# 랜드마크 인덱스 정의 
LANDMARKS = [0, 11, 12, 15, 16, 23, 24, 25, 26, 27, 28]

# GRU 모델 정의
class FallDetectionGRU(nn.Module):
    def __init__(self, input_size, hidden_size=64, num_layers=2, num_classes=3, dropout=0.5):
        super(FallDetectionGRU, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, num_classes)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.gru(x, h0)
        out = self.dropout(out[:, -1, :])
        out = self.fc(out)
        return out

In [17]:
def calculate_head_upper_body_speed(current_frame, prev_frame):
    h = np.array([current_frame['landmark_0']['x'], current_frame['landmark_0']['y']])
    l = np.array([current_frame['landmark_11']['x'], current_frame['landmark_11']['y']])
    r = np.array([current_frame['landmark_12']['x'], current_frame['landmark_12']['y']])
    
    prev_h = np.array([prev_frame['landmark_0']['x'], prev_frame['landmark_0']['y']])
    prev_l = np.array([prev_frame['landmark_11']['x'], prev_frame['landmark_11']['y']])
    prev_r = np.array([prev_frame['landmark_12']['x'], prev_frame['landmark_12']['y']])
    
    center_new = (h + l + r) / 3
    center_prev = (prev_h + prev_l + prev_r) / 3
    
    return distance.euclidean(center_new, center_prev)

In [18]:
def calculate_and_draw_bbox(frame, landmarks):
    x_coordinates = landmarks[:, 0]
    y_coordinates = landmarks[:, 1]
    
    x1 = max(0, int(np.min(x_coordinates)))
    y1 = max(0, int(np.min(y_coordinates)))
    x2 = min(frame.shape[1], int(np.max(x_coordinates)))
    y2 = min(frame.shape[0], int(np.max(y_coordinates)))

    # 바운딩 박스를 조금 더 넓게 조정 (각 방향으로 패딩 추가)
    padding = 50
    x1 = max(0, x1 - padding)
    y1 = max(0, y1 - padding)
    x2 = min(frame.shape[1], x2 + padding)
    y2 = min(frame.shape[0], y2 + padding)

    # 바운딩 박스 비율 계산
    bbox_width = x2 - x1
    bbox_height = y2 - y1
    bbox_ratio = bbox_width / bbox_height if bbox_height != 0 else float('inf')  # 높이가 0일 경우 무한대로 설정
    
    # 바운딩 박스 그리기
    cv2.rectangle(frame, (x1, y1), (x2, y2), (255, 0, 0), 2)

    return (x1, y1), (x2, y2), bbox_ratio

In [21]:
# 비디오 파일 경로 설정 및 열기
video_path = 'D:\\human_fall\\re_video\\validation\\Y\\00170_H_A_SY_C5.mp4'
cap = cv2.VideoCapture(video_path)

sequence_length = 3

# GRU 모델 초기화 및 가중치 로드
input_size = 28  
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = FallDetectionGRU(input_size).to(device)
model.load_state_dict(torch.load('D:\\project\\prjvenv\\GRU\\GRU_pts\\4. mediapipe, sensordata, bbox_ratio, speed\\mediapipe_sensordata_bbox_ratio_speed_except_normalizaion.pt', map_location=device))
model.eval()

data_sequence = []
previous_landmarks_dict = None

mp_pose = mp.solutions.pose
pose = mp_pose.Pose()

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # BGR 이미지를 RGB로 변환 및 랜드마크 추출
    image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = pose.process(image_rgb)

    if results.pose_landmarks:
        landmarks_dict_current = {f'landmark_{i}': {'x': results.pose_landmarks.landmark[i].x,
                                                     'y': results.pose_landmarks.landmark[i].y} for i in range(len(results.pose_landmarks.landmark))}
        
        landmarks_array_current_flattened = []
        
        for landmark_idx in LANDMARKS:
            landmark = results.pose_landmarks.landmark[landmark_idx]
            landmarks_array_current_flattened.append([landmark.x * frame.shape[1], landmark.y * frame.shape[0]])  # 픽셀 좌표로 변환
            
            # 랜드마크를 비디오 프레임에 표시
            cv2.circle(frame, (int(landmark.x * frame.shape[1]), int(landmark.y * frame.shape[0])), 5, (0, 255, 0), -1)

        # 현재 프레임의 랜드마크 배열 변환 
        landmarks_array_current_flattened_np = np.array(landmarks_array_current_flattened)

        # 바운딩 박스 및 비율 계산
        bbox_ratio_value = calculate_and_draw_bbox(frame, landmarks_array_current_flattened_np)

        # 속도 정보 추가
        speed_value = 0.0
        
        if previous_landmarks_dict is not None:
            speed_value = calculate_head_upper_body_speed(landmarks_dict_current, previous_landmarks_dict)

        previous_landmarks_dict = landmarks_dict_current  

        # 랜드마크 + bbox 정보 추가
        landmarks_array_combined = np.concatenate((landmarks_array_current_flattened_np.flatten(),
                                                   [bbox_ratio_value],
                                                   [speed_value]))

        data_sequence.append(landmarks_array_combined)

        if len(data_sequence) == sequence_length:
            input_data = np.array(data_sequence).reshape(1, sequence_length, -1)  
            input_tensor = torch.FloatTensor(input_data).to(device)

            with torch.no_grad():
                outputs = model(input_tensor)
                predicted_label_id = torch.argmax(outputs).item()

                # 예측된 클래스 이름 출력
                label_name_mapping = {0: 'Normal', 1: 'Danger', 2: 'Fall'}
                predicted_label_name = label_name_mapping[predicted_label_id]

                print(f"Predicted Class: {predicted_label_name}")  

                # 예측된 클래스 이름을 바운딩 박스 왼쪽 상단에 표시
                cv2.putText(frame,
                            predicted_label_name,
                            (10, 30), cv2.FONT_HERSHEY_SIMPLEX,
                            1.5,(255,255,255),3)

            data_sequence.pop(0)  

        # 랜드마크 표시 
        mp_drawing.draw_landmarks(frame, results.pose_landmarks)

    resized_frame = cv2.resize(frame,(1920 ,1080))
    
    # 비디오 프레임 출력 
    cv2.imshow('Fall Detection', resized_frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
         break

cap.release()
cv2.destroyAllWindows()

RuntimeError: Error(s) in loading state_dict for FallDetectionGRU:
	size mismatch for gru.weight_ih_l0: copying a param with shape torch.Size([192, 27]) from checkpoint, the shape in current model is torch.Size([192, 28]).