### mediapipe의 랜드마크, bbox의 좌표, bbox의 비율을 학습시켰을 때의 비디오 테스트
* input_size = 27 (랜드마크 11개 × (x, y) = 22, bbox 좌표 4, bbox 비율 1)
* sequence_length = 3

In [2]:
import cv2
import mediapipe as mp
import numpy as np
import torch
from torch.utils.data import Dataset
import torch.nn as nn

In [5]:
# Select the compute device: use CUDA when it is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [6]:
# Indices of the pose landmarks that are used as model features.
# Body-part names follow the MediaPipe Pose landmark numbering.
LANDMARKS = [
    0,   # nose
    11,  # left shoulder
    12,  # right shoulder
    15,  # left wrist
    16,  # right wrist
    23,  # left hip
    24,  # right hip
    25,  # left knee
    26,  # right knee
    27,  # left ankle
    28,  # right ankle
]

class FallDetectionGRU(nn.Module):
    """GRU sequence classifier that turns a feature sequence into class logits.

    The network is a stacked ``nn.GRU`` (``batch_first=True``) followed by
    dropout and a linear layer applied to the GRU output of the last time
    step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        output_size: Number of output logits.
        dropout: Dropout probability, used between stacked GRU layers and
            before the final linear layer.
    """

    def __init__(self, input_size, hidden_size=64, num_layers=2, output_size=3, dropout=0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # nn.GRU applies dropout only between stacked layers; passing a
        # non-zero value with a single layer has no effect and only triggers
        # a UserWarning, so it is disabled in that case.
        gru_dropout = dropout if num_layers > 1 else 0.0
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=gru_dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return logits for a batch of sequences.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, output_size)`` computed from the GRU
            output at the last time step.
        """
        # Zero initial hidden state created with the input's dtype and device,
        # so the model also works after .double()/.half() or a device move.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        out, _ = self.gru(x, h0)
        # Only the last time step feeds the classifier head.
        out = self.dropout(out[:, -1, :])
        return self.fc(out)

In [15]:
# Build the model, move it to the selected device and restore the trained weights.
input_size = 27  # landmark x,y coordinates + bbox coordinates + bbox ratio
model = FallDetectionGRU(input_size=input_size)
model.to(device)
model.load_state_dict(
    torch.load(
        'D:\\project\\prjvenv\\GRU\\GRU_pts\\3. mediapipe, sensordata, bbox\\mediapipe_sensordata_bbox_except_normalization.pt',
        map_location=device,
    )
)
# Switch to evaluation mode (disables dropout) for inference.
model.eval()

FallDetectionGRU(
  (gru): GRU(27, 64, num_layers=2, batch_first=True, dropout=0.5)
  (fc): Linear(in_features=64, out_features=3, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [16]:
# Shortcuts to the MediaPipe drawing helpers and the pose solution module.
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# Pose estimator configured with static_image_mode=False and a minimum
# detection confidence of 0.5.
pose = mp_pose.Pose(
    min_detection_confidence=0.5,
    static_image_mode=False,
)

In [35]:
from collections import deque

# Open the video file to run inference on.
video_path = 'D:\\human_fall\\re_video\\validation\\Y\\00170_H_A_SY_C5.mp4'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Without this message the loop below would silently do nothing.
    print(f"Could not open video: {video_path}")

# Sequence length (must match the value used during training).
sequence_length = 3
# Sliding window over the most recent per-frame feature vectors; maxlen makes
# the deque drop the oldest entry automatically when a new one is appended.
data_sequence = deque(maxlen=sequence_length)

# Build the GRU model and load the trained weights.
input_size = 27  # landmark x,y coordinates + bbox coordinates + bbox ratio
model = FallDetectionGRU(input_size).to(device)
model.load_state_dict(torch.load('D:\\project\\prjvenv\\GRU\\GRU_pts\\3. mediapipe, sensordata, bbox\\mediapipe_sensordata_bbox_except_normalization.pt', map_location=device))
model.eval()

# Loop invariants: class-id -> name, and name -> BGR colour of the overlay text.
label_name = {0: 'Normal', 1: 'Danger', 2: 'Fall'}
label_colors = {
    'Normal': (0, 255, 0),
    'Danger': (0, 255, 255),
    'Fall': (0, 0, 255),
}

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_height, frame_width = frame.shape[:2]

        # Convert BGR to RGB before running pose estimation.
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = pose.process(image_rgb)

        # Frames without a detected pose are shown as-is; the window of
        # previous feature vectors is kept untouched in that case.
        if results.pose_landmarks:
            landmarks = []

            # Collect the selected landmarks in pixel coordinates and mark them.
            for landmark_idx in LANDMARKS:
                landmark = results.pose_landmarks.landmark[landmark_idx]
                x_px = landmark.x * frame_width
                y_px = landmark.y * frame_height
                landmarks.append([x_px, y_px])
                cv2.circle(frame, (int(x_px), int(y_px)), 5, (0, 255, 0), -1)

            landmarks_array = np.array(landmarks).flatten()

            # Placeholder for the YOLO bbox: the whole frame is used as the box.
            bbox_x1, bbox_y1, bbox_x2, bbox_y2 = 0, 0, frame_width, frame_height

            # Width / height ratio of the bbox (0 when the height is 0).
            bbox_height = bbox_y2 - bbox_y1
            bbox_ratio_value = (bbox_x2 - bbox_x1) / bbox_height if bbox_height != 0 else 0

            # Feature vector: landmarks + bbox coordinates + bbox ratio.
            landmarks_array = np.concatenate((landmarks_array,
                                              [bbox_x1, bbox_y1, bbox_x2, bbox_y2],
                                              [bbox_ratio_value]))

            data_sequence.append(landmarks_array)

            # Predict once the window holds sequence_length frames.
            if len(data_sequence) == sequence_length:
                # Shape: (batch_size=1, sequence_length, features per frame).
                input_data = np.array(list(data_sequence)).reshape(1, sequence_length, -1)
                input_tensor = torch.FloatTensor(input_data).to(device)

                with torch.no_grad():
                    outputs = model(input_tensor)
                # argmax over the class axis; the batch holds a single sample.
                predicted_label_id = torch.argmax(outputs, dim=1).item()
                predicted_label_name = label_name[predicted_label_id]

                print(f"Predicted Class: {predicted_label_name}")  # log the prediction to the console

                # Draw the predicted class at a fixed position, coloured per class.
                cv2.putText(frame, f"Class: {predicted_label_name}", (50, 150),
                            cv2.FONT_HERSHEY_SIMPLEX, 5,
                            label_colors[predicted_label_name], 4)

            # Draw the full pose skeleton on the frame.
            mp_drawing.draw_landmarks(frame, results.pose_landmarks, mp_pose.POSE_CONNECTIONS)

        resized_frame = cv2.resize(frame, (1920, 1080))

        # Show the frame; press 'q' to stop early.
        cv2.imshow('Fall Detection', resized_frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the window, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()

Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Danger
Predicted Class: Danger
Predicted Class: Danger
Predicted Class: Fall
Predicted Class: Fall
Predicted Class: Fall
Predicted Class: Fall
Predicted Class: Fall
Predicted Class: Fall
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Predicted Class: Normal
Pred