### Mediapipe의 랜드마크만 학습시켰을 때의 비디오 테스트
* input_size = 22 (랜드마크 11개 × (x, y) 좌표)
* sequence_length = 3

In [62]:
import cv2
import mediapipe as mp
import numpy as np
import torch
import torch.nn as nn

In [60]:
# Pick the compute device: CUDA when a GPU is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [63]:
# Indices of the pose landmarks that are fed to the model.
# Body-part names follow the MediaPipe Pose landmark numbering.
LANDMARKS = [
    0,   # nose
    11,  # left shoulder
    12,  # right shoulder
    15,  # left wrist
    16,  # right wrist
    23,  # left hip
    24,  # right hip
    25,  # left knee
    26,  # right knee
    27,  # left ankle
    28,  # right ankle
]

class FallDetectionGRU(nn.Module):
    """GRU sequence classifier that maps a window of per-frame features to class logits.

    The network is a (possibly stacked) GRU followed by dropout and a linear
    layer applied to the GRU output of the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        output_size: Number of output classes (logits).
        dropout: Dropout probability used between stacked GRU layers and
            before the final linear layer.
    """

    def __init__(self, input_size, hidden_size=64, num_layers=2, output_size=3, dropout=0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # nn.GRU only applies dropout *between* stacked layers; passing a
        # non-zero value with a single layer has no effect and emits a
        # UserWarning, so it is disabled in that case.
        gru_dropout = dropout if num_layers > 1 else 0.0
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=gru_dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return class logits for a batch of sequences.

        Args:
            x: Tensor of shape (batch, sequence_length, input_size).

        Returns:
            Tensor of shape (batch, output_size) with unnormalised logits.
        """
        # When no initial state is given, nn.GRU starts from zeros that match
        # the input's device and dtype, so the model also works after
        # .double()/.half() without a hand-built float32 h0.
        out, _ = self.gru(x)
        # Classify from the GRU output at the last time step only.
        out = self.dropout(out[:, -1, :])
        return self.fc(out)

# 모델 초기화 및 가중치 로드
# Build the network and restore the trained weights.
# input_size is the per-frame feature count (the loop below feeds x and y
# for each entry of LANDMARKS).
input_size = 22
weights_path = 'D:\\project\\prjvenv\\GRU\\GRU_pts\\2. mediapipe & sensordata\\mediapipe_sensordata_except_normalization.pt'

model = FallDetectionGRU(input_size)
model = model.to(device)

# map_location makes the checkpoint tensors land on the selected device.
state_dict = torch.load(weights_path, map_location=device)
model.load_state_dict(state_dict)

# Switch to inference mode (disables dropout).
model.eval()

FallDetectionGRU(
  (gru): GRU(22, 64, num_layers=2, batch_first=True, dropout=0.5)
  (fc): Linear(in_features=64, out_features=3, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [64]:
# MediaPipe handles: drawing helpers and the pose solution module.
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

# Pose estimator shared by every frame of the video;
# static_image_mode=False is MediaPipe's video-stream mode.
pose = mp_pose.Pose(
    static_image_mode=False,
    min_detection_confidence=0.5,
)

In [83]:
# Run the fall-detection model over a video file and show the predicted
# class on every frame. Press 'q' in the window to stop early.
from collections import deque

video_path = 'D:\\human_fall\\re_video\\validation\\Y\\00170_H_A_SY_C4.mp4'

sequence_length = 3  # window length; must match the value used during training
# Sliding window holding the feature vectors of the most recent frames in
# which a pose was detected; the oldest entry is dropped automatically.
data_sequence = deque(maxlen=sequence_length)

# Class id -> label, and label -> BGR colour of the overlay text.
label_name = {0: 'Normal', 1: 'Danger', 2: 'Fall'}
label_color = {
    'Normal': (0, 255, 0),    # green
    'Danger': (0, 255, 255),  # yellow
    'Fall': (0, 0, 255),      # red
}

cap = cv2.VideoCapture(video_path)
try:
    if not cap.isOpened():
        # Fail loudly instead of silently showing nothing.
        raise IOError(f"Could not open video: {video_path}")

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # MediaPipe expects RGB input, OpenCV decodes frames as BGR.
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = pose.process(image_rgb)

        if results.pose_landmarks:
            detected = results.pose_landmarks.landmark
            # Flattened [x0, y0, x1, y1, ...] for the selected landmarks.
            landmarks_array = np.array(
                [[detected[idx].x, detected[idx].y] for idx in LANDMARKS]
            ).flatten()
            data_sequence.append(landmarks_array)

            # Predict only once a full window of frames is available.
            if len(data_sequence) == sequence_length:
                input_data = np.array(list(data_sequence)).reshape(1, sequence_length, -1)
                input_tensor = torch.as_tensor(input_data, dtype=torch.float32, device=device)

                with torch.no_grad():
                    outputs = model(input_tensor)
                predicted_label_id = torch.argmax(outputs, dim=1).item()

                # Overlay the predicted class name in its colour.
                predicted_label_name = label_name[predicted_label_id]
                cv2.putText(frame, f"Class: {predicted_label_name}", (50, 150),
                            cv2.FONT_HERSHEY_SIMPLEX, 5,
                            label_color[predicted_label_name], 4)

        # Display the (annotated) frame.
        resized_frame = cv2.resize(frame, (1920, 1080))
        cv2.imshow('Fall Detection', resized_frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the window even if an error occurred.
    cap.release()
    cv2.destroyAllWindows()