In [1]:
import cv2
import torch
import numpy as np
import torch.nn as nn
import mediapipe as mp
import time
from ultralytics import YOLO

In [2]:
# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [72]:
# Load the YOLOv8 model from a local weights file.
# NOTE(review): this is an absolute, machine-specific Windows path; the cell only
# runs on a machine where this exact file exists.
yolo_model = YOLO('D:\\project\\prjvenv\\runs\\detect\\human_fall_s30\\weights\\best.pt')

# Fall detection function
def detect_fall(landmarks, bbox_width, bbox_height, bbox_ratio, confidence):
    """Classify one frame's pose and bounding-box features with the GRU model.

    The landmark coordinates selected by ``process_landmarks`` are concatenated
    with the four box values and passed to the module-level ``gru_model`` as a
    batch of one sequence containing a single time step.

    Args:
        landmarks: landmark array accepted by ``process_landmarks``.
        bbox_width: width of the detection box.
        bbox_height: height of the detection box.
        bbox_ratio: width / height of the detection box.
        confidence: detector confidence for the box.

    Returns:
        ``(predicted_class, probabilities)``: the argmax class index and the
        softmax probabilities as a NumPy array.
    """
    box_features = [bbox_width, bbox_height, bbox_ratio, confidence]
    feature_vector = np.concatenate([process_landmarks(landmarks), box_features])
    # (features,) -> (1, 1, features): batch of one, sequence length one.
    model_input = torch.FloatTensor(feature_vector)[None, None, :]

    with torch.no_grad():
        logits = gru_model(model_input)

    probabilities = torch.softmax(logits, dim=1)[0].numpy()
    predicted_class = logits.argmax(dim=1).item()

    print(f"Probabilities: Normal={probabilities[0]:.4f}, Danger={probabilities[1]:.4f}, Fall={probabilities[2]:.4f}")
    return predicted_class, probabilities

# Initialize MediaPipe Pose.
# A single Pose instance is created here and reused for every frame of the video.
# NOTE(review): static_image_mode=False presumably makes MediaPipe treat the input as a
# video stream and track landmarks between frames — confirm against the MediaPipe docs.
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5)

# Row indices of the pose landmarks that are kept as model features.
# NOTE(review): in MediaPipe Pose numbering these are believed to be the nose,
# shoulders, wrists, hips, knees and ankles — confirm against the MediaPipe docs.
LANDMARKS = [0, 11, 12, 15, 16, 23, 24, 25, 26, 27, 28]

def process_landmarks(landmarks):
    """Return the first two columns of the ``LANDMARKS`` rows as a flat 1-D array.

    Args:
        landmarks: 2-D NumPy array with one row per landmark; only the rows
            listed in ``LANDMARKS`` and the first two columns are used.

    Returns:
        1-D array of length ``2 * len(LANDMARKS)``, row-major over the kept rows.
    """
    kept_xy = landmarks[LANDMARKS, :2]
    return kept_xy.flatten()

# GRU model definition
class GRUModel(nn.Module):
    """Stacked GRU whose last time step goes through dropout and a linear layer.

    Args:
        input_size: number of features per time step.
        hidden_size: width of the GRU hidden state.
        num_layers: number of stacked GRU layers.
        output_size: number of output logits.
        dropout: dropout probability, used inside the GRU and before the
            linear layer.
    """

    def __init__(self, input_size, hidden_size=64, num_layers=2, output_size=3, dropout=0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # The attribute names `gru` and `fc` determine the state_dict keys.
        self.gru = nn.GRU(
            input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return logits of shape (batch, output_size) for x of shape (batch, seq_len, input_size)."""
        batch_size = x.size(0)
        # Explicit all-zero initial hidden state on the same device as the input.
        initial_hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size, device=x.device)
        sequence_out, _ = self.gru(x, initial_hidden)
        # Only the output of the final time step is classified.
        last_step = sequence_out[:, -1, :]
        return self.fc(self.dropout(last_step))

# Length of one feature vector fed to the GRU: detect_fall concatenates the
# x,y coordinates of the 11 selected landmarks (22 values) with bbox width,
# bbox height, bbox ratio and detection confidence (4 values) = 26.
input_size = 26 # landmark x,y coordinates + bounding-box features
hidden_size = 64
num_layers = 2
output_size = 3  # three logits; detect_fall prints them as Normal / Danger / Fall
dropout = 0.5    

# Build the network and restore the trained weights from a local checkpoint.
# The weights are mapped onto the CPU regardless of the `device` selected earlier
# in the notebook, so inference in detect_fall runs on the CPU.
# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
# NOTE(review): absolute, machine-specific Windows path.
gru_model = GRUModel(input_size, hidden_size, num_layers, output_size, dropout)
gru_model.load_state_dict(torch.load('D:\\project\\prjvenv\\GRU\\add_sensordata_bbox_newclass.pt', map_location=torch.device('cpu')))
# Switch to evaluation mode for inference.
gru_model.eval()

# Path of the input video to analyse
video_path = "D:\\human_fall\\re_video\\training\\Y\\01130_O_E_BY_C1.mp4"

# Open the video file and fail loudly if it cannot be read, instead of silently
# producing an empty output file.
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    cap.release()
    raise RuntimeError(f"Could not open video: {video_path}")

# Source video properties. width/height are also used to scale MediaPipe's
# normalized landmark coordinates to pixel coordinates.
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# Output video settings. Every frame is resized to this size before it is
# written, so the writer must be opened with the same size; opening it with the
# source (width, height) breaks the output whenever the source is not 1920x1080.
output_frame_size = (1920, 1080)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('data_video_test_outputY.mp4', fourcc, fps, output_frame_size)

confidence_threshold = 0.3
# Number of frames read so far
frame_count = 0

# try/finally guarantees the capture, the writer and the preview window are
# released even if processing raises or the cell is interrupted.
try:
    # Frame processing loop
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame_count += 1

        # Detect people with YOLOv8: exactly one inference per frame (the
        # previous extra call on every 5th frame had its result overwritten).
        results = yolo_model(frame)

        # MediaPipe pose is computed at most once per frame, lazily on the first
        # accepted box, so it always sees the frame before anything is drawn on it.
        pose_ready = False
        landmarks = None

        for result in results:
            boxes = result.boxes.xyxy.cpu().numpy().astype(int)
            confidences = result.boxes.conf.cpu().numpy()  # confidence scores
            class_ids = result.boxes.cls.cpu().numpy()  # class ids

            for i, box in enumerate(boxes):
                # Only class 0 above the confidence threshold is processed.
                if not (class_ids[i] == 0 and confidences[i] > confidence_threshold):
                    continue

                x1, y1, x2, y2 = box
                confidence = confidences[i]  # confidence score of this box

                # Bounding-box size and aspect ratio
                bbox_width = x2 - x1
                bbox_height = y2 - y1
                if bbox_height <= 0:
                    # Degenerate box: the ratio is undefined, skip it.
                    continue
                bbox_ratio = bbox_width / bbox_height

                # Extract landmarks with MediaPipe (once per frame)
                if not pose_ready:
                    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    results_pose = pose.process(rgb_frame)
                    if results_pose.pose_landmarks:
                        # Scale the landmark coordinates to the frame size
                        landmarks = np.array(
                            [[lm.x * width, lm.y * height] for lm in results_pose.pose_landmarks.landmark]
                        )
                    pose_ready = True

                if landmarks is not None:
                    try:
                        label, probs = detect_fall(landmarks, bbox_width, bbox_height, bbox_ratio, confidence)
                        # The GRU result takes priority when landmarks are available
                        if label == 0:
                            color = (0, 255, 0)
                        elif label == 1:
                            color = (255, 0, 0)
                        else:
                            color = (0, 255, 255)

                        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)  # box classified by the GRU
                        cv2.putText(frame, f'GRU: {label}', (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

                        # Draw the landmarks as filled blue circles (BGR)
                        for lm in landmarks:
                            cv2.circle(frame, (int(lm[0]), int(lm[1])), radius=5, color=(255, 0, 0), thickness=-1)

                    except Exception as e:
                        # Best effort: report the problem and keep processing the video.
                        print(f"에러 :{e}")
                else:
                    # MediaPipe found no landmarks: fall back to the YOLO result.
                    # Only class 0 reaches this point, so the label is always "Normal".
                    # NOTE(review): the earlier `class_ids[i] == 1` -> "Fall" branch was
                    # unreachable because of the class filter above; if fall detections
                    # should be drawn here, the filter has to change — confirm the
                    # model's class-id mapping first.
                    yolo_label = "Normal"
                    yolo_color = (255, 0, 0)
                    cv2.rectangle(frame, (x1, y1), (x2, y2), yolo_color, 2)
                    cv2.putText(frame, f"YOLO: {yolo_label}", (x1, y1 - 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, yolo_color, 2)

        # Save and display the annotated frame
        resized_frame = cv2.resize(frame, output_frame_size)
        out.write(resized_frame)
        cv2.imshow('Fall Detection', resized_frame)

        # Press 'q' in the preview window to stop early
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    cap.release()
    out.release()
    cv2.destroyAllWindows()


0: 384x640 1 Non_Fall, 4.0ms
Speed: 1.0ms preprocess, 4.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Non_Fall, 3.0ms
Speed: 2.0ms preprocess, 3.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Non_Fall, 4.0ms
Speed: 1.0ms preprocess, 4.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Non_Fall, 4.0ms
Speed: 1.0ms preprocess, 4.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Non_Fall, 4.0ms
Speed: 1.0ms preprocess, 4.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Non_Fall, 4.0ms
Speed: 1.0ms preprocess, 4.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Non_Fall, 4.0ms
Speed: 1.0ms preprocess, 4.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 Non_Fall, 4.0ms
Speed: 1.0ms preprocess, 4.0ms inference, 0.0ms postprocess per image at shape (1, 3, 38