## mediapipe 랜드마크만 학습
* 정의된 랜드마크(11개)만 학습
* input_size = 22

In [2]:
import os
import json
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import cv2
import mediapipe as mp
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score, confusion_matrix


In [None]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

### GRU training

In [None]:
# Indices of the pose landmarks used as model features.
# Every landmark contributes (x, y), so one frame holds len(LANDMARKS) * 2 values.
LANDMARKS = [0, 11, 12, 15, 16, 23, 24, 25, 26, 27, 28]


class FallSequenceDataset(Dataset):
    """Sliding-window dataset of pose-landmark sequences read from JSON files.

    Each file is read as ``{"pose_data": {<key>: frame, ...}}``. A frame maps
    ``"landmark_<i>"`` to a dict with ``"x"`` and ``"y"`` and carries a
    ``"class"`` entry; a frame counts as a fall frame when its class is
    ``"Fall"``.

    Windows of ``sequence_length`` consecutive frames (stride 1) are built per
    file, so a window never spans two files. A window that contains a frame
    with a missing landmark is dropped: keeping it would produce feature
    vectors of different lengths that cannot be turned into a tensor or
    batched.

    Args:
        json_files: Iterable of paths to the JSON files.
        sequence_length: Number of consecutive frames per sample.

    Raises:
        ValueError: If no complete window could be built from any file.
    """

    def __init__(self, json_files, sequence_length=3):
        self.sequence_length = sequence_length
        self.sequences = []
        self.labels = []
        print(f"LANDMARKS len :{len(LANDMARKS)}")

        skipped_windows = 0
        for json_file in json_files:
            print(f'Processing file: {json_file}')
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)

            frames = list(data['pose_data'].values())
            if len(frames) < self.sequence_length:
                # Too short to form even one window.
                continue

            # Extract every frame once; overlapping windows reuse the result.
            features = [self._frame_features(frame) for frame in frames]
            is_fall = [frame['class'] == 'Fall' for frame in frames]

            for start in range(len(frames) - self.sequence_length + 1):
                end = start + self.sequence_length
                window = features[start:end]
                if any(vector is None for vector in window):
                    skipped_windows += 1
                    continue

                self.sequences.append(window)
                self.labels.append(self.define_label(sum(is_fall[start:end])))

        if skipped_windows:
            print(f"Skipped {skipped_windows} window(s) with missing landmarks")

        if not self.sequences:
            raise ValueError("No valid sequences found in the dataset")

    @staticmethod
    def _frame_features(frame):
        """Return the flat [x, y, x, y, ...] vector of a frame, or None if a landmark is absent."""
        values = []
        for landmark in LANDMARKS:
            point = frame.get(f'landmark_{landmark}')
            if point is None:
                print(f"Missing landmark {landmark} in frame")
                return None
            values.extend((point['x'], point['y']))
        return values

    def define_label(self, fall_frames):
        """Map the number of fall frames in a window to a class id.

        Returns 0 when no frame is a fall (non-fall), 2 when every frame is a
        fall (complete fall) and 1 otherwise (fall risk).
        """
        if fall_frames == 0:
            return 0  # non-fall
        elif fall_frames == self.sequence_length:
            return 2  # complete fall
        else:
            return 1  # fall risk

    def __len__(self):
        """Return the number of windows kept in the dataset."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return ``(sequence, label)`` for sample ``idx``.

        ``sequence`` is a float32 tensor of shape
        ``(sequence_length, len(LANDMARKS) * 2)``; ``label`` is a 0-dim int64
        tensor.

        Raises:
            IndexError: If ``idx`` is past the end of the dataset.
        """
        if idx >= len(self.sequences):
            raise IndexError(f"Index {idx} out of range. Dataset length: {len(self.sequences)}")

        sequence = torch.tensor(self.sequences[idx], dtype=torch.float32)
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return sequence, label

# GRU 모델 정의
class FallDetectionGRU(nn.Module):
    """GRU classifier that scores a sequence from its last time step.

    The input is fed through a stacked GRU (``batch_first=True``); the output
    of the final time step goes through dropout and a linear layer that
    produces ``output_size`` logits.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        output_size: Number of classes (logits) produced.
        dropout: Dropout probability used between GRU layers and before the
            final linear layer.
    """

    def __init__(self, input_size, hidden_size=64, num_layers=2, output_size=3, dropout = 0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # nn.GRU only applies dropout between stacked layers and warns when a
        # non-zero value is passed for a single layer, so disable it there.
        gru_dropout = dropout if num_layers > 1 else 0.0
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True, dropout=gru_dropout)
        self.fc = nn.Linear(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return class logits of shape ``(batch, output_size)``.

        ``x`` is indexed as ``(batch, time, features)``.
        """
        # Create the initial state directly on x's device and in x's dtype so
        # the module also works after .double()/.half() and avoids a
        # host-to-device copy on every call.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            device=x.device, dtype=x.dtype,
        )
        out, _ = self.gru(x, h0)
        last_step = out[:, -1, :]
        return self.fc(self.dropout(last_step))

# Load the data: collect every *.json file in the folder and build the dataset.
json_folder = 'D:\\human_fall\\re_landmark\\addition_yolobbox_json_6'
json_files = []
for entry in os.listdir(json_folder):
    if entry.endswith('.json'):
        json_files.append(os.path.join(json_folder, entry))
dataset = FallSequenceDataset(json_files)

# Split the sample indices into training (80%) and validation (20%) subsets.
# NOTE(review): windows are built with stride 1, so neighbouring samples share
# frames; a random split therefore places near-duplicate windows in both
# subsets. Consider splitting by file if an unbiased validation score matters.
train_indices, val_indices = train_test_split(range(len(dataset)), test_size=0.2)
train_dataset = torch.utils.data.Subset(dataset, train_indices)
val_dataset = torch.utils.data.Subset(dataset, val_indices)

# Compute balanced class weights from the training labels.
# Labels are read straight from the dataset instead of materialising every
# sample tensor just to look at its label.
train_labels = np.array([dataset.labels[i] for i in train_indices], dtype=int)
present_classes = np.unique(train_labels)

# The model emits 3 logits (labels 0, 1, 2), so the weight vector must have
# 3 entries even when a class is absent from the training split; absent
# classes keep a neutral weight of 1.
num_classes = 3
balanced = np.ones(num_classes, dtype=np.float32)
balanced[present_classes] = compute_class_weight('balanced', classes=present_classes, y=train_labels)
class_weights = torch.tensor(balanced, dtype=torch.float32, device=device)

# Weighted loss so that rare classes are not drowned out by the frequent ones.
criterion = nn.CrossEntropyLoss(weight=class_weights)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

# Stop right away when there is nothing to train on.
if len(dataset) <= 0:
    print("데이터 없음")
    exit()

# Read the per-frame feature width off the first sample (second dimension of
# the sequence tensor) and build the model for it.
sample_sequence, sample_label = dataset[0]
input_size = sample_sequence.shape[1]
print(f'input_size :{input_size}')
model = FallDetectionGRU(input_size).to(device)

# The class-weighted `criterion` created earlier in this script is reused as-is.
# Re-creating a plain nn.CrossEntropyLoss() here would silently throw the
# class weights away.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001,  weight_decay=1e-5)
# `verbose` is deprecated on PyTorch LR schedulers, so it is not passed; read
# optimizer.param_groups[0]['lr'] if the current learning rate needs logging.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5)
num_epochs = 500
best_loss = float('inf')  # lowest validation loss seen so far
patience = 15             # epochs without improvement before early stopping
no_improve = 0            # consecutive epochs without improvement

# Train with early stopping: one optimisation pass and one validation pass per
# epoch; the weights with the lowest validation loss are written to disk.
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    total_loss_train = 0
    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(batch_x), batch_y.view(-1))
        batch_loss.backward()
        optimizer.step()

        total_loss_train += batch_loss.item()
    avg_loss_train = total_loss_train / len(train_loader)

    # ---- validation pass (no gradients) ----
    model.eval()
    total_loss_val = 0
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            total_loss_val += criterion(model(batch_x), batch_y.view(-1)).item()
    avg_loss_val = total_loss_val / len(val_loader)

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_loss_train:.4f}, Val Loss: {avg_loss_val:.4f}')
    # The scheduler lowers the learning rate when the validation loss stalls.
    scheduler.step(avg_loss_val)

    # Checkpoint on improvement, otherwise count towards early stopping.
    improved = avg_loss_val < best_loss
    if improved:
        best_loss = avg_loss_val
        no_improve = 0
        torch.save(model.state_dict(), 'only_mediapipe_except_normalization.pt')
    else:
        no_improve += 1

    if no_improve >= patience:
        print("Early stopping")
        break
    
def calculate_metrics(model, data_loader):
    """Evaluate ``model`` on ``data_loader``.

    The model is switched to eval mode and run without gradients; the
    predicted class of each sample is the index of its largest output.

    Returns:
        A ``(f1, cm)`` tuple: the weighted F1 score and the confusion matrix
        of true labels versus predictions.
    """
    model.eval()
    predicted, expected = [], []
    with torch.no_grad():
        for batch_x, batch_y in data_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            logits = model(batch_x)
            batch_pred = torch.max(logits, 1).indices
            predicted.extend(batch_pred.cpu().numpy())
            expected.extend(batch_y.cpu().numpy())

    f1 = f1_score(expected, predicted, average='weighted')
    cm = confusion_matrix(expected, predicted)
    return f1, cm

# Compute the final metrics once, after the training loop has finished.
# Early stopping leaves the in-memory model several epochs past its best
# validation loss, so restore the best checkpoint first: the reported numbers
# then describe the weights that were actually saved.
best_checkpoint = 'only_mediapipe_except_normalization.pt'
if os.path.exists(best_checkpoint):
    model.load_state_dict(torch.load(best_checkpoint, map_location=device))

train_f1, train_cm = calculate_metrics(model, train_loader)
val_f1, val_cm = calculate_metrics(model, val_loader)
print(f'Train F1: {train_f1:.4f}, Val F1: {val_f1:.4f}')
print(f'Train CM:\n{train_cm}\nVal CM:\n{val_cm}')

print("Training completed")

# Persist the F1 scores and confusion matrices as a plain-text report.
def save_metrics(f1_train, cm_train, f1_val, cm_val, file_path='only_mediapipe.pt_except_normalization.txt'):
    """Write training/validation F1 scores and confusion matrices to ``file_path``.

    The F1 scores are formatted with four decimals; the confusion matrices are
    written using their default string form. An existing file is overwritten.
    """
    with open(file_path, 'w') as report:
        report.writelines([
            f'Train F1: {f1_train:.4f}\n',
            f'Val F1: {f1_val:.4f}\n',
            f'Train Confusion Matrix:\n{cm_train}\n',
            f'Val Confusion Matrix:\n{cm_val}\n',
        ])

# Save the results to the report file.
save_metrics(
    f1_train=train_f1,
    cm_train=train_cm,
    f1_val=val_f1,
    cm_val=val_cm,
    file_path='only_mediapipe.pt_except_normalization.txt',
)

print("저장 완료")

### video 파일로 테스트 및 저장

In [1]:
# Model loading helper
def load_model(model_path, input_size):
    """Build a ``FallDetectionGRU`` and load trained weights into it.

    Args:
        model_path: Path to a state dict saved with ``torch.save``.
        input_size: Number of features per time step the model was trained with.

    Returns:
        The model on ``device``, switched to eval mode.
    """
    model = FallDetectionGRU(input_size).to(device)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine
    # (and the other way round).
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model.eval()
    return model

# Initialise Mediapipe Pose: a single module-level Pose instance.
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)
# Display names for the three classes the model is trained on:
# 0 = non-fall, 1 = fall risk (some frames of the window are falls),
# 2 = complete fall (every frame of the window is a fall).
LABELS = {0: 'Normal', 1: 'Fall Risk', 2: 'Fall'}

def process_video(video_path, model, output_path, sequence_length=3):
    """Run fall detection on a video and write an annotated copy.

    Every frame is passed through the module-level Mediapipe ``pose``. When a
    pose is found, its landmark features are appended to a rolling window of
    the last ``sequence_length`` detected frames; once the window is full it
    is classified by ``model`` and the label is drawn on the frame. Frames are
    resized to 1920x1080, written to ``output_path`` and shown in a window.
    Pressing 'q' stops early.

    Args:
        video_path: Path of the input video.
        model: Classifier taking a ``(1, sequence_length, features)`` tensor.
        output_path: Path of the annotated output video.
        sequence_length: Frames per classified window; should match the
            window length the model was trained with.

    Returns:
        List of predicted class ids, one per classified window.
    """
    output_size = (1920, 1080)

    cap = cv2.VideoCapture(video_path)

    # Some containers report an FPS of 0; fall back to 30 in that case.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, output_size)

    predictions = []
    window = []  # feature vectors of the most recent frames with a pose

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Mediapipe expects RGB, OpenCV delivers BGR.
            image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = pose.process(image_rgb)

            # Only frames with a detected pose contribute to the window.
            if results.pose_landmarks:
                landmarks = list(extract_landmarks(results))
                # Safety net: zero-pad if fewer landmarks than expected came back.
                landmarks += [[0, 0]] * (len(LANDMARKS) - len(landmarks))

                # Flatten [[x, y], ...] into one feature vector per frame.
                features = np.asarray(landmarks, dtype=np.float32).reshape(-1)
                window.append(features)
                window = window[-sequence_length:]

                # Classify only complete windows of sequence_length frames.
                if len(window) == sequence_length:
                    sequence = np.stack(window)[np.newaxis, ...]
                    sequence_tensor = torch.from_numpy(sequence).to(device)

                    with torch.no_grad():
                        output = model(sequence_tensor)
                    predicted = int(torch.max(output, 1).indices.item())
                    predictions.append(predicted)

                    # .get avoids a KeyError for class ids without a display name.
                    label = LABELS.get(predicted, f'class {predicted}')
                    cv2.putText(frame, f'GRU pred :{label}', (10, 80),
                                cv2.FONT_HERSHEY_SIMPLEX, 3, (0, 255, 0), 3)

                # Draw the pose skeleton.
                mp.solutions.drawing_utils.draw_landmarks(frame, results.pose_landmarks,
                                                           mp_pose.POSE_CONNECTIONS)

            # Save and display the (resized) frame.
            resize_frame = cv2.resize(frame, output_size)
            out.write(resize_frame)

            cv2.imshow('Fall Detection', resize_frame)

            # Quit when 'q' is pressed.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release the capture/writer so the output file is finalised
        # even if processing fails midway.
        cap.release()
        out.release()
        cv2.destroyAllWindows()

    return predictions

def extract_landmarks(results):
    """Return one ``[x, y]`` pair per entry of ``LANDMARKS``, in order.

    Landmarks whose visibility is below 0.5 are replaced by ``[0.0, 0.0]``
    instead of being dropped. Dropping them would shorten the list, and any
    padding appended afterwards would shift the remaining landmarks into the
    wrong feature positions.

    Args:
        results: Mediapipe pose result whose ``pose_landmarks`` is set.

    Returns:
        List of ``len(LANDMARKS)`` two-element lists.
    """
    landmarks = []
    for landmark in LANDMARKS:
        point = results.pose_landmarks.landmark[landmark]
        if point.visibility < 0.5:
            # Low visibility: keep the slot, zero the coordinates.
            landmarks.append([0.0, 0.0])
        else:
            # Only x and y are used (z is ignored).
            landmarks.append([point.x, point.y])

    return landmarks

# Load the trained model and run it over one video.
input_size = 22
model_path = 'only_mediapipe_except_normalization.pt'
video_path = 'D:\\human_fall\\re_video\\training\\Y\\00704_H_D_FY_C4.mp4'
output_path = 'C:\\Users\\user\\Desktop\\prj_sample_vid\\GRU_pred_inputsize22_1.mp4'  # where the annotated video is written

model = load_model(model_path=model_path, input_size=input_size)
predictions = process_video(
    video_path=video_path,
    model=model,
    output_path=output_path,
)

print("저장완료")

NameError: name 'mp' is not defined