### mediapipe의 좌표와 sensordata로 재정의된 3개의 클래스 학습
* danger 클래스 추가
* input_size = 22

In [6]:
import os
import json
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import cv2
import mediapipe as mp
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from scipy.interpolate import interp1d
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import f1_score, confusion_matrix
from scipy.spatial import distance
from tqdm import tqdm

In [7]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [None]:
# Indices of the pose landmarks whose x/y values make up one frame's features.
LANDMARKS = [0, 11, 12, 15, 16, 23, 24, 25, 26, 27, 28]


# Dataset class definition
class FallSequenceDataset(Dataset):
    """Sliding-window dataset of pose-landmark sequences read from JSON files.

    Each JSON file must contain a ``pose_data`` mapping whose values are
    per-frame dicts with a ``class`` string and one ``landmark_<idx>`` entry
    (holding ``x`` and ``y``) for every index in ``LANDMARKS``.  Every run of
    ``sequence_length`` consecutive frames becomes one sample, labelled by the
    class of its last frame: 0 for ``'Normal'``, 1 for ``'Danger'`` and 2 for
    any other value.

    Coordinates are stored exactly as read; no normalisation is applied.

    Args:
        json_files: Iterable of paths to the JSON files to load.
        sequence_length: Number of consecutive frames per sample (>= 1).

    Attributes:
        sequences: One entry per sample: a list of ``sequence_length`` frames,
            each a flat ``[x, y, x, y, ...]`` list in ``LANDMARKS`` order.
        labels: Integer label per sample, aligned with ``sequences``.

    Raises:
        ValueError: If ``sequence_length`` is smaller than 1.
    """

    def __init__(self, json_files, sequence_length=3):
        if sequence_length < 1:
            raise ValueError(f"sequence_length must be >= 1, got {sequence_length}")

        self.sequence_length = sequence_length
        self.sequences = []
        self.labels = []
        print(f"LANDMARKS length: {len(LANDMARKS)}")

        for json_file in json_files:
            print(f'Processing file: {json_file}')
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)

            frames = list(data['pose_data'].values())
            if len(frames) < self.sequence_length:
                # Too short to yield a single window; nothing to extract.
                continue

            # Extract every frame once; overlapping windows then share the
            # per-frame feature lists instead of rebuilding them per window.
            features = [self._frame_features(frame) for frame in frames]

            for start in range(len(frames) - self.sequence_length + 1):
                end = start + self.sequence_length
                self.sequences.append(features[start:end])
                # The class of the window's last frame is used as the label.
                self.labels.append(self._label_of(frames[end - 1]))

        # Report the resulting layout only after loading, when there is
        # actually something to inspect.
        if self.sequences:
            features_per_frame = len(self.sequences[0][0])
            print(f"sequence shape : ({len(self.sequences[0])}, {features_per_frame})")
            print(f"랜드마크 수 : {len(LANDMARKS)}")
            print(f"Features / landmark: {features_per_frame // len(LANDMARKS)}")
            print(f"전체 features per frame: {features_per_frame}")
        else:
            print("시퀀스 생성 실패")

    @staticmethod
    def _frame_features(frame):
        """Return the flat ``[x, y, ...]`` feature list of one frame dict."""
        values = []
        for landmark in LANDMARKS:
            point = frame[f'landmark_{landmark}']
            values.extend([point['x'], point['y']])
        return values

    @staticmethod
    def _label_of(frame):
        """Map a frame's ``class`` string to its integer label."""
        if frame['class'] == 'Normal':
            return 0
        if frame['class'] == 'Danger':
            return 1
        return 2

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return ``(sequence, label)``: a float tensor and a 0-dim long tensor."""
        if idx >= len(self.sequences):
            raise IndexError(f"Index {idx} out of range. Dataset length: {len(self.sequences)}")
        sequence = torch.FloatTensor(self.sequences[idx])
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return sequence, label

# GRU 모델 정의
class FallDetectionGRU(nn.Module):
    """GRU sequence classifier that scores the final time step.

    Args:
        input_size: Number of features in each time step.
        hidden_size: Width of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        output_size: Number of scores produced per sequence.
        dropout: Dropout probability, passed to the GRU and also applied to
            the last time step's output before the linear head.
    """

    def __init__(self, input_size, hidden_size=64, num_layers=2, output_size=3, dropout=0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Attribute names (gru / fc / dropout) define the state_dict keys,
        # so they must stay as they are for saved weights to load.
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq, input_size) to (batch, output_size) scores."""
        batch_size = x.size(0)
        # Start every sequence from an all-zero hidden state on x's device.
        initial_state = torch.zeros(
            self.num_layers, batch_size, self.hidden_size, device=x.device
        )
        hidden_seq, _ = self.gru(x, initial_state)
        last_step = hidden_seq[:, -1, :]
        return self.fc(self.dropout(last_step))

# Load the data and build the dataset
json_folder = 'D:\\human_fall\\re_landmark\\addition_json'
json_files = [os.path.join(json_folder, f) for f in os.listdir(json_folder) if f.endswith('.json')]
dataset = FallSequenceDataset(json_files)

# Stop here when nothing was loaded: splitting or weighting an empty dataset
# would only fail later with a less helpful error.
if len(dataset) == 0:
    print("데이터 없음")
    raise SystemExit(1)

# Split the dataset into training and validation sets
train_indices, val_indices = train_test_split(range(len(dataset)), test_size=0.2)
train_dataset = torch.utils.data.Subset(dataset, train_indices)
val_dataset = torch.utils.data.Subset(dataset, val_indices)

# Compute class weights before creating the data loaders.
# NOTE(review): the weight vector has one entry per class present in
# dataset.labels; presumably all three classes occur in the data, otherwise
# its length would not match the model's 3 outputs -- confirm.
class_weights = compute_class_weight('balanced', classes=np.unique(dataset.labels), y=dataset.labels)
class_weights = torch.FloatTensor(class_weights).to(device)

# Apply the weights to the loss function
criterion = nn.CrossEntropyLoss(weight=class_weights)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)

# Derive the per-frame feature count from the first sample
sample_sequence, sample_label = dataset[0]
input_size = sample_sequence.shape[1]
print(f'input_size : {input_size}')
model = FallDetectionGRU(input_size).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True)
num_epochs = 500
best_loss = float('inf')
patience = 15  # epochs without validation improvement before early stopping
no_improve = 0

# The weights with the lowest validation loss so far are written to this file.
checkpoint_path = 'mediapipe_sensordata_except_normalization.pt'

for epoch in range(num_epochs):
    model.train()
    total_loss_train = 0
    
    for sequences, labels in train_loader:
        sequences, labels = sequences.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(sequences)
        loss_train = criterion(outputs, labels.view(-1))
        loss_train.backward()
        optimizer.step()
        
        total_loss_train += loss_train.item()
    
    # Mean of the per-batch losses
    avg_loss_train = total_loss_train / len(train_loader)

    # Validation phase
    model.eval()
    total_loss_val = 0
    
    with torch.no_grad():
        for sequences_val, labels_val in val_loader:
            sequences_val, labels_val = sequences_val.to(device), labels_val.to(device)
            outputs_val = model(sequences_val)
            loss_val = criterion(outputs_val, labels_val.view(-1))
            total_loss_val += loss_val.item()
    
    avg_loss_val = total_loss_val / len(val_loader)

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_loss_train:.4f}, Val Loss: {avg_loss_val:.4f}')
    # The scheduler lowers the learning rate when the validation loss plateaus
    scheduler.step(avg_loss_val)
    
    if avg_loss_val < best_loss:
        best_loss = avg_loss_val
        no_improve = 0
        torch.save(model.state_dict(), checkpoint_path)
    else:
        no_improve += 1
    
    # Stop once the validation loss has not improved for `patience` epochs
    if no_improve >= patience:
        print("Early stopping")
        break

# The loop ends with the last epoch's weights in memory, which are usually not
# the ones that were saved. Restore the best checkpoint so that anything
# computed from `model` afterwards describes the saved file.
if best_loss < float('inf'):
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))

def calculate_metrics(model, data_loader):
    """Evaluate ``model`` on ``data_loader`` and summarise its predictions.

    The model is switched to eval mode (and left in it) and run without
    gradient tracking on the module-level ``device``.

    Args:
        model: Module returning one row of class scores per input sequence.
        data_loader: Iterable yielding ``(sequences, labels)`` batches.

    Returns:
        A ``(f1, cm)`` tuple: the weighted F1 score and the confusion matrix
        of the true labels against the predicted labels.
    """
    model.eval()
    predicted = []
    expected = []
    with torch.no_grad():
        for batch, targets in data_loader:
            scores = model(batch.to(device))
            # The predicted class is the index of the highest score per row
            best = scores.max(dim=1).indices
            predicted.extend(best.cpu().numpy())
            expected.extend(targets.cpu().numpy())

    f1 = f1_score(expected, predicted, average='weighted')
    cm = confusion_matrix(expected, predicted)
    return f1, cm

# Compute the performance metrics of the current in-memory model on the
# training and validation loaders (this runs once, after the training loop)
train_f1, train_cm = calculate_metrics(model, train_loader)
val_f1, val_cm = calculate_metrics(model, val_loader)
print(f'Train F1: {train_f1:.4f}, Val F1: {val_f1:.4f}')
print(f'Train CM:\n{train_cm}\nVal CM:\n{val_cm}')

print("학습 완료")

# Save the F1 scores and confusion matrices to a file
def save_metrics(f1_train, cm_train, f1_val, cm_val, file_path='mediapipe_sensordata_except_normalization.pt.txt'):
    """Write the train/validation F1 scores and confusion matrices as text.

    Args:
        f1_train: Training F1 score, written with four decimals.
        cm_train: Training confusion matrix, written via its string form.
        f1_val: Validation F1 score, written with four decimals.
        cm_val: Validation confusion matrix, written via its string form.
        file_path: Destination file; it is overwritten if it already exists.
    """
    entries = (
        ('Train F1: {:.4f}\n', f1_train),
        ('Val F1: {:.4f}\n', f1_val),
        ('Train Confusion Matrix:\n{}\n', cm_train),
        ('Val Confusion Matrix:\n{}\n', cm_val),
    )
    with open(file_path, 'w') as report:
        for template, value in entries:
            report.write(template.format(value))

# Save the results next to the checkpoint name (same stem plus '.txt')
save_metrics(train_f1, train_cm, val_f1, val_cm, file_path='mediapipe_sensordata_except_normalization.pt.txt')

print("저장 완료")

### Mediapipe의 랜드마크만 학습시켰을 때의 비디오 테스트
* input_size = 22
* sequence_length = 3

In [None]:
# Landmark index definition: the pose landmarks whose x/y values are fed to the model
LANDMARKS = [0, 11, 12, 15, 16, 23, 24, 25, 26, 27, 28]

class FallDetectionGRU(nn.Module):
    """GRU classifier producing one score per class from a sequence's last step.

    Args:
        input_size: Number of features in each time step.
        hidden_size: Width of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        output_size: Number of scores produced per sequence.
        dropout: Dropout probability, passed to the GRU and also applied to
            the last time step's output before the linear head.
    """

    def __init__(self, input_size, hidden_size=64, num_layers=2, output_size=3, dropout=0.5):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # The attribute names below are the state_dict key prefixes expected
        # by load_state_dict, so they are kept as gru / fc / dropout.
        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Map ``x`` of shape (batch, seq, input_size) to (batch, output_size) scores."""
        zero_state = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size, device=x.device
        )
        per_step, _ = self.gru(x, zero_state)
        # Only the final time step is classified
        final_step = self.dropout(per_step[:, -1, :])
        return self.fc(final_step)

# Build the model and load the trained weights
input_size = 22 
model = FallDetectionGRU(input_size).to(device)
model.load_state_dict(torch.load('D:\\project\\prjvenv\\GRU\\GRU_pts\\2. mediapipe & sensordata\\mediapipe_sensordata_except_normalization.pt', map_location=device))
model.eval()

mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils

video_path = 'D:\\human_fall\\re_video\\validation\\Y\\00170_H_A_SY_C4.mp4'
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    # Without this the loop below is skipped silently and no frames are written
    print(f"Cannot open video: {video_path}")

output_path = 'C:\\Users\\user\\Desktop\\prj_sample_vid\\inputsize_22.mp4'
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  
out = cv2.VideoWriter(output_path, fourcc, 30.0, (1920, 1080))

sequence_length = 3  # Sequence length (must match the value used for training)
data_sequence = []

# Class id -> name, and the overlay colour used for each id
label_name = {0: 'Normal', 1: 'Danger', 2: 'Fall'}
label_color = {0: (0, 255, 0), 1: (0, 255, 255), 2: (0, 0, 255)}

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Convert the BGR image to RGB and extract the landmarks
        image_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = pose.process(image_rgb)

        if results.pose_landmarks:
            landmarks = []

            # Collect x/y of the selected landmarks
            for landmark_idx in LANDMARKS:
                landmark = results.pose_landmarks.landmark[landmark_idx]
                landmarks.append([landmark.x, landmark.y])

            # Flatten to one feature vector and add it to the sequence
            landmarks_array = np.array(landmarks).flatten()

            if len(data_sequence) < sequence_length:
                data_sequence.append(landmarks_array)

            if len(data_sequence) == sequence_length:
                input_data = np.array(data_sequence).reshape(1, sequence_length, -1)
                input_tensor = torch.FloatTensor(input_data).to(device)

                with torch.no_grad():
                    outputs = model(input_tensor)
                    predicted_label_id = torch.argmax(outputs).item()

                # Overlay the predicted class name in its colour
                predicted_label_name = label_name[predicted_label_id]
                cv2.putText(frame, f"GRU pred :{predicted_label_name}", (50, 150), cv2.FONT_HERSHEY_SIMPLEX,
                            5, label_color[predicted_label_id], 4)

                # Slide the window: drop the oldest frame so the next one can be appended
                data_sequence.pop(0)

            # Draw the landmarks and their connections
            mp_drawing.draw_landmarks(frame,
                                       results.pose_landmarks,
                                       mp_pose.POSE_CONNECTIONS,
                                       landmark_drawing_spec=mp_drawing.DrawingSpec(color=(255, 0, 0), thickness=2),
                                       connection_drawing_spec=mp_drawing.DrawingSpec(color=(255, 255, 255), thickness=2))

        # The writer was opened for 1920x1080, so every frame is resized to that
        resized_frame = cv2.resize(frame, (1920, 1080))
        out.write(resized_frame)
        # Show the video frame
        cv2.imshow('Fall Detection', resized_frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release resources even if processing fails part-way; the writer must be
    # released for the output file to be finalised.
    cap.release()
    out.release()
    pose.close()
    cv2.destroyAllWindows()