In [1]:
import os
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import cv2
import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

#MoveNet 모델 로드
model = hub.load("https://tfhub.dev/google/movenet/singlepose/lightning/4")
movenet = model.signatures['serving_default']

In [2]:
def run_inference(movenet, image):
    """이미지에서 포즈 키포인트를 추출하는 함수"""
    # Resize and pad the image to keep the aspect ratio and fit the expected size.
    input_image = tf.image.resize_with_pad(tf.expand_dims(image, axis=0), 192, 192)
    input_image = tf.cast(input_image, dtype=tf.int32)
    #results = movenet(input_image)
    
    # Run model inference.
    keypoints_with_scores = movenet(input_image)
    #print(keypoints_with_scores)
    #print(type(keypoints_with_scores))
    return keypoints_with_scores

In [3]:
def extract_keypoints_from_video(video_path):
    """동영상에서 프레임별로 포즈 키포인트를 추출하는 함수"""
    cap = cv2.VideoCapture(video_path)
    keypoints_list = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        keypoints = run_inference(movenet, frame_rgb)
        #print(type(keypoints))
        keypoints_list.append(keypoints)
    cap.release()
    return keypoints_list

하나의 동영상 파일로 실험 해보기 > 성능 안좋으면 나중에 넘파이로 해서 다시해보기

In [None]:
# # 비디오 파일 경로
# video_path = '/Users/diana/Desktop/BabyposeModel/capstone2_SEDA/arching_back_1.mp4'  # 비디오 파일 경로를 적절히 설정해야 합니다.

# keypoints_list = extract_keypoints_from_video(video_path)
# #print(keypoints_list)
# # 딕셔너리를 리스트로 변환한다.
# #keypoints_list = [d['output_0'].numpy().tolist() for d in keypoints_list]
# # EagerTensor를 넘파이 배열로 변환 후, 4차원 -> 2차원 변경
# keypoints_array = [d['output_0'].numpy().squeeze() for d in keypoints_list]
# # 넘파이 배열을 리스트로 변환
# keypoints_list = [keypoints.tolist() for keypoints in keypoints_array]

# # 먼저 각 좌표별로 평균을 계산
# mean_keypoints = [[sum(pos) / len(keypoints_list) for pos in zip(*frame)] for frame in zip(*[kp for kp in keypoints_list])]

# #keypoints_list_arr = np.array(keypoints_list)
# #keypoints_avg = average_keypoints(keypoints_list)
# #print(keypoints_list[0].keys())
# #print(keypoints_list[0].values())
# #print(type(keypoints_list[0]))
# #print(keypoints_list[0].shape)
# #print(len(keypoints_list))
# #print(keypoints_list)
# print(len(mean_keypoints))
# print(mean_keypoints)


In [5]:
import pickle

# 동영상이 저장된 디렉토리 경로
video_root = '/Users/diana/Downloads/BabyPose-main/data'
video_dirs = ['arching_back', 'head_banging', 'kicking_legs', 'rubbing_eye', 'stretching', 'sucking_fingers']

keypoints_list = []
labels = []

# 클래스별로 동영상 처리
for label, class_dir in enumerate(video_dirs):
    class_path = os.path.join(video_root, class_dir)
    video_files = [f for f in os.listdir(class_path) if f.endswith('.mp4')]

    for video_file in tqdm.tqdm(video_files, desc=f'Processing {class_dir}'):
        video_path = os.path.join(class_path, video_file)
        keypoints = extract_keypoints_from_video(video_path)
        # EagerTensor를 넘파이 배열로 변환 후, 4차원 -> 2차원 변경
        keypoints_array = [d['output_0'].numpy().squeeze() for d in keypoints]
        # 넘파이 배열을 리스트로 변환
        keypoints_list_mod = [keypoints.tolist() for keypoints in keypoints_array]
        
        keypoints_list.append(keypoints_list_mod)
        labels.append(label)  # 클래스 인덱스를 레이블로 사용

# `features`와 `labels`를 `keypoints_data_with_score.pkl` 파일로 저장
with open('keypoints_data.pkl', 'wb') as f:
    pickle.dump({'keypoints': keypoints_list, 'labels': labels}, f)

print("Finished processing and saving data.")

Processing arching_back: 100%|██████████| 19/19 [00:22<00:00,  1.20s/it]
Processing head_banging: 100%|██████████| 22/22 [00:52<00:00,  2.38s/it]
Processing kicking_legs: 100%|██████████| 23/23 [01:00<00:00,  2.65s/it]
Processing rubbing_eye: 100%|██████████| 26/26 [01:08<00:00,  2.62s/it]
Processing stretching: 100%|██████████| 23/23 [00:52<00:00,  2.29s/it]
Processing sucking_fingers: 100%|██████████| 32/32 [01:39<00:00,  3.10s/it]

Finished processing and saving data.





실시간 증강 도전?

In [None]:
import random

def rotate_image(image, angle):
    """이미지를 주어진 각도로 회전시키는 함수"""
    (h, w) = image.shape[:2]
    (cX, cY) = (w // 2, h // 2)

    M = cv2.getRotationMatrix2D((cX, cY), angle, 1.0)
    cos = np.abs(M[0, 0])
    sin = np.abs(M[0, 1])

    # 새로운 이미지의 바운딩 박스 계산
    nW = int((h * sin) + (w * cos))
    nH = int((h * cos) + (w * sin))

    # 회전 중심을 조정하여 회전
    M[0, 2] += (nW / 2) - cX
    M[1, 2] += (nH / 2) - cY

    return cv2.warpAffine(image, M, (nW, nH))

In [118]:
def run_inference(movenet, image):
    """이미지에서 포즈 키포인트를 추출하는 함수"""
    input_image = tf.image.resize_with_pad(tf.expand_dims(image, axis=0), 192, 192)
    input_image = tf.cast(input_image, dtype=tf.int32)
    
    # Run model inference
    keypoints_with_scores = movenet(input_image)
    return keypoints_with_scores['output_0'].numpy()

def extract_keypoints_from_video_frames(video_path, movenet):
    cap = cv2.VideoCapture(video_path)
    keypoints_list = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        # 현재 프레임에서 MoveNet을 사용하여 키포인트 추출
        keypoints = run_inference(movenet, frame)
        keypoints_list.append(keypoints)
    
    cap.release()
    return keypoints_list

def augment_frames_and_extract_keypoints(video_path, movenet):
    cap = cv2.VideoCapture(video_path)
    augmented_keypoints = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        # 프레임을 좌우 반전 증강
        frame_flipped = cv2.flip(frame, 1)
        
        # 프레임을 랜덤한 각도로 회전 증강 (여기서는 -10도에서 10도 사이의 각도를 랜덤 선택)
        angle = random.uniform(-10, 10)
        frame_rotated = rotate_image(frame, angle)
        
        # 증강된 프레임에서 MoveNet을 사용하여 키포인트 추출
        keypoints_flipped = run_inference(movenet, frame_flipped)
        augmented_keypoints.append(keypoints_flipped)
        
        # 회전된 프레임에서 키포인트 추출
        keypoints_rotated = run_inference(movenet, frame_rotated)
        augmented_keypoints.append(keypoints_rotated)
    
    cap.release()
    return augmented_keypoints

# 아래는 데이터 로딩 및 처리의 예시 코드입니다.
video_root = '/Users/diana/Downloads/BabyPose-main/data'
video_dirs = ['arching_back', 'head_banging', 'kicking_legs', 'rubbing_eye', 'stretching', 'sucking_fingers']

keypoints_list = []
labels = []

for label, class_dir in enumerate(video_dirs):
    class_path = os.path.join(video_root, class_dir)
    video_files = [f for f in os.listdir(class_path) if f.endswith('.mp4')]

    for video_file in tqdm.tqdm(video_files, desc=f'Processing {class_dir}'):
        video_path = os.path.join(class_path, video_file)
        
        # 원본 동영상에서 프레임별로 키포인트 추출
        original_keypoints = extract_keypoints_from_video_frames(video_path, movenet)
        keypoints_list.append(original_keypoints)
        labels.append(label)  # 원본 데이터에 대한 레이블 추가

        # 증강된 동영상에서 프레임별로 키포인트 추출
        augmented_keypoints = augment_frames_and_extract_keypoints(video_path, movenet)
        keypoints_list.append(augmented_keypoints)
        labels.append(label)  # 증강된 데이터에 대한 레이블도 추가

# 데이터 저장
with open('keypoints_data_augmented.pkl', 'wb') as f:
    pickle.dump({'keypoints': keypoints_list, 'labels': labels}, f)

print("Finished processing and saving augmented data.")


Processing arching_back:   0%|          | 0/19 [00:00<?, ?it/s]

Processing arching_back: 100%|██████████| 19/19 [01:06<00:00,  3.51s/it]
Processing head_banging: 100%|██████████| 22/22 [02:40<00:00,  7.29s/it]
Processing kicking_legs: 100%|██████████| 23/23 [03:03<00:00,  7.99s/it]
Processing rubbing_eye: 100%|██████████| 26/26 [03:16<00:00,  7.58s/it]
Processing stretching: 100%|██████████| 23/23 [02:41<00:00,  7.04s/it]
Processing sucking_fingers: 100%|██████████| 32/32 [05:10<00:00,  9.70s/it]


Finished processing and saving augmented data.


각 동영상 별로 평균 좌표, 신뢰도를 구하는 함수

In [119]:
def calculate_mean_keypoints_from_file(keypoints_data):
    
    # 모든 동영상에 대한 평균 키포인트 계산
    mean_keypoints_all_videos = []
    for keypoints_list in keypoints_data['keypoints']:
        # 각 동영상에 대한 키포인트 리스트에서 평균 계산
        mean_keypoints = [[sum(pos) / len(keypoints_list) for pos in zip(*frame)] for frame in zip(*keypoints_list)]
        mean_keypoints_all_videos.append(mean_keypoints)
    
    return mean_keypoints_all_videos, keypoints_data['labels']

키포인트 변화량 계산하는 함수

In [120]:
def calculate_keypoint_changes(keypoints_data):
    # 변경된 부분: 이미 로드된 키포인트 데이터를 직접 사용
    # 키포인트 데이터는 각 동영상의 프레임별 키포인트 리스트를 포함하는 리스트

    changes_list = []  # 변화량을 저장할 리스트 초기화

    for keypoints_list in keypoints_data['keypoints']:
        changes = []  # 개별 동영상의 키포인트 변화량을 저장할 리스트
        prev_keypoints = None

        for keypoints in keypoints_list:
            keypoints = np.array(keypoints)
            if prev_keypoints is not None:
                # 현재 프레임과 이전 프레임의 키포인트 사이의 변화량 계산
                change = np.abs(keypoints - prev_keypoints)
                changes.append(change)
            prev_keypoints = keypoints

        # 모든 변화량의 평균 계산
        if changes:
            mean_changes = np.mean(changes, axis=0)
        else:
            # 변화량이 없는 경우, 0으로 채워진 배열 반환
            mean_changes = np.zeros_like(keypoints_list[0])

        changes_list.append(mean_changes)

    return changes_list

키포인트 각도 변화량 계산하는 함수-> 이거 3개 키포인트만 되니까 유의미한 키포인트 추출해서 쓰도록 조정해보기

In [121]:
def calculate_angle(point1, point2, point3):
    """
    세 점을 이용하여 두 벡터 사이의 각도를 계산합니다.
    :param point1, point2, point3: 각 점의 좌표를 나타내는 (x, y) 튜플이나 리스트.
    :return: 두 벡터 사이의 각도(도).
    """
    # 벡터 v1과 v2 생성
    v1 = np.array(point1) - np.array(point2)
    v2 = np.array(point3) - np.array(point2)
    
    # 벡터의 내적과 노름(크기)을 사용하여 각도(라디안) 계산
    angle_rad = np.arccos(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))
    
    # 각도를 도로 변환
    angle_deg = np.degrees(angle_rad)
    
    return angle_deg

# 평균
def calculate_angle_changes(keypoints_data, point_indices):
    angle_changes_list = []
    for keypoints_list in keypoints_data['keypoints']:
        angles = []
        for frame_keypoints in keypoints_list:
            # 키포인트 데이터가 충분한지 확인
            if len(frame_keypoints) > max(point_indices):
                p1 = frame_keypoints[point_indices[0]][:2]  # x, y 좌표만 사용
                p2 = frame_keypoints[point_indices[1]][:2]
                p3 = frame_keypoints[point_indices[2]][:2]
                angle = calculate_angle(p1, p2, p3)
                angles.append(angle)
            else:
                # 충분한 데이터가 없는 경우 계산에서 제외
                continue
        
        if angles:  # 각도 데이터가 있을 경우에만 계산
            angle_changes = np.abs(np.diff(angles))
            mean_angle_change = np.mean(angle_changes)
            angle_changes_list.append(mean_angle_change)
        else:
            # 각도 데이터가 없는 경우 0으로 처리
            angle_changes_list.append(0)
    
    return np.array(angle_changes_list)

# 최솟값
def calculate_min_angle_changes(keypoints_data, point_indices):
    min_angle_changes_list = []
    for keypoints_list in keypoints_data['keypoints']:
        angles = []
        for frame_keypoints in keypoints_list:
            if len(frame_keypoints) > max(point_indices):
                p1 = frame_keypoints[point_indices[0]][:2]  # x, y 좌표만 사용
                p2 = frame_keypoints[point_indices[1]][:2]
                p3 = frame_keypoints[point_indices[2]][:2]
                angle = calculate_angle(p1, p2, p3)
                angles.append(angle)

        if angles:
            angle_changes = np.abs(np.diff(angles))
            min_angle_change = np.min(angle_changes) if len(angle_changes) > 0 else 0
            min_angle_changes_list.append(min_angle_change)
        else:
            min_angle_changes_list.append(0)
    
    return np.array(min_angle_changes_list)

# 최댓값
def calculate_max_angle_changes(keypoints_data, point_indices):
    max_angle_changes_list = []
    for keypoints_list in keypoints_data['keypoints']:
        angles = []
        for frame_keypoints in keypoints_list:
            if len(frame_keypoints) > max(point_indices):
                p1 = frame_keypoints[point_indices[0]][:2]  # x, y 좌표만 사용
                p2 = frame_keypoints[point_indices[1]][:2]
                p3 = frame_keypoints[point_indices[2]][:2]
                angle = calculate_angle(p1, p2, p3)
                angles.append(angle)

        if angles:
            angle_changes = np.abs(np.diff(angles))
            max_angle_change = np.max(angle_changes) if len(angle_changes) > 0 else 0
            max_angle_changes_list.append(max_angle_change)
        else:
            max_angle_changes_list.append(0)
    
    return np.array(max_angle_changes_list)



움직임 패턴의 자기상관성을 계산하는 함수:
평균 자기상관성, 
자기상관성의 표준편차,
피크 수(평균 자기상관성 이상의 값을 가지는 피크의 수를 계산합니다. 이는 반복되는 패턴의 빈도를 나타낼 수 있습니다.)

In [122]:
def calculate_enhanced_autocorrelation_features(keypoints_data):
    features_list = []  # 각 동영상의 향상된 자기상관성 특성을 저장할 리스트

    for keypoints_list in keypoints_data['keypoints']:
        changes = []
        prev_keypoints = None
        for keypoints in keypoints_list:
            keypoints = np.array(keypoints)
            if prev_keypoints is not None:
                change = np.linalg.norm(keypoints - prev_keypoints)
                changes.append(change)
            prev_keypoints = keypoints

        if changes:
            changes = np.array(changes)
            autocorrelation = np.correlate(changes - np.mean(changes), changes - np.mean(changes), mode='full')
            autocorrelation = autocorrelation[autocorrelation.size // 2:]  # 자기상관성 값 중 양의 지연만 고려

            # 향상된 특성 계산
            mean_autocorrelation = np.mean(autocorrelation)
            std_autocorrelation = np.std(autocorrelation)
            peak_count = np.sum(autocorrelation > (mean_autocorrelation + std_autocorrelation))  # 평균 이상의 피크 수

            features = [mean_autocorrelation, std_autocorrelation, peak_count]
        else:
            features = [0, 0, 0]

        features_list.append(features)

    return features_list

FFT (고속 푸리에 변환)를 이용해 시간 영역의 키포인트 변화량 데이터를 주파수 영역으로 변환하여, 그 결과로부터 주요 특성을 추출하는 방법

In [93]:
import numpy as np

def calculate_fft_features_from_keypoints(keypoints_data):
    fft_features_list = []

    for keypoints_list in keypoints_data['keypoints']:
        keypoints_array = np.array(keypoints_list)  # (프레임 수, 키포인트 수, 좌표)
        
        # 원본 키포인트 데이터의 FFT 특성 계산
        fft_features_video = []
        for i in range(keypoints_array.shape[1]):  # 각 키포인트에 대해
            x_coordinates = keypoints_array[:, i, 0]  # x 좌표
            y_coordinates = keypoints_array[:, i, 1]  # y 좌표

            # FFT 수행
            fft_x = np.fft.fft(x_coordinates)
            fft_y = np.fft.fft(y_coordinates)

            # 주파수 성분의 크기 계산
            magnitude_x = np.abs(fft_x)
            magnitude_y = np.abs(fft_y)

            # 주요 주파수 성분의 크기 (기본 주파수 제외)
            primary_magnitude_x = np.max(magnitude_x[1:])
            primary_magnitude_y = np.max(magnitude_y[1:])

            # 특성에 추가
            fft_features_video.extend([primary_magnitude_x, primary_magnitude_y])

        fft_features_list.append(fft_features_video)

    return fft_features_list


pickle 파일 로드

In [9]:
pkl_file_path = '/Users/diana/Desktop/BabyposeModel/capstone2_SEDA/keypoints_data.pkl'

# pickle 파일 로드
with open(pkl_file_path, 'rb') as f:
     keypoints_data = pickle.load(f)

In [123]:
pkl_file_path = '/Users/diana/Desktop/BabyposeModel/capstone2_SEDA/keypoints_data_augmented.pkl'

# pickle 파일 로드
with open(pkl_file_path, 'rb') as f:
     keypoints_data = pickle.load(f)

feature 제작 공장

In [94]:
fft_features_list = calculate_fft_features_from_keypoints(keypoints_data)

In [124]:
mean_keypoints_all_videos, labels = calculate_mean_keypoints_from_file(keypoints_data)
changes_list = calculate_keypoint_changes(keypoints_data)
autocorrelation_list = calculate_enhanced_autocorrelation_features(keypoints_data)

back_angle_changes_list1 = calculate_angle_changes(keypoints_data, (6,12,16))
back_angle_changes_list2 = calculate_angle_changes(keypoints_data, (5,11,15))
head_angle_changes_list1 = calculate_angle_changes(keypoints_data, (0,6,12))
head_angle_changes_list2 = calculate_angle_changes(keypoints_data, (0,5,11))
leg_angle_changes_list1 = calculate_angle_changes(keypoints_data, (12,14,16))
leg_angle_changes_list2 = calculate_angle_changes(keypoints_data, (11,13,15))
eye_angle_changes_list1 = calculate_angle_changes(keypoints_data, (1,5,9))
eye_angle_changes_list2 = calculate_angle_changes(keypoints_data, (2,6,10))
strech_angle_changes_list1 = calculate_angle_changes(keypoints_data, (5,8,10))
strech_angle_changes_list2 = calculate_angle_changes(keypoints_data, (6,7,9))
finger_angle_changes_list1 = calculate_angle_changes(keypoints_data, (0,8,10))
finger_angle_changes_list2 = calculate_angle_changes(keypoints_data, (0,7,9))

back_min_angle_changes_list1 = calculate_min_angle_changes(keypoints_data, (6,12,16))
back_min_angle_changes_list2 = calculate_min_angle_changes(keypoints_data, (5,11,15))
head_min_angle_changes_list1 = calculate_min_angle_changes(keypoints_data, (0,6,12))
head_min_angle_changes_list2 = calculate_min_angle_changes(keypoints_data, (0,5,11))
leg_min_angle_changes_list1 = calculate_min_angle_changes(keypoints_data, (12,14,16))
leg_min_angle_changes_list2 = calculate_min_angle_changes(keypoints_data, (11,13,15))
eye_min_angle_changes_list1 = calculate_min_angle_changes(keypoints_data, (1,5,9))
eye_min_angle_changes_list2 = calculate_min_angle_changes(keypoints_data, (2,6,10))
strech_min_angle_changes_list1 = calculate_min_angle_changes(keypoints_data, (5,8,10))
strech_min_angle_changes_list2 = calculate_min_angle_changes(keypoints_data, (6,7,9))
finger_min_angle_changes_list1 = calculate_min_angle_changes(keypoints_data, (0,8,10))
finger_min_angle_changes_list2 = calculate_min_angle_changes(keypoints_data, (0,7,9))

back_max_angle_changes_list1 = calculate_max_angle_changes(keypoints_data, (6,12,16))
back_max_angle_changes_list2 = calculate_max_angle_changes(keypoints_data, (5,11,15))
head_max_angle_changes_list1 = calculate_max_angle_changes(keypoints_data, (0,6,12))
head_max_angle_changes_list2 = calculate_max_angle_changes(keypoints_data, (0,5,11))
leg_max_angle_changes_list1 = calculate_max_angle_changes(keypoints_data, (12,14,16))
leg_max_angle_changes_list2 = calculate_max_angle_changes(keypoints_data, (11,13,15))
eye_max_angle_changes_list1 = calculate_max_angle_changes(keypoints_data, (1,5,9))
eye_max_angle_changes_list2 = calculate_max_angle_changes(keypoints_data, (2,6,10))
strech_max_angle_changes_list1 = calculate_max_angle_changes(keypoints_data, (5,8,10))
strech_max_angle_changes_list2 = calculate_max_angle_changes(keypoints_data, (6,7,9))
finger_max_angle_changes_list1 = calculate_max_angle_changes(keypoints_data, (0,8,10))
finger_max_angle_changes_list2 = calculate_max_angle_changes(keypoints_data, (0,7,9))

# 결과 확인 (예시로 첫 번째 동영상의 평균 키포인트 출력)
# print(angle_changes_list2[0])

In [30]:
# def square_minus(data1, data2):
#     result = (data1 - data2) ** 2
    
#     return result

In [125]:
features = []
mean_keypoints_all_videos = np.array(mean_keypoints_all_videos)
changes_list = np.array(changes_list)
autocorrelation_list = np.array(autocorrelation_list)
fft_features_list = np.array(fft_features_list)

for i in range(len(mean_keypoints_all_videos)):
    combined_feature = np.concatenate([mean_keypoints_all_videos[i].flatten(), changes_list[i].flatten(), autocorrelation_list[i].flatten(),
    # fft_features_list[i].flatten(),
    [back_max_angle_changes_list1[i] - back_min_angle_changes_list1[i],
    back_max_angle_changes_list2[i] - back_min_angle_changes_list2[i],
    head_max_angle_changes_list1[i] - head_min_angle_changes_list1[i],
    head_max_angle_changes_list2[i] - head_min_angle_changes_list2[i],
    leg_max_angle_changes_list1[i] - leg_min_angle_changes_list1[i],
    leg_max_angle_changes_list2[i] - leg_min_angle_changes_list2[i],
    eye_max_angle_changes_list1[i] - eye_min_angle_changes_list1[i],
    eye_max_angle_changes_list2[i] - eye_min_angle_changes_list2[i],
    strech_max_angle_changes_list1[i] - strech_min_angle_changes_list1[i],
    strech_max_angle_changes_list2[i] - strech_min_angle_changes_list2[i],
    finger_max_angle_changes_list1[i] - finger_min_angle_changes_list1[i],
    finger_max_angle_changes_list2[i] - finger_min_angle_changes_list2[i]]])
    
    features.append(combined_feature)


In [31]:
# features = []
# mean_keypoints_all_videos = np.array(mean_keypoints_all_videos)
# changes_list = np.array(changes_list)

# for i in range(len(mean_keypoints_all_videos)):
#     combined_feature = np.concatenate([mean_keypoints_all_videos[i].flatten(), changes_list[i].flatten(), 
#     [square_minus(back_max_angle_changes_list1[i], back_min_angle_changes_list1[i]),
#     square_minus(back_max_angle_changes_list2[i], back_min_angle_changes_list2[i]),
#     square_minus(head_max_angle_changes_list1[i], head_min_angle_changes_list1[i]),
#     square_minus(head_max_angle_changes_list2[i], head_min_angle_changes_list2[i]),
#     square_minus(leg_max_angle_changes_list1[i], leg_min_angle_changes_list1[i]),
#     square_minus(leg_max_angle_changes_list2[i], leg_min_angle_changes_list2[i]),
#     square_minus(eye_max_angle_changes_list1[i], eye_min_angle_changes_list1[i]),
#     square_minus(eye_max_angle_changes_list2[i], eye_min_angle_changes_list2[i]),
#     square_minus(strech_max_angle_changes_list1[i], strech_min_angle_changes_list1[i]),
#     square_minus(strech_max_angle_changes_list2[i], strech_min_angle_changes_list2[i]),
#     square_minus(finger_max_angle_changes_list1[i], finger_min_angle_changes_list1[i]),
#     square_minus(finger_max_angle_changes_list2[i], finger_min_angle_changes_list2[i])]])
    
#     features.append(combined_feature)


In [130]:
# 키포인트 평균, 키포인트 변화량, 각도 변화량을 결합
# a = np.array(mean_keypoints_all_videos)
# b = np.array(changes_list)
# c = np.array(angle_changes_list)
# print(a.shape, b.shape, c.shape)
# combined_feature = np.concatenate([mean_keypoints_all_videos, changes_list, angle_changes_list], axis=None)
# combined_feature = np.concatenate([mean_keypoints_all_videos, changes_list, angle_changes_list], axis=None)

# # 최종 feature 배열과 레이블 배열로 변환
# features = np.array(combined_feature)
# labels = np.array(labels)

SVM - 10 fold 81.28 11 fold 82.87

In [23]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import numpy as np

scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# 하이퍼파라미터 그리드 설정
param_grid = {
    'C': [0.1, 1, 10, 100],  # C: 규제 매개변수
    'gamma': [1, 0.1, 0.01, 0.001],  # gamma: 커널의 계수
    'kernel': ['rbf', 'poly', 'sigmoid']  # kernel: 사용할 커널
}

# SVM 모델 초기화
svm = SVC(random_state=42)

# 그리드 서치와 스트래티파이드 k-폴드 교차 검증 설정
cv = StratifiedKFold(n_splits=11, shuffle=True, random_state=42)
grid_search = GridSearchCV(estimator=svm, param_grid=param_grid, cv=cv, verbose=2, n_jobs=-1)

# 그리드 서치 실행
grid_search.fit(features_scaled, labels)

# 최적의 파라미터와 그 때의 점수 출력
print(f'Best parameters: {grid_search.best_params_}')
print(f'Best cross-validation score: {grid_search.best_score_*100:.2f} %')

# 최적의 모델로 평가
best_model = grid_search.best_estimator_


Fitting 11 folds for each of 48 candidates, totalling 528 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END ........................C=0.1, gamma=

SVM - 9 fold 81.29 11 fold 83.52 %

In [74]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import numpy as np

scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# 하이퍼파라미터 그리드 설정
param_grid = {
    'C': [0.1, 1, 10, 100],  # C: 규제 매개변수
    'gamma': [1, 0.1, 0.01, 0.001],  # gamma: 커널의 계수
    'kernel': ['rbf', 'poly', 'sigmoid']  # kernel: 사용할 커널
}

# SVM 모델 초기화
svm = SVC(random_state=42)

# 그리드 서치와 스트래티파이드 k-폴드 교차 검증 설정
cv = StratifiedKFold(n_splits = 11, shuffle=True, random_state=42)
grid_search = GridSearchCV(estimator=svm, param_grid=param_grid, cv=cv, verbose=2, n_jobs=-1)

# 그리드 서치 실행
grid_search.fit(features_scaled, labels)

# 최적의 파라미터와 그 때의 점수 출력
print(f'Best parameters: {grid_search.best_params_}')
print(f'Best cross-validation score: {grid_search.best_score_*100:.2f} %')

# 최적의 모델로 평가
best_model = grid_search.best_estimator_


Fitting 11 folds for each of 48 candidates, totalling 528 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END ........................C=0.1, gamma=

와 데이터 증강(좌우 반전하니까)바로 87.19퍼 달성

In [133]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import numpy as np

scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# 하이퍼파라미터 그리드 설정
param_grid = {
    'C': [0.1, 1, 10, 100],  # C: 규제 매개변수
    'gamma': [1, 0.1, 0.01, 0.001],  # gamma: 커널의 계수
    'kernel': ['rbf', 'poly', 'sigmoid']  # kernel: 사용할 커널
}

# SVM 모델 초기화
svm = SVC(random_state=42)

# 그리드 서치와 스트래티파이드 k-폴드 교차 검증 설정
cv = StratifiedKFold(n_splits = 11, shuffle=True, random_state=42)
grid_search = GridSearchCV(estimator=svm, param_grid=param_grid, cv=cv, verbose=2, n_jobs=-1)

# 그리드 서치 실행
grid_search.fit(features_scaled, labels)

# 최적의 파라미터와 그 때의 점수 출력
print(f'Best parameters: {grid_search.best_params_}')
print(f'Best cross-validation score: {grid_search.best_score_*100:.2f} %')

# 최적의 모델로 평가
best_model = grid_search.best_estimator_

Fitting 11 folds for each of 48 candidates, totalling 528 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END ........................C=0.1, gamma=1, kernel=poly; total time=   0.0s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   0.0s
[CV] END .........................C=0.1, gamma

In [103]:
import pickle
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn, optim
from sklearn.model_selection import train_test_split
from torch.nn.utils.rnn import pad_sequence

# 데이터 로딩 및 전처리
with open('keypoints_data.pkl', 'rb') as f:
    data = pickle.load(f)
keypoints = data['keypoints']
labels = data['labels']

# 키포인트 시퀀스를 텐서로 변환
keypoints_tensors = [torch.tensor(kp, dtype=torch.float32) for kp in keypoints]

# pad_sequence를 사용하여 모든 키포인트 시퀀스를 같은 길이로 패딩
keypoints_padded = pad_sequence(keypoints_tensors, batch_first=True)

labels_tensor = torch.tensor(labels, dtype=torch.long)

# 데이터셋 정의
class KeypointsDataset(Dataset):
    def __init__(self, keypoints, labels):
        self.keypoints = keypoints
        self.labels = labels
        
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        return self.keypoints[idx], self.labels[idx]

# 데이터셋과 데이터 로더 생성
dataset = KeypointsDataset(keypoints_tensors, labels_tensor)
train_dataset, test_dataset = train_test_split(dataset, test_size=0.2, random_state=42)

def collate_fn(batch):
    keypoints, labels = zip(*batch)
    keypoints_padded = pad_sequence(keypoints, batch_first=True, padding_value=0.0)
    labels = torch.tensor(labels, dtype=torch.long)
    return keypoints_padded, labels

# 데이터 로더에 collate_fn 적용
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, collate_fn=collate_fn)

# LSTM 모델 정의
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTMModel, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

# 모델, 손실 함수, 옵티마이저 초기화
model = LSTMModel(input_size=51, hidden_size=128, num_layers=2, num_classes=len(set(labels)))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 40 

# 데이터 로더에서 배치를 가져온 후, LSTM에 적합한 형태로 데이터를 변환
# 학습 루프

for epoch in range(num_epochs):
    model.train()  # 모델을 학습 모드로 설정
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    
    for i, (keypoints, labels) in enumerate(train_loader):
        keypoints = keypoints.view(keypoints.size(0), keypoints.size(1), -1)  # 차원 변환
        outputs = model(keypoints)
        
        loss = criterion(outputs, labels)
        _, predicted = torch.max(outputs, 1)  # 가장 높은 점수를 가진 클래스 선택
        correct_predictions += (predicted == labels).sum().item()  # 정확한 예측 수 업데이트
        total_predictions += labels.size(0)  # 전체 예측 수 업데이트
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    epoch_loss = running_loss / len(train_loader)
    epoch_acc = correct_predictions / total_predictions  # 에포크 정확도 계산
    
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}')

# 이 코드는 각 에포크가 끝날 때마다 평균 손실과 정확도를 출력합니다.


        
        


Epoch 1/40, Loss: 1.7965, Accuracy: 0.1379
Epoch 2/40, Loss: 1.7908, Accuracy: 0.1552
Epoch 3/40, Loss: 1.7846, Accuracy: 0.2241
Epoch 4/40, Loss: 1.7826, Accuracy: 0.2241
Epoch 5/40, Loss: 1.7752, Accuracy: 0.2328
Epoch 6/40, Loss: 1.7783, Accuracy: 0.2328
Epoch 7/40, Loss: 1.7711, Accuracy: 0.2328
Epoch 8/40, Loss: 1.7695, Accuracy: 0.2241
Epoch 9/40, Loss: 1.7758, Accuracy: 0.2241
Epoch 10/40, Loss: 1.7692, Accuracy: 0.2241
Epoch 11/40, Loss: 1.7715, Accuracy: 0.2328
Epoch 12/40, Loss: 1.7654, Accuracy: 0.2328
Epoch 13/40, Loss: 1.7657, Accuracy: 0.2241
Epoch 14/40, Loss: 1.7673, Accuracy: 0.2328
Epoch 15/40, Loss: 1.7597, Accuracy: 0.2328
Epoch 16/40, Loss: 1.7640, Accuracy: 0.2328
Epoch 17/40, Loss: 1.7641, Accuracy: 0.2328
Epoch 18/40, Loss: 1.7568, Accuracy: 0.2328
Epoch 19/40, Loss: 1.7668, Accuracy: 0.2328
Epoch 20/40, Loss: 1.7662, Accuracy: 0.2328
Epoch 21/40, Loss: 1.7614, Accuracy: 0.2328
Epoch 22/40, Loss: 1.7561, Accuracy: 0.2328
Epoch 23/40, Loss: 1.7568, Accuracy: 0.23

실패 ㅠ

키포인트 변화량 최댓값 - 최솟값

In [47]:
# def calculate_keypoint_change_range(keypoints_data):
#     # 키포인트 데이터는 각 동영상의 프레임별 키포인트 리스트를 포함하는 리스트

#     change_ranges_list = []  # 변화량의 범위를 저장할 리스트 초기화

#     for keypoints_list in keypoints_data['keypoints']:
#         changes = []  # 개별 동영상의 키포인트 변화량을 저장할 리스트
#         prev_keypoints = None

#         for keypoints in keypoints_list:
#             keypoints = np.array(keypoints)
#             if prev_keypoints is not None:
#                 # 현재 프레임과 이전 프레임의 키포인트 사이의 변화량 계산
#                 change = np.abs(keypoints - prev_keypoints)
#                 changes.append(change)
#             prev_keypoints = keypoints

#         # 변화량의 최댓값과 최솟값의 차이 계산
#         if changes:
#             max_change = np.max(changes, axis=0)
#             min_change = np.min(changes, axis=0)
#             change_range = (max_change) ** 2 - (min_change) ** 2
#         else:
#             # 변화량이 없는 경우, 0으로 채워진 배열 반환
#             change_range = np.zeros_like(keypoints_list[0])

#         change_ranges_list.append(change_range)

#     return change_ranges_list