# import

In [128]:
import pandas as pd

# The notebook's working directory is usually `src`, so the project root that
# holds the `utils` package is expected to be one of its ancestors.
import sys, os
from pathlib import Path

ROOT = Path.cwd().parent

# Probe the parent first, then keep climbing in case the notebook lives deeper
# in the tree. The first ancestor that contains `utils` wins.
for candidate in (ROOT, *ROOT.parents):
    if (candidate / "utils").exists():
        candidate_str = str(candidate)
        # Re-running this cell must not pile up duplicate entries, but the
        # project root should still end up at the front of the search path.
        sys.path[:] = [entry for entry in sys.path if entry != candidate_str]
        sys.path.insert(0, candidate_str)
        break
# If no ancestor contains `utils`, sys.path is left untouched and the
# `from utils...` import that follows fails with ModuleNotFoundError.

from utils.angular_util import compute_joint_angles, compute_connected_unit_vectors, flatten_vectors, compute_face_hand_vectors

## 수집한 데이터 합치기

In [129]:
# Number of feature columns per landmark group: points x values per point.
# NOTE(review): the 3 values per point are presumably (x, y, z) - confirm
# against the data collection code.
VALUES_PER_POINT = 3
HAND_COUNT = 21 * VALUES_PER_POINT
POSE_COUNT = 11 * VALUES_PER_POINT

## 헤더 추가
🚨 주의: 아래 셀은 CSV 파일에 헤더(`label` 열 포함)를 추가합니다. 이미 헤더가 있는 파일에 다시 실행하면 헤더가 중복으로 추가될 수 있으므로, 파일당 한 번만 실행하세요.

In [130]:
# for i in range(7, 8):
#     data = pd.read_csv(f"../../data/sign_data/sign_data_{i}.csv")
    # data.insert(0, 'label', [i] * len(data))
    # columns = [i for i in range(len(data.columns) - 1)]
    # columns.insert(0, 'label')
    # data.columns = columns
    # data.to_csv(f"../../data/sign_data/sign_data_{i}.csv", index=False)

In [131]:
# i = 5
# data = pd.read_csv(f"../../data/sign_data/sign_data_{i}.csv", index_col=False)
# data.loc[:, 'label'] = i
# data.to_csv(f"../../data/sign_data/sign_data_{i}.csv", index=False)

## 병합

In [132]:
# Source label ids (they appear in the CSV file names). Each one is remapped to
# its position in this list so the training labels are contiguous: 0..8.
SOURCE_LABELS = [0, 1, 2, 3, 4, 5, 6, 14, 15]
SAMPLES_PER_CLASS = 300  # rows drawn from every class file (balanced classes)
SAMPLE_SEED = 42         # fixed seed so a re-run draws the same rows

sampled_frames = []
for i, label in enumerate(SOURCE_LABELS):
    print(label)
    data = pd.read_csv(f"../../data/sign_data/sign_data_{label}.csv", index_col=False)
    # Replace the label stored in the file with the contiguous class index.
    data = data.drop(columns=['label']).assign(label=i)
    # Sampling without replacement raises ValueError if a file has fewer
    # than SAMPLES_PER_CLASS rows.
    sampled_frames.append(
        data.sample(n=SAMPLES_PER_CLASS, replace=False, random_state=SAMPLE_SEED)
    )

# Concatenate once instead of growing a frame inside the loop.
merged_data = pd.concat(sampled_frames, ignore_index=True)

# NOTE(review): the frame displayed after this cell shows NaN in the last
# feature column (158) for the rows of the last class - check whether
# sign_data_15.csv has a missing or shifted column.
df = merged_data

0
1
2
3
4
5
6
14
15


In [133]:
# Display the merged frame (feature columns plus the trailing `label` column)
# as a quick visual sanity check; pandas truncates the rendered rows/columns.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,150,151,152,153,154,155,156,157,158,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.415358,0.449882,-0.371466,0.517967,0.521554,-0.584618,0.465842,0.521171,-0.599782,0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.423235,0.440197,-0.408951,0.523118,0.503770,-0.643523,0.469722,0.508288,-0.654466,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.408610,0.483198,-0.239360,0.507468,0.557231,-0.439302,0.453750,0.559657,-0.436142,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.421941,0.478865,-0.273370,0.520912,0.559256,-0.511614,0.465992,0.563108,-0.509202,0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.411952,0.441850,-0.484557,0.508371,0.512288,-0.730711,0.458071,0.508907,-0.741444,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2695,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.482557,0.518437,-0.940851,0.637055,0.646274,-1.320883,0.559788,0.650357,,8
2696,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.335127,0.558954,-0.920506,0.477018,0.639897,-1.257218,0.414909,0.649236,,8
2697,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.321588,0.384180,-1.380313,0.476797,0.496747,-1.730446,0.400312,0.503459,,8
2698,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.564278,0.397670,-0.481917,0.699580,0.504381,-0.784367,0.637758,0.498398,,8


In [134]:
# merged_data.to_csv("../../data/merge_num_7_data.csv", index=False)

## 데이터 불러오기

In [135]:
# import pandas as pd
# df = pd.read_csv("../../data/merge_num_7_data.csv")

## 클래스 확인

In [136]:
# List the distinct class ids present in the merged data.
df['label'].unique()

array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int64)

## x, y 데이터

In [137]:
# Separate the feature matrix from the target column.
x = df.drop(columns=['label'])
y = df['label']

print(x.shape, y.shape)

(2700, 159) (2700,)


# 데이터 전처리

In [138]:
# Positional column layout of `x`:
#   [0, HAND_COUNT)               first hand block (not used here;
#                                 presumably the left hand - confirm)
#   [HAND_COUNT, 2 * HAND_COUNT)  right-hand block
#   [2 * HAND_COUNT, end)         face block
# The bounds come from HAND_COUNT rather than repeating the literal 63.
right_start = HAND_COUNT
right_end = 2 * HAND_COUNT

right_points_x = x.iloc[:, right_start:right_end]
face_points_x = x.iloc[:, right_end:]
# First column of the face block only (a single coordinate, not a full point).
nose_point_x = face_points_x.iloc[:, 0]
print(right_points_x.shape, face_points_x.shape, nose_point_x.shape)

(2700, 63) (2700, 33) (2700,)


In [139]:
# Derive three feature groups per sample from the right-hand and face blocks:
# joint angles, flattened connected unit vectors, and face-to-hand vectors.
# Every helper returns a 3-tuple; only one element of each is kept.
hand_rows = right_points_x.to_numpy().tolist()
face_rows = face_points_x.to_numpy().tolist()

angles_list = []
vector_list = []
face_hand_vector_list = []
for hand_row, face_row in zip(hand_rows, face_rows):
    joint_angles, _, _ = compute_joint_angles(hand_row)
    _, _, unit_vectors = compute_connected_unit_vectors(hand_row, return_flat=True)
    _, _, face_to_hand = compute_face_hand_vectors(face_row, hand_row)

    angles_list.append(joint_angles)
    vector_list.append(unit_vectors)
    face_hand_vector_list.append(face_to_hand)

# Feature widths of the first sample (angles, unit-vector components).
print(len(angles_list[0]), len(vector_list[0]))

# One row per sample, default integer column labels.
angular_x = pd.DataFrame(angles_list)
vector_x = pd.DataFrame(vector_list)
face_hand_vector_x = pd.DataFrame(face_hand_vector_list)

print(angular_x.shape, vector_x.shape, face_hand_vector_x.shape)

15 72
(2700, 15) (2700, 72) (2700, 54)


In [140]:
# Place the three feature groups side by side. `ignore_index=True` on axis=1
# relabels the columns 0..n-1, so the per-group labels (each starting at 0)
# do not collide.
feature_groups = [angular_x, vector_x, face_hand_vector_x]
result_x = pd.concat(feature_groups, axis=1, ignore_index=True)

print(result_x.shape)

(2700, 141)


In [141]:
# result_x.to_csv("../../data/angle_vector_num_7_data.csv", index=False)

## 데이터 분할하기

In [142]:
from sklearn.model_selection import train_test_split

# 80/20 hold-out split, stratified on the label so every class keeps the same
# share in both partitions; the seed is fixed for repeatability.
x_train, x_test, y_train, y_test = train_test_split(
    result_x,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=y,
)

## 데이터 스케일링

In [143]:
# from sklearn.preprocessing import MinMaxScaler
# mms = MinMaxScaler()
# x_train = mms.fit_transform(x_train)
# x_test = mms.transform(x_test)

## 모델 학습

In [144]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
import numpy as np

# Active model: shallow gradient-boosted trees. The commented lines are
# alternative estimators that can be swapped in.
model = XGBClassifier(
    n_estimators=200,
    max_depth=3,
    learning_rate=0.01,
)
#model = RandomForestClassifier(n_estimators=300, max_depth=5, random_state=42)
#model = LogisticRegression(solver='liblinear', max_iter=100)
model.fit(x_train, y_train)

# Accuracy on the held-out test partition.
score = model.score(x_test, y_test)
accuracy_pct = np.round(score*100, 2)

print(f"Accuracy: {accuracy_pct}%")

Accuracy: 98.89%


In [145]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the held-out test partition.
y_pred = model.predict(x_test)
report_text = classification_report(y_test, y_pred)
print(report_text)


              precision    recall  f1-score   support

           0       0.98      0.98      0.98        60
           1       0.97      0.98      0.98        60
           2       0.98      0.95      0.97        60
           3       0.98      1.00      0.99        60
           4       1.00      1.00      1.00        60
           5       1.00      1.00      1.00        60
           6       1.00      1.00      1.00        60
           7       0.98      0.98      0.98        60
           8       1.00      1.00      1.00        60

    accuracy                           0.99       540
   macro avg       0.99      0.99      0.99       540
weighted avg       0.99      0.99      0.99       540



## 모델 저장하기

In [146]:
import joblib

# Persist the fitted classifier; joblib.dump returns the list of written
# file names, which the notebook shows as the cell output.
model_path = "../../models/xgb_sample_angle_vector_model.pkl"
joblib.dump(model, model_path)

['../../models/xgb_sample_angle_vector_model.pkl']