In [161]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pickle
import json

In [162]:
# Load the annotation JSON for the "answer" data set.
# NOTE: the raw-string paths use backslashes, which act as directory
# separators only on Windows.
with open(r'data\answer\answer_data_annotations.json', 'r') as f:
    answer_annotations = json.load(f)

# Load the annotation JSON for the "wrong" data set.
with open(r'data\wrong\wrong_data_annotations.json', 'r') as f:
    wrong_annotations = json.load(f)

In [163]:
# CSV file names for the "answer" and "wrong" data sets.
# NOTE: the next cell rebinds these same names to the loaded DataFrames.
answer_data = 'answer_annotated_data.csv'
wrong_data = 'wrong_annotated_data.csv'

In [164]:
# Load the annotated CSVs. The file names are passed as literals instead of
# through `answer_data` / `wrong_data`, because those names are rebound to
# DataFrames right here: reading from the variables made this cell fail when
# re-run (pd.read_csv would receive a DataFrame instead of a path).
answer_data = pd.read_csv('answer_annotated_data.csv')
wrong_data = pd.read_csv('wrong_annotated_data.csv')

In [165]:
# Attach the class label as a constant column on every row.
answer_data['Label'] = 1  # "answer" (correct) data ==> label 1
wrong_data['Label'] = 0  # "wrong" (incorrect) data ==> label 0

In [166]:
def get_video_data(data, annotations):
    """Split `data` into one DataFrame per annotated video.

    For every entry in ``annotations['videos']`` the frame number of each
    annotation is parsed from its ``'frame'`` string (the text after the last
    underscore, up to the first dot) and the rows of `data` carrying those
    index labels are selected with ``.loc``.

    Args:
        data: DataFrame whose index labels are matched against the parsed
            frame numbers.
        annotations: Mapping with a ``'videos'`` list; each video has an
            ``'annotations'`` list of mappings with a ``'frame'`` string.

    Returns:
        list of DataFrames, one per video, in annotation order, each with a
        fresh 0..n-1 index.
    """
    def _frame_number(frame_name):
        # e.g. "clip_0012.jpg" -> 12
        tail = frame_name.rsplit('_', 1)[-1]
        return int(tail.split('.', 1)[0])

    return [
        data.loc[
            [_frame_number(entry['frame']) for entry in clip['annotations']]
        ].reset_index(drop=True)  # restart the index at 0 for every video
        for clip in annotations['videos']
    ]

In [167]:
# Split each labelled table into a list of per-video DataFrames.
answer_videos = get_video_data(answer_data, answer_annotations)
wrong_videos = get_video_data(wrong_data, wrong_annotations)

In [168]:
# Pool both classes into one list; every video keeps its own 'Label' column.
all_videos = answer_videos + wrong_videos

In [169]:
# Single scaler instance shared by every scale_videos call in this notebook.
scaler = MinMaxScaler()

In [170]:
def scale_videos(videos, scaler, fit=True):
    """Scale the numeric feature columns of every video with `scaler`.

    Args:
        videos: Iterable of DataFrames, each containing a 'Label' column.
        scaler: Object exposing ``fit_transform`` and ``transform``
            (e.g. a MinMaxScaler).
        fit: When True (default, the original behaviour) the scaler is
            re-fit on each video via ``fit_transform``, so every video is
            scaled independently and `scaler` is left fitted on the last
            video processed. When False the scaler is applied as-is via
            ``transform`` and its fitted state is not modified; it must
            already be fitted.

    Returns:
        list of DataFrames holding the scaled numeric features (columns in
        sorted order, as produced by ``Index.difference``) followed by the
        untouched 'Label' column.
    """
    # Pick the scaling call once; fit=False lets callers reuse a fitted scaler
    # (e.g. on held-out data) without overwriting its parameters.
    apply_scaler = scaler.fit_transform if fit else scaler.transform

    scaled_videos = []
    for video in videos:
        # Numeric columns only; the label must never be scaled.
        features = video.select_dtypes(include=[np.number]).columns.difference(['Label'])
        scaled_video = pd.DataFrame(apply_scaler(video[features]), columns=features)
        scaled_video['Label'] = video['Label'].values
        scaled_videos.append(scaled_video)
    return scaled_videos

In [171]:
# Scale the numeric features of every training video.
scaled_videos = scale_videos(all_videos, scaler)

In [172]:
def create_dataset_rf(videos, time_step=10):
    """Build flattened sliding-window samples for a non-sequential classifier.

    For each video, every run of `time_step` consecutive rows (stride 1) is
    flattened into a single feature vector. The target of a window is the
    'Label' of the row immediately after it, so a video with n rows yields
    n - time_step samples (none when n <= time_step).

    Args:
        videos: Iterable of DataFrames, each containing a 'Label' column.
        time_step: Number of consecutive rows per window.

    Returns:
        (X, y) as numpy arrays: the flattened windows and their labels.
    """
    windows, targets = [], []
    for clip in videos:
        feature_rows = clip.drop(columns=['Label']).values
        label_column = clip['Label'].values
        starts = range(len(feature_rows) - time_step)
        # Rows [s, s + time_step) collapsed into one 1-D vector.
        windows.extend(feature_rows[s:s + time_step].flatten() for s in starts)
        # Target is the label of the row that follows the window.
        targets.extend(label_column[s + time_step] for s in starts)
    return np.array(windows), np.array(targets)

In [173]:
time_step = 7 # Window length: 7 consecutive rows per sample.
X_rf, y_rf = create_dataset_rf(scaled_videos, time_step)

In [174]:
## Previously split into train / validation / test sets; changed to a train / validation split only.

# X_train_rf, X_temp_rf, y_train_rf, y_temp_rf = train_test_split(X_rf, y_rf, test_size=0.4, random_state=42, stratify=y_rf) # stratify keeps the class ratio in each split.
# X_val_rf, X_test_rf, y_val_rf, y_test_rf = train_test_split(X_temp_rf, y_temp_rf, test_size=0.5, random_state=42, stratify=y_temp_rf)

# 60% train / 40% validation, stratified on the label, fixed seed.
# NOTE(review): the samples are stride-1 windows from create_dataset_rf, so
# neighbouring windows of one video share most of their rows. This sample-level
# random split can place such overlapping windows on both sides, which would
# make the validation score optimistic — confirm, and consider splitting by video.
X_train_rf, X_val_rf, y_train_rf, y_val_rf = train_test_split(X_rf, y_rf, test_size=0.4, random_state=42, stratify=y_rf)

In [175]:
# Fit a 100-tree random forest on the flattened windows (fixed seed).
model_rf = RandomForestClassifier(n_estimators=100, random_state=42)
model_rf.fit(X_train_rf, y_train_rf)

In [176]:
# vali set
y_val_pred_rf = model_rf.predict(X_val_rf)
accuracy_val_rf = accuracy_score(y_val_rf, y_val_pred_rf)
report_val_rf = classification_report(y_val_rf, y_val_pred_rf)

In [177]:
# Show the validation metrics computed in the previous cell.
print(f'Vali set Accuracy: {accuracy_val_rf}')
print(f'Vali set Report:\n{report_val_rf}')

Vali set Accuracy: 1.0
Vali set Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        27
           1       1.00      1.00      1.00        72

    accuracy                           1.00        99
   macro avg       1.00      1.00      1.00        99
weighted avg       1.00      1.00      1.00        99



In [178]:
# Load the test-set annotations. Context managers close the file handles
# deterministically (the previous bare open() calls never closed them), which
# also matches how the training annotations are loaded earlier in the notebook.
with open(r'data\test\answer\test_answer_data_annotations.json', 'r') as f:
    test_answer_annotations = json.load(f)

with open(r'data\test\wrong\test_wrong_data_annotations.json', 'r') as f:
    test_wrong_annotations = json.load(f)

In [179]:
# Load the annotated CSVs for the test set.
test_answer_data = pd.read_csv('test_answer_annotated_data.csv')
test_wrong_data = pd.read_csv('test_wrong_annotated_data.csv')

In [180]:
# Same labelling convention as the training data.
test_answer_data['Label'] = 1  # "answer" (correct) data ==> label 1
test_wrong_data['Label'] = 0  # "wrong" (incorrect) data ==> label 0


In [181]:
# Split each labelled test table into a list of per-video DataFrames.
test_answer_videos = get_video_data(test_answer_data, test_answer_annotations)
test_wrong_videos = get_video_data(test_wrong_data, test_wrong_annotations)

In [182]:
# Pool both classes of test videos into one list.
test_all_videos = test_answer_videos + test_wrong_videos

In [183]:
# Scale the test videos with the shared scaler.
# NOTE(review): as called here, scale_videos runs scaler.fit_transform on each
# video, so this re-fits `scaler` on test data and leaves it fitted on the last
# test video — confirm that per-video scaling is intended before persisting it.
scaled_test_videos = scale_videos(test_all_videos, scaler)

In [184]:
# Build test windows with the same window length used for training.
X_test_rf, y_test_rf = create_dataset_rf(scaled_test_videos, time_step)

In [185]:
# Test set: predict, compute accuracy and the per-class report, then print both.
y_test_pred_rf = model_rf.predict(X_test_rf)
accuracy_test_rf = accuracy_score(y_test_rf, y_test_pred_rf)
report_test_rf = classification_report(y_test_rf, y_test_pred_rf)
print(f'Test Accuracy: {accuracy_test_rf}')
print(f'Test Report:\n{report_test_rf}')

Test Accuracy: 0.6923076923076923
Test Report:
              precision    recall  f1-score   support

           0       0.64      0.96      0.77        28
           1       0.90      0.38      0.53        24

    accuracy                           0.69        52
   macro avg       0.77      0.67      0.65        52
weighted avg       0.76      0.69      0.66        52



In [186]:
#with open('running_posture_model_edit_frame.pkl', 'wb') as model_file:
#    pickle.dump(model_rf, model_file)

In [187]:
#with open('data_scaler_edit_frame.pkl', 'wb') as scaler_file:
#    pickle.dump(scaler, scaler_file)