In [15]:
import os
import pandas as pd

# Root folder that holds the actual sensor recordings
base_path = r"D:\김지수\낙상\training\01.원천데이터\TS\센서"

# A column is kept only if its name contains one of these body parts
# AND one of these sensor types (substring match, see filter_columns).
valid_parts = ['Head', 'Pelvis']
valid_sensors = [
    'Segment Angular Velocity',
    'Segment Acceleration',
    'Sensor Magnetic Field',
]

# Accumulator: one filtered, labelled DataFrame is appended per CSV that is read
data = []

def filter_columns(df, parts=None, sensors=None):
    """Return the column labels of ``df`` that name a wanted body part and a wanted sensor.

    A column is kept when its label contains at least one entry of ``parts``
    AND at least one entry of ``sensors`` as a substring. The result keeps the
    order of ``df.columns``.

    Args:
        df: Object exposing ``.columns`` (e.g. a pandas DataFrame).
        parts: Iterable of body-part substrings. Defaults to the module-level
            ``valid_parts``.
        sensors: Iterable of sensor-name substrings. Defaults to the
            module-level ``valid_sensors``.

    Returns:
        list: The matching column labels (empty if nothing matches).
    """
    # Defaults are resolved at call time so later edits to the globals are honoured.
    # tuple() lets callers pass one-shot iterables that must be re-scanned per column.
    wanted_parts = tuple(valid_parts if parts is None else parts)
    wanted_sensors = tuple(valid_sensors if sensors is None else sensors)

    selected = []
    for col in df.columns:
        name = str(col)  # tolerate non-string labels instead of raising TypeError
        has_part = any(part in name for part in wanted_parts)
        has_sensor = any(sensor in name for sensor in wanted_sensors)
        if has_part and has_sensor:
            selected.append(col)
    return selected

def _load_labelled_frames(parent_dir, label):
    """Read every ``<parent_dir>/<name>/<name>.csv`` and return the filtered, labelled frames.

    For each entry of ``parent_dir`` the CSV named after the entry is read,
    reduced to the columns chosen by ``filter_columns``, and given a constant
    ``label`` column. Entries without that CSV, and CSVs with no matching
    column, are reported on stdout and skipped.

    Args:
        parent_dir: Directory whose entries are per-recording folders.
        label: Value written to the ``label`` column of every returned frame.

    Returns:
        list: One DataFrame (selected columns + ``label``) per usable CSV.
    """
    frames = []
    # os.listdir returns entries in arbitrary order; sorting makes the row order
    # of the merged table (and any seeded split made from it) reproducible.
    for folder_name in sorted(os.listdir(parent_dir)):
        csv_path = os.path.join(parent_dir, folder_name, f"{folder_name}.csv")
        if not os.path.isfile(csv_path):
            print(f"[❌ 파일 없음] {csv_path}")
            continue
        recording = pd.read_csv(csv_path)
        cols = filter_columns(recording)
        if not cols:
            print(f"[⚠️ 컬럼 없음] {csv_path}")
            continue
        # Sensor columns first, label last — same layout as before.
        frames.append(recording[cols].assign(label=label))
    return frames


# Non-fall recordings (N)
n_path = os.path.join(base_path, 'N')
data.extend(_load_labelled_frames(n_path, 'N'))

# Fall recordings (Y > BY, FY, SY) — all three sub-types share the label 'Y'
y_path = os.path.join(base_path, 'Y')
for fall_type in ['BY', 'FY', 'SY']:
    fall_path = os.path.join(y_path, fall_type)
    data.extend(_load_labelled_frames(fall_path, 'Y'))

# Save: merge every collected frame into one table and write it next to the raw data
if not data:
    # Nothing was collected, so no file is written.
    print("🚨 유효한 데이터가 없어 저장되지 않았습니다.")
else:
    output_path = os.path.join(base_path, 'final_sensor_data.csv')
    total_df = pd.concat(data, ignore_index=True)
    total_df.to_csv(output_path, index=False)
    print(f"✅ 완료: '{output_path}' 에 저장됨!")


✅ 완료: 'D:\김지수\낙상\training\01.원천데이터\TS\센서\final_sensor_data.csv' 에 저장됨!


In [19]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# 1. Load the merged sensor table (adjust the path for your machine!)
data_path = r"D:\김지수\낙상\Training\01.원천데이터\TS\센서\final_sensor_data.csv"
df = pd.read_csv(data_path)

# 2. Separate features and label
X = df.drop(columns=['label'])
y = df['label'].map({'N': 0, 'Y': 1})  # binary target: 0 = non-fall, 1 = fall
if y.isna().any():
    # .map() turns any label other than 'N'/'Y' into NaN; fail loudly instead of
    # letting NaN reach the stratified split.
    raise ValueError("label column contains values other than 'N' / 'Y'")

# 3. Stratified train/test split on row positions.
# Splitting first lets the scaler below be fitted on training rows only.
# NOTE(review): the split is per row, so rows from one recording presumably end up
# in both train and test; the near-perfect scores may reflect that rather than
# generalisation to unseen recordings. A recording-level split (e.g. GroupShuffleSplit)
# needs a recording id column, which this CSV does not appear to carry — TODO confirm.
train_idx, test_idx = train_test_split(
    np.arange(len(X)), test_size=0.3, random_state=42, stratify=y
)
y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

# 4. Scaling — statistics come from the training rows only, so nothing about the
# test rows leaks into preprocessing.
scaler = StandardScaler()
scaler.fit(X.iloc[train_idx])
X_scaled = scaler.transform(X)
X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]

# 5. Train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 6. Predict
y_pred = model.predict(X_test)
y_proba = model.predict_proba(X_test)[:, 1]  # probability of class 1 (fall), used for AUC

# 7. Report the evaluation results
print("✅ [분류 성능 평가]")
print(classification_report(y_test, y_pred, target_names=['Non-Fall', 'Fall']))

print("\n✅ [Confusion Matrix]")
print(confusion_matrix(y_test, y_pred))

print("\n✅ [ROC AUC Score]")
print("AUC: {:.4f}".format(roc_auc_score(y_test, y_proba)))


✅ [분류 성능 평가]
              precision    recall  f1-score   support

    Non-Fall       1.00      1.00      1.00    817920
        Fall       1.00      1.00      1.00   2445120

    accuracy                           1.00   3263040
   macro avg       1.00      1.00      1.00   3263040
weighted avg       1.00      1.00      1.00   3263040


✅ [Confusion Matrix]
[[ 817912       8]
 [      0 2445120]]

✅ [ROC AUC Score]
AUC: 1.0000
