In [1]:
# =========================
# 0) 라이브러리 & CSV 로드
# =========================
from google.colab import drive
drive.mount('/content/drive')

import os, numpy as np, pandas as pd, tensorflow as tf
from tensorflow.keras import layers, models

CSV_PATH = "/content/drive/MyDrive/mhealth_dataset_combined.csv"  # 경로 확인

df = pd.read_csv(CSV_PATH)
print("raw:", df.shape, "subjects:", sorted(df["subject"].unique()))

Mounted at /content/drive
raw: (1215745, 25) subjects: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10)]


In [6]:
# =========================
# 1) 파라미터
# =========================
FS_HZ         = 50        # 샘플링 주파수
FRAME_SECONDS = 4         # 윈도우 길이(초)
HOP_RATIO     = 0.5       # 0.5=50% overlap, 1.0=겹치기 없음
FEATURE_MODE  = "all"     # "all" or "acc_gyro_only"

BATCH   = 256
AUTOTUNE = tf.data.AUTOTUNE

# =========================
# 2) 컬럼 정리
# =========================
ALL_FEATS = [c for c in df.columns if c not in ("label","subject")]
FEATS = [c for c in ALL_FEATS if ("acc" in c or "gyro" in c)] if FEATURE_MODE=="acc_gyro_only" else ALL_FEATS

df[FEATS] = df[FEATS].astype("float32")
df["label"] = df["label"].astype("int16")

print("features used:", len(FEATS))

features used: 23


In [7]:
# =========================
# 3) subject별 80/20 split
# =========================
train_parts, test_parts = [], []
for sid, g in df.groupby("subject"):
    n = int(len(g) * 0.8)
    train_parts.append(g.iloc[:n])
    test_parts.append(g.iloc[n:])

train_df = pd.concat(train_parts, ignore_index=True)
test_df  = pd.concat(test_parts,  ignore_index=True)

print("train_df:", train_df.shape, "test_df:", test_df.shape)

train_df: (972592, 25) test_df: (243153, 25)


In [8]:
# =========================
# 4) 제너레이터 기반 슬라이딩 윈도우
# =========================
FRAME = FS_HZ * FRAME_SECONDS
HOP   = max(1, int(FRAME * HOP_RATIO))
N_FEATS = len(FEATS)
N_CLASSES = int(df["label"].max()) + 1

# train 통계로 표준화
mu  = train_df[FEATS].mean().to_numpy(dtype=np.float32)
std = train_df[FEATS].std(ddof=0).replace(0, 1.0).to_numpy(dtype=np.float32)

def window_generator(df_part, frame, hop, feats, mu, std):
    X = df_part[feats].to_numpy(dtype=np.float32, copy=False)
    y = df_part["label"].to_numpy(dtype=np.int32, copy=False)
    N = len(df_part)
    for i in range(0, N - frame, hop):
        seg = (X[i:i+frame] - mu) / std
        lab = np.bincount(y[i:i+frame], minlength=N_CLASSES).argmax().astype(np.int32)
        yield seg, lab

def make_ds(df_part, frame, hop, feats, mu, std, batch=256, shuffle=False):
    output_sig = (
        tf.TensorSpec(shape=(frame, len(feats)), dtype=tf.float32),
        tf.TensorSpec(shape=(), dtype=tf.int32),
    )
    ds = tf.data.Dataset.from_generator(
        lambda: window_generator(df_part, frame, hop, feats, mu, std),
        output_signature=output_sig
    )
    if shuffle:
        ds = ds.shuffle(10000)
    return ds.batch(batch).prefetch(AUTOTUNE)

ds_train = make_ds(train_df, FRAME, HOP, FEATS, mu, std, shuffle=True)
ds_test  = make_ds(test_df,  FRAME, HOP, FEATS, mu, std, shuffle=False)

In [9]:
# 5) LSTM 모델 정의
# =========================
def make_lstm(input_frame, n_feats, n_classes):
    inp = layers.Input(shape=(input_frame, n_feats))
    x = layers.LSTM(128, return_sequences=True)(inp)
    x = layers.LSTM(64)(x)
    x = layers.Dropout(0.3)(x)
    out = layers.Dense(n_classes, activation="softmax")(x)
    return models.Model(inp, out)

model = make_lstm(FRAME, N_FEATS, N_CLASSES)
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

In [10]:
# =========================
# 6) 학습 & 평가
# =========================
print("\n=== Train LSTM ===")
model.fit(ds_train, epochs=20, verbose=1)

print("\n=== Evaluate LSTM ===")
test_loss, test_acc = model.evaluate(ds_test, verbose=0)
print(f"Test Accuracy: {test_acc:.4f}")


=== Train LSTM ===
Epoch 1/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 33ms/step - accuracy: 0.4670 - loss: 1.9011
Epoch 2/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 33ms/step - accuracy: 0.7302 - loss: 0.8900
Epoch 3/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 34ms/step - accuracy: 0.7586 - loss: 0.6427
Epoch 4/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 33ms/step - accuracy: 0.7779 - loss: 0.6128
Epoch 5/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 32ms/step - accuracy: 0.7543 - loss: 0.6218
Epoch 6/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 35ms/step - accuracy: 0.7773 - loss: 0.6223
Epoch 7/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 32ms/step - accuracy: 0.8089 - loss: 0.4931
Epoch 8/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 32ms/step - accuracy: 0.8176 - loss: 0.4792
Epoch 9/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 36ms/step - accuracy: 0.8058 - loss: 0.4697
Epoch 10/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 33ms/step - accuracy: 0.8215 - loss: 0.4586
Epoch 11/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 33ms/step - accuracy: 0.8199 - loss: 0.4665
Epoch 12/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 35ms/step - accuracy: 0.8340 - loss: 0.4218
Epoch 13/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 32ms/step - accuracy: 0.8390 - loss: 0.3879
Epoch 14/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 33ms/step - accuracy: 0.8344 - loss: 0.4020
Epoch 15/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 36ms/step - accuracy: 0.8420 - loss: 0.3733
Epoch 16/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 33ms/step - accuracy: 0.8503 - loss: 0.3431
Epoch 17/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 33ms/step - accuracy: 0.8204 - loss: 0.4276
Epoch 18/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 36ms/step - accuracy: 0.8533 - loss: 0.3500
Epoch 19/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 33ms/step - accuracy: 0.8619 - loss: 0.3348
Epoch 20/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 33ms/step - accuracy: 0.8595 - loss: 0.3367

=== Evaluate LSTM ===




Test Accuracy: 0.8782




In [11]:
# =========================
# 7) 분류 리포트
# =========================
from sklearn.metrics import classification_report, confusion_matrix

y_true, y_pred = [], []
for xb, yb in ds_test:
    pb = model.predict(xb, verbose=0).argmax(axis=1)
    y_true.append(yb.numpy()); y_pred.append(pb)
y_true = np.concatenate(y_true); y_pred = np.concatenate(y_pred)

print(classification_report(y_true, y_pred, digits=4))
print("Confusion matrix:\n", confusion_matrix(y_true, y_pred))

              precision    recall  f1-score   support

           0     0.9216    0.9415    0.9315      1949
           4     0.0000    0.0000    0.0000         0
           5     0.7377    0.2922    0.4186       154
           6     0.0000    0.0000    0.0000         0
           8     0.0000    0.0000    0.0000         0
           9     0.6000    0.9836    0.7453        61
          10     0.7353    0.7463    0.7407        67
          11     0.8966    0.8844    0.8904       147
          12     0.5000    0.2692    0.3500        52

    accuracy                         0.8782      2430
   macro avg     0.4879    0.4575    0.4530      2430
weighted avg     0.8862    0.8782    0.8741      2430

Confusion matrix:
 [[1835    5   16    7    6   40   11   15   14]
 [   0    0    0    0    0    0    0    0    0]
 [  90    0   45    0   19    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0]
 [   1    0    0    0    0   60    0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
