In [1]:
# =========================
# 0) 라이브러리 & CSV 로드
# =========================
from google.colab import drive
drive.mount('/content/drive')

import os, numpy as np, pandas as pd, tensorflow as tf
from tensorflow.keras import layers, models

CSV_PATH = "/content/drive/MyDrive/mhealth_dataset_combined.csv"  # 경로 확인

df = pd.read_csv(CSV_PATH)
print("raw:", df.shape, "subjects:", sorted(df["subject"].unique()))

Mounted at /content/drive
raw: (1215745, 25) subjects: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10)]


In [2]:
# =========================
# 1) 파라미터
# =========================
FS_HZ         = 50        # 샘플링 주파수
FRAME_SECONDS = 4         # 윈도우 길이(초)
HOP_RATIO     = 0.5       # 0.5=50% overlap, 1.0=겹치기 없음
FEATURE_MODE  = "all"     # "all" or "acc_gyro_only"

BATCH   = 256
AUTOTUNE = tf.data.AUTOTUNE

# =========================
# 2) 컬럼 정리
# =========================
ALL_FEATS = [c for c in df.columns if c not in ("label","subject")]
FEATS = [c for c in ALL_FEATS if ("acc" in c or "gyro" in c)] if FEATURE_MODE=="acc_gyro_only" else ALL_FEATS

df[FEATS] = df[FEATS].astype("float32")
df["label"] = df["label"].astype("int16")

print("features used:", len(FEATS))

features used: 23


In [3]:
# =========================
# 3) subject별 80/20 split
# =========================
train_parts, test_parts = [], []
for sid, g in df.groupby("subject"):
    n = int(len(g) * 0.8)
    train_parts.append(g.iloc[:n])
    test_parts.append(g.iloc[n:])

train_df = pd.concat(train_parts, ignore_index=True)
test_df  = pd.concat(test_parts,  ignore_index=True)

print("train_df:", train_df.shape, "test_df:", test_df.shape)

train_df: (972592, 25) test_df: (243153, 25)


In [4]:
# =========================
# 4) 제너레이터 기반 슬라이딩 윈도우
# =========================
FRAME = FS_HZ * FRAME_SECONDS
HOP   = max(1, int(FRAME * HOP_RATIO))
N_FEATS = len(FEATS)
N_CLASSES = int(df["label"].max()) + 1

# train 통계로 표준화
mu  = train_df[FEATS].mean().to_numpy(dtype=np.float32)
std = train_df[FEATS].std(ddof=0).replace(0, 1.0).to_numpy(dtype=np.float32)

def window_generator(df_part, frame, hop, feats, mu, std):
    X = df_part[feats].to_numpy(dtype=np.float32, copy=False)
    y = df_part["label"].to_numpy(dtype=np.int32, copy=False)
    N = len(df_part)
    for i in range(0, N - frame, hop):
        seg = (X[i:i+frame] - mu) / std
        lab = np.bincount(y[i:i+frame], minlength=N_CLASSES).argmax().astype(np.int32)
        yield seg, lab

def make_ds(df_part, frame, hop, feats, mu, std, batch=256, shuffle=False):
    output_sig = (
        tf.TensorSpec(shape=(frame, len(feats)), dtype=tf.float32),
        tf.TensorSpec(shape=(), dtype=tf.int32),
    )
    ds = tf.data.Dataset.from_generator(
        lambda: window_generator(df_part, frame, hop, feats, mu, std),
        output_signature=output_sig
    )
    if shuffle:
        ds = ds.shuffle(10000)
    return ds.batch(batch).prefetch(AUTOTUNE)

ds_train = make_ds(train_df, FRAME, HOP, FEATS, mu, std, shuffle=True)
ds_test  = make_ds(test_df,  FRAME, HOP, FEATS, mu, std, shuffle=False)

In [5]:
# =========================
# 5) CNN-LSTM 모델 정의
# =========================
def make_cnn_lstm(input_frame, n_feats, n_classes):
    inp = layers.Input(shape=(input_frame, n_feats))
    # CNN 블록
    x = layers.Conv1D(128, 5, padding="same", activation="relu")(inp)
    x = layers.BatchNormalization()(x); x = layers.MaxPool1D(2)(x)
    x = layers.Conv1D(256, 3, padding="same", activation="relu")(x)
    x = layers.BatchNormalization()(x); x = layers.MaxPool1D(2)(x)
    # LSTM 블록
    x = layers.LSTM(128)(x)
    x = layers.Dropout(0.3)(x)
    out = layers.Dense(n_classes, activation="softmax")(x)
    return models.Model(inp, out)

model = make_cnn_lstm(FRAME, N_FEATS, N_CLASSES)
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

In [6]:
# =========================
# 6) 학습 & 평가
# =========================
print("\n=== Train CNN-LSTM ===")
model.fit(ds_train, epochs=20, verbose=1)

print("\n=== Evaluate CNN-LSTM ===")
test_loss, test_acc = model.evaluate(ds_test, verbose=0)
print(f"Test Accuracy: {test_acc:.4f}")


=== Train CNN-LSTM ===
Epoch 1/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 33ms/step - accuracy: 0.6257 - loss: 1.3075
Epoch 2/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 31ms/step - accuracy: 0.8355 - loss: 0.4040
Epoch 3/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 32ms/step - accuracy: 0.8635 - loss: 0.3257
Epoch 4/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 31ms/step - accuracy: 0.8560 - loss: 0.3227
Epoch 5/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 30ms/step - accuracy: 0.8854 - loss: 0.2767
Epoch 6/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 36ms/step - accuracy: 0.8933 - loss: 0.2547
Epoch 7/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 31ms/step - accuracy: 0.8900 - loss: 0.2546
Epoch 8/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 33ms/step - accuracy: 0.9011 - loss: 0.2406
Epoch 9/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 31ms/step - accuracy: 0.9088 - loss: 0.2214
Epoch 10/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 34ms/step - accuracy: 0.9112 - loss: 0.2069
Epoch 11/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 33ms/step - accuracy: 0.9129 - loss: 0.2097
Epoch 12/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 31ms/step - accuracy: 0.9081 - loss: 0.2127
Epoch 13/20
[1m 1/38[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m1:56[0m 3s/step - accuracy: 0.9180 - loss: 0.1679



[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 122ms/step - accuracy: 0.9198 - loss: 0.1881
Epoch 14/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 31ms/step - accuracy: 0.9179 - loss: 0.1909
Epoch 15/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 34ms/step - accuracy: 0.9257 - loss: 0.1822
Epoch 16/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 33ms/step - accuracy: 0.9268 - loss: 0.1769
Epoch 17/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 31ms/step - accuracy: 0.9275 - loss: 0.1771
Epoch 18/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 31ms/step - accuracy: 0.9306 - loss: 0.1642
Epoch 19/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 34ms/step - accuracy: 0.9270 - loss: 0.1785
Epoch 20/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 31ms/step - accuracy: 0.9337 - loss: 0.1596

=== Evaluate CNN-LSTM ===




Test Accuracy: 0.8840




In [7]:
# =========================
# 7) 분류 리포트
# =========================
from sklearn.metrics import classification_report, confusion_matrix

y_true, y_pred = [], []
for xb, yb in ds_test:
    pb = model.predict(xb, verbose=0).argmax(axis=1)
    y_true.append(yb.numpy()); y_pred.append(pb)
y_true = np.concatenate(y_true); y_pred = np.concatenate(y_pred)

print(classification_report(y_true, y_pred, digits=4))
print("Confusion matrix:\n", confusion_matrix(y_true, y_pred))

              precision    recall  f1-score   support

           0     0.9317    0.9518    0.9416      1949
           1     0.0000    0.0000    0.0000         0
           2     0.0000    0.0000    0.0000         0
           4     0.0000    0.0000    0.0000         0
           5     0.8018    0.5779    0.6717       154
           6     0.0000    0.0000    0.0000         0
           8     0.0000    0.0000    0.0000         0
           9     0.7500    0.9836    0.8511        61
          10     0.4692    0.9104    0.6193        67
          11     0.9559    0.4422    0.6047       147
          12     0.6667    0.3462    0.4557        52

    accuracy                         0.8840      2430
   macro avg     0.4159    0.3829    0.3767      2430
weighted avg     0.9019    0.8840    0.8826      2430

Confusion matrix:
 [[1855    8    1    5   22    7    2   20   17    3    9]
 [   0    0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
