In [1]:
# ==============================================
# 0) CSV 불러오기
# ==============================================
import pandas as pd

df = pd.read_csv("/content/drive/MyDrive/mhealth_dataset_combined.csv")
print("data:", df.shape, "subjects:", sorted(df.subject.unique()))
df

data: (1215745, 25) subjects: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10)]


Unnamed: 0,chest_acc_x,chest_acc_y,chest_acc_z,ecg_1,ecg_2,l_ankle_acc_x,l_ankle_acc_y,l_ankle_acc_z,l_ankle_gyro_x,l_ankle_gyro_y,...,r_arm_acc_y,r_arm_acc_z,r_arm_gyro_x,r_arm_gyro_y,r_arm_gyro_z,r_arm_mag_x,r_arm_mag_y,r_arm_mag_z,label,subject
0,-9.8184,0.009971,0.295630,0.004186,0.004186,2.18490,-9.6967,0.63077,0.103900,-0.84053,...,-4.5781,0.187760,-0.44902,-1.01030,0.034483,-2.35000,-1.610200,-0.030899,0,1
1,-9.8489,0.524040,0.373480,0.004186,0.016745,2.38760,-9.5080,0.68389,0.085343,-0.83865,...,-4.3198,0.023595,-0.44902,-1.01030,0.034483,-2.16320,-0.882540,0.326570,0,1
2,-9.6602,0.181850,0.437420,0.016745,0.037677,2.40860,-9.5674,0.68113,0.085343,-0.83865,...,-4.2772,0.275720,-0.44902,-1.01030,0.034483,-1.61750,-0.165620,-0.030693,0,1
3,-9.6507,0.214220,0.240330,0.079540,0.117220,2.18140,-9.4301,0.55031,0.085343,-0.83865,...,-4.3163,0.367520,-0.45686,-1.00820,0.025862,-1.07710,0.006945,-0.382620,0,1
4,-9.7030,0.303890,0.311560,0.221870,0.205130,2.41730,-9.3889,0.71098,0.085343,-0.83865,...,-4.1459,0.407290,-0.45686,-1.00820,0.025862,-0.53684,0.175900,-1.095500,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1215740,-12.2440,-2.970600,-0.035772,0.812140,1.096800,0.57061,-2.5602,0.41936,-0.055659,0.64165,...,-16.9660,2.370400,0.10980,-0.99384,0.191810,-6.12870,15.495000,18.856000,0,9
1215741,-10.9220,-3.419000,-0.339280,1.469400,1.285200,5.26840,-4.9331,9.66020,-0.055659,0.64165,...,-13.3900,2.997200,0.10980,-0.99384,0.191810,-10.24200,17.139000,33.920000,0,9
1215742,-9.4842,-3.064300,-1.033700,0.238620,0.891680,0.53805,-5.9706,2.93600,-0.055659,0.64165,...,-11.3790,3.149800,0.10980,-0.99384,0.191810,-12.94400,16.170000,43.262000,0,9
1215743,-8.7889,-2.475700,-0.612290,-0.205130,0.460490,1.56950,-7.9809,-2.05000,-0.085343,0.43715,...,-10.0000,3.000000,0.11961,-0.97331,0.153020,-14.52400,1.849400,43.373000,0,9


In [8]:
# =========================
# 1) 파라미터
# =========================
FS_HZ         = 50        # 샘플링 주파수
FRAME_SECONDS = 4         # 윈도우 길이(초)
HOP_RATIO     = 0.5       # 0.5=50% overlap, 1.0=겹치기 없음
FEATURE_MODE  = "all"     # "all" or "acc_gyro_only"

BATCH   = 256
AUTOTUNE = tf.data.AUTOTUNE

# =========================
# 2) 컬럼 정리
# =========================
ALL_FEATS = [c for c in df.columns if c not in ("label","subject")]
FEATS = [c for c in ALL_FEATS if ("acc" in c or "gyro" in c)] if FEATURE_MODE=="acc_gyro_only" else ALL_FEATS

df[FEATS] = df[FEATS].astype("float32")
df["label"] = df["label"].astype("int16")

print("features used:", len(FEATS))

features used: 23


In [9]:
# =========================
# 3) subject별 80/20 split
# =========================
train_parts, test_parts = [], []
for sid, g in df.groupby("subject"):
    n = int(len(g) * 0.8)
    train_parts.append(g.iloc[:n])
    test_parts.append(g.iloc[n:])

train_df = pd.concat(train_parts, ignore_index=True)
test_df  = pd.concat(test_parts,  ignore_index=True)

print("train_df:", train_df.shape, "test_df:", test_df.shape)

train_df: (972592, 25) test_df: (243153, 25)


In [10]:
# =========================
# 4) 제너레이터 기반 슬라이딩 윈도우
# =========================
FRAME = FS_HZ * FRAME_SECONDS
HOP   = max(1, int(FRAME * HOP_RATIO))
N_FEATS = len(FEATS)
N_CLASSES = int(df["label"].max()) + 1

# train 통계로 표준화
mu  = train_df[FEATS].mean().to_numpy(dtype=np.float32)
std = train_df[FEATS].std(ddof=0).replace(0, 1.0).to_numpy(dtype=np.float32)

def window_generator(df_part, frame, hop, feats, mu, std):
    X = df_part[feats].to_numpy(dtype=np.float32, copy=False)
    y = df_part["label"].to_numpy(dtype=np.int32, copy=False)
    N = len(df_part)
    for i in range(0, N - frame, hop):
        seg = (X[i:i+frame] - mu) / std
        lab = np.bincount(y[i:i+frame], minlength=N_CLASSES).argmax().astype(np.int32)
        yield seg, lab

def make_ds(df_part, frame, hop, feats, mu, std, batch=BATCH, shuffle=False):
    output_sig = (
        tf.TensorSpec(shape=(frame, len(feats)), dtype=tf.float32),
        tf.TensorSpec(shape=(), dtype=tf.int32),
    )
    ds = tf.data.Dataset.from_generator(
        lambda: window_generator(df_part, frame, hop, feats, mu, std),
        output_signature=output_sig
    )
    if shuffle:
        ds = ds.shuffle(10000)
    return ds.batch(batch).prefetch(AUTOTUNE)

ds_train = make_ds(train_df, FRAME, HOP, FEATS, mu, std, shuffle=True)
ds_test  = make_ds(test_df,  FRAME, HOP, FEATS, mu, std, shuffle=False)


In [12]:
# =========================
# 5) CNN 모델 정의
# =========================
from tensorflow.keras import layers, models

def make_cnn(input_frame, n_feats, n_classes):
    inp = layers.Input(shape=(input_frame, n_feats))
    x = layers.Conv1D(64, 5, padding="same", activation="relu")(inp)
    x = layers.BatchNormalization()(x); x = layers.MaxPool1D(2)(x)
    x = layers.Conv1D(128, 5, padding="same", activation="relu")(x)
    x = layers.BatchNormalization()(x); x = layers.MaxPool1D(2)(x)
    x = layers.Conv1D(256, 3, padding="same", activation="relu")(x)
    x = layers.BatchNormalization()(x)
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dropout(0.3)(x)
    out = layers.Dense(n_classes, activation="softmax")(x)
    return models.Model(inp, out)

model = make_cnn(FRAME, N_FEATS, N_CLASSES)
model.compile(optimizer="adam",
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

In [13]:
# =========================
# 6) 학습 & 평가
# =========================
print("\n=== Train CNN ===")
model.fit(ds_train, epochs=20, verbose=1)

print("\n=== Evaluate CNN ===")
test_loss, test_acc = model.evaluate(ds_test, verbose=0)
print(f"Test Accuracy: {test_acc:.4f}")


=== Train CNN ===
Epoch 1/20
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 129ms/step - accuracy: 0.5077 - loss: 2.0034
Epoch 2/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.8157 - loss: 1.1702
Epoch 3/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.8473 - loss: 0.7873
Epoch 4/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 17ms/step - accuracy: 0.8711 - loss: 0.4976
Epoch 5/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.8821 - loss: 0.3544
Epoch 6/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.8741 - loss: 0.3306
Epoch 7/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.8908 - loss: 0.2836
Epoch 8/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.8935 - loss: 0.2720
Epoch 9/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.8982 - loss: 0.2507
Epoch 10/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.9119 - loss: 0.2354
Epoch 11/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.9045 - loss: 0.2344
Epoch 12/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.9032 - loss: 0.2280
Epoch 13/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.9017 - loss: 0.2313
Epoch 14/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.9139 - loss: 0.1988
Epoch 15/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.9110 - loss: 0.2140
Epoch 16/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 18ms/step - accuracy: 0.9165 - loss: 0.1973
Epoch 17/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.9201 - loss: 0.1890
Epoch 18/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.9308 - loss: 0.1736
Epoch 19/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.9255 - loss: 0.1777
Epoch 20/20




[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.9300 - loss: 0.1718

=== Evaluate CNN ===




Test Accuracy: 0.8770




In [14]:
# =========================
# 7) 분류 리포트
# =========================
from sklearn.metrics import classification_report, confusion_matrix

y_true, y_pred = [], []
for xb, yb in ds_test:
    pb = model.predict(xb, verbose=0).argmax(axis=1)
    y_true.append(yb.numpy()); y_pred.append(pb)
y_true = np.concatenate(y_true); y_pred = np.concatenate(y_pred)

print(classification_report(y_true, y_pred, digits=4))
print("Confusion matrix:\n", confusion_matrix(y_true, y_pred))

              precision    recall  f1-score   support

           0     0.9256    0.9379    0.9317      1949
           1     0.0000    0.0000    0.0000         0
           4     0.0000    0.0000    0.0000         0
           5     0.7009    0.5325    0.6052       154
           6     0.0000    0.0000    0.0000         0
           8     0.0000    0.0000    0.0000         0
           9     0.6061    0.9836    0.7500        61
          10     0.6019    0.9254    0.7294        67
          11     0.8681    0.5374    0.6639       147
          12     0.7143    0.3846    0.5000        52

    accuracy                         0.8770      2430
   macro avg     0.4417    0.4301    0.4180      2430
weighted avg     0.8864    0.8770    0.8754      2430

Confusion matrix:
 [[1828    7    3   35    3    3   39   11   12    8]
 [   0    0    0    0    0    0    0    0    0    0]
 [   0    0    0    0    0    0    0    0    0    0]
 [  71    0    0   82    0    1    0    0    0    0]
 [   0    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
