<a href="https://colab.research.google.com/github/Heoyuna0819/machine_learning/blob/main/Mhealth_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import os
import pandas as pd

# 데이터셋 경로 설정
DATA_DIR = "/content/drive/MyDrive/MHEALTHDATASET"

# UCI MHEALTH dataset 컬럼 정의
COLS = [
    "chest_acc_x","chest_acc_y","chest_acc_z",
    "ecg_1","ecg_2",
    "ankle_acc_x","ankle_acc_y","ankle_acc_z",
    "ankle_gyro_x","ankle_gyro_y","ankle_gyro_z",
    "ankle_mag_x","ankle_mag_y","ankle_mag_z",
    "arm_acc_x","arm_acc_y","arm_acc_z",
    "arm_gyro_x","arm_gyro_y","arm_gyro_z",
    "arm_mag_x","arm_mag_y","arm_mag_z",
    "label"
]

# 모든 subject 데이터 읽어서 합치기
dfs = []
for sid in range(1, 11):  # subject 1~10
    file_path = os.path.join(DATA_DIR, f"mHealth_subject{sid}.log")
    df = pd.read_csv(file_path, sep="\t", header=None, names=COLS)
    df["subject"] = sid   # subject 번호 추가
    dfs.append(df)

# 하나의 DataFrame으로 합치기
full_df = pd.concat(dfs, ignore_index=True)

# Null 클래스(라벨=0)는 제거
full_df = full_df[full_df["label"] != 0].reset_index(drop=True)

print(full_df.shape)
print(full_df.head())

(343195, 25)
   chest_acc_x  chest_acc_y  chest_acc_z     ecg_1     ecg_2  ankle_acc_x  \
0      -9.7788      0.55690      1.19750  0.008373 -0.033490       2.6493   
1      -9.7733      0.27880      0.73036 -0.025118 -0.025118       2.4157   
2      -9.8609      0.11561      0.79988  0.025118  0.016745       2.3865   
3      -9.7409      0.17652      0.88957  0.180010  0.129770       2.3758   
4      -9.7821      0.21637      0.90368  0.092098  0.046049       2.3239   

   ankle_acc_y  ankle_acc_z  ankle_gyro_x  ankle_gyro_y  ...  arm_acc_y  \
0      -9.4517      0.37683      -0.20965      -0.88931  ...    -9.0618   
1      -9.5306      0.40179      -0.20965      -0.88931  ...    -9.2048   
2      -9.5991      0.48141      -0.20037      -0.86867  ...    -9.1945   
3      -9.5997      0.42919      -0.20037      -0.86867  ...    -9.1746   
4      -9.5406      0.40038      -0.20037      -0.86867  ...    -9.2039   

   arm_acc_z  arm_gyro_x  arm_gyro_y  arm_gyro_z  arm_mag_x  arm_mag_y  \

In [3]:
import numpy as np

# 윈도우 크기 & stride
FS = 50          # 주파수 50Hz
WIN = 2 * FS     # 2초 = 100 샘플
STRIDE = WIN // 2  # 절반 겹치기 = 50 샘플

# feature 컬럼 (라벨과 subject 제외)
FEATURE_COLS = [c for c in full_df.columns if c not in ["label", "subject"]]

def make_windows_by_subject(df, min_ratio=0.0):
    Xs, ys, subs = [], [], []

    # subject별로 안전하게 자르기
    for sid, part in df.groupby("subject", sort=True):
        arr = part[FEATURE_COLS].values.astype(np.float32)  # (N_s, 23)
        labels = part["label"].values.astype(np.int32)      # (N_s,)
        n = len(part)
        i = 0
        while i + WIN <= n:
            w_labels = labels[i:i+WIN]
            w_vals = arr[i:i+WIN]

            # 최빈값(label)과 비율
            binc = np.bincount(w_labels)
            maj = np.argmax(binc)
            maj_ratio = binc[maj] / WIN

            if maj_ratio >= min_ratio:
                Xs.append(w_vals)
                ys.append(maj)
                subs.append(sid)

            i += STRIDE

    X = np.array(Xs)             # (num_windows, 100, 23)
    y = np.array(ys, dtype=int)  # (num_windows,)
    s = np.array(subs, dtype=int)
    return X, y, s


X_all, y_all, s_all = make_windows_by_subject(full_df, min_ratio=0.0)

print("X_all shape:", X_all.shape)
print("y_all shape:", y_all.shape)
print("subject unique:", np.unique(s_all))
# 라벨 1~12 분포
counts = np.bincount(y_all)
print("라벨 분포(1~12):", counts[1:13])

X_all shape: (6849, 100, 23)
y_all shape: (6849,)
subject unique: [ 1  2  3  4  5  6  7  8  9 10]
라벨 분포(1~12): [610 615 615 614 608 567 587 587 614 614 616 202]


In [4]:
import numpy as np

# 5개 그룹
pairs = [(1,2), (3,4), (5,6), (7,8), (9,10)]

def split_by_subject(X, y, s, test_pair):
    """
    X: 윈도우 입력 데이터 (shape: [N, T, D])
    y: 라벨 (shape: [N])
    s: subject 번호 (shape: [N])
    test_pair: (예: (1,2)) 테스트 subject 번호 쌍
    """
    test_mask = np.isin(s, test_pair)   # 테스트셋에 해당하는 subject만 True
    X_train, y_train = X[~test_mask], y[~test_mask]
    X_test,  y_test  = X[test_mask],  y[test_mask]
    return X_train, y_train, X_test, y_test

# 다섯 fold 순환
for i, pair in enumerate(pairs, 1):
    X_tr, y_tr, X_te, y_te = split_by_subject(X_all, y_all, s_all, pair)
    print(f"Fold {i} | Test subjects={pair}")
    print("  Train set:", X_tr.shape, y_tr.shape)
    print("  Test set :", X_te.shape, y_te.shape)


Fold 1 | Test subjects=(1, 2)
  Train set: (5438, 100, 23) (5438,)
  Test set : (1411, 100, 23) (1411,)
Fold 2 | Test subjects=(3, 4)
  Train set: (5438, 100, 23) (5438,)
  Test set : (1411, 100, 23) (1411,)
Fold 3 | Test subjects=(5, 6)
  Train set: (5529, 100, 23) (5529,)
  Test set : (1320, 100, 23) (1320,)
Fold 4 | Test subjects=(7, 8)
  Train set: (5500, 100, 23) (5500,)
  Test set : (1349, 100, 23) (1349,)
Fold 5 | Test subjects=(9, 10)
  Train set: (5491, 100, 23) (5491,)
  Test set : (1358, 100, 23) (1358,)


In [5]:
import numpy as np

# 5개 그룹
pairs = [(1,2), (3,4), (5,6), (7,8), (9,10)]

def split_by_subject(X, y, s, test_pair):
    test_mask = np.isin(s, test_pair)
    X_train, y_train = X[~test_mask], y[~test_mask]
    X_test,  y_test  = X[test_mask],  y[test_mask]
    return X_train, y_train, X_test, y_test

def standardize(train_X, test_X):

    # 훈련 데이터 기준으로 mean/std 계산
    mean = train_X.mean(axis=(0,1), keepdims=True)
    std = train_X.std(axis=(0,1), keepdims=True)
    std[std == 0] = 1.0

    # 표준화 적용
    train_X = (train_X - mean) / std
    test_X  = (test_X  - mean) / std
    return train_X, test_X, mean, std

# 5-Fold 교차검증용 루프
for i, pair in enumerate(pairs, 1):
    X_tr, y_tr, X_te, y_te = split_by_subject(X_all, y_all, s_all, pair)
    X_tr, X_te, mean, std = standardize(X_tr, X_te)

    print(f"Fold {i} | Test subjects={pair}")
    print("  Train set:", X_tr.shape, y_tr.shape)
    print("  Test set :", X_te.shape, y_te.shape)
    print("  평균:", np.round(mean.mean(), 3), " | 표준편차:", np.round(std.mean(), 3))
    print("-" * 50)

Fold 1 | Test subjects=(1, 2)
  Train set: (5438, 100, 23) (5438,)
  Test set : (1411, 100, 23) (1411,)
  평균: -1.097  | 표준편차: 14.805
--------------------------------------------------
Fold 2 | Test subjects=(3, 4)
  Train set: (5438, 100, 23) (5438,)
  Test set : (1411, 100, 23) (1411,)
  평균: -1.105  | 표준편차: 14.749
--------------------------------------------------
Fold 3 | Test subjects=(5, 6)
  Train set: (5529, 100, 23) (5529,)
  Test set : (1320, 100, 23) (1320,)
  평균: -1.076  | 표준편차: 14.514
--------------------------------------------------
Fold 4 | Test subjects=(7, 8)
  Train set: (5500, 100, 23) (5500,)
  Test set : (1349, 100, 23) (1349,)
  평균: -1.137  | 표준편차: 14.941
--------------------------------------------------
Fold 5 | Test subjects=(9, 10)
  Train set: (5491, 100, 23) (5491,)
  Test set : (1358, 100, 23) (1358,)
  평균: -1.045  | 표준편차: 14.942
--------------------------------------------------


In [6]:
import numpy as np

# Utility

def one_hot(y, num_classes):
    oh = np.zeros((y.shape[0], num_classes), dtype=np.float32)
    oh[np.arange(y.shape[0]), y - 1] = 1.0
    return oh

def softmax(x):
    x = x - x.max(axis=1, keepdims=True)
    e = np.exp(x)
    return e / (e.sum(axis=1, keepdims=True) + 1e-12)

def cross_entropy(probs, y_onehot):
    return -np.mean(np.sum(y_onehot * np.log(probs + 1e-12), axis=1))

def macro_f1(y_true, y_pred, num_classes=12):
    f1s = []
    for c in range(1, num_classes + 1):
        tp = np.sum((y_pred == c) & (y_true == c))
        fp = np.sum((y_pred == c) & (y_true != c))
        fn = np.sum((y_pred != c) & (y_true == c))
        prec = tp / (tp + fp + 1e-12)
        rec = tp / (tp + fn + 1e-12)
        f1 = 2 * prec * rec / (prec + rec + 1e-12)
        f1s.append(f1)
    return float(np.mean(f1s))

# LayerNorm

class LayerNorm:
    def __init__(self, dim, eps=1e-5):
        self.gamma = np.ones((1, dim), dtype=np.float32)
        self.beta  = np.zeros((1, dim), dtype=np.float32)
        self.eps = eps

    def forward(self, x):
        self.x = x
        self.mean = x.mean(axis=1, keepdims=True)
        self.var  = x.var(axis=1, keepdims=True)
        self.norm = (x - self.mean) / np.sqrt(self.var + self.eps)
        return self.gamma * self.norm + self.beta

    def backward(self, grad):
        N, D = grad.shape
        gamma = self.gamma

        dnorm = grad * gamma
        dvar  = np.sum(dnorm * (self.x - self.mean) * -0.5 *
                       (self.var + self.eps)**(-3/2), axis=1, keepdims=True)
        dmean = np.sum(dnorm * -1 / np.sqrt(self.var + self.eps), axis=1, keepdims=True) \
                + dvar * np.mean(-2 * (self.x - self.mean), axis=1, keepdims=True)

        dx = dnorm / np.sqrt(self.var + self.eps) \
             + dvar * 2 * (self.x - self.mean) / D \
             + dmean / D

        self.dgamma = np.sum(grad * self.norm, axis=0, keepdims=True)
        self.dbeta  = np.sum(grad, axis=0, keepdims=True)
        return dx

# LSTM with Backward (Full)

class LSTM:
    def __init__(self, input_dim, hidden_dim, rng):
        self.in_dim = input_dim
        self.h_dim = hidden_dim
        lim = np.sqrt(1.0 / (input_dim + hidden_dim))

        self.Wx = rng.uniform(-lim, lim, (input_dim, 4 * hidden_dim)).astype(np.float32)
        self.Wh = rng.uniform(-lim, lim, (hidden_dim, 4 * hidden_dim)).astype(np.float32)
        self.b  = np.zeros((4 * hidden_dim,), dtype=np.float32)

    def forward(self, X):
        N, T, D = X.shape
        H = self.h_dim

        self.x_list = []
        self.i_list = []
        self.f_list = []
        self.o_list = []
        self.g_list = []
        self.c_list = []
        self.h_list = []

        h = np.zeros((N, H), dtype=np.float32)
        c = np.zeros((N, H), dtype=np.float32)

        self.c_list.append(c.copy())
        self.h_list.append(h.copy())

        for t in range(T):
            x_t = X[:, t, :]
            a = x_t @ self.Wx + h @ self.Wh + self.b

            i = 1 / (1 + np.exp(-a[:, :H]))
            f = 1 / (1 + np.exp(-a[:, H:2*H]))
            o = 1 / (1 + np.exp(-a[:, 2*H:3*H]))
            g = np.tanh(a[:, 3*H:])

            c = f * c + i * g
            h = o * np.tanh(c)

            self.x_list.append(x_t)
            self.i_list.append(i)
            self.f_list.append(f)
            self.o_list.append(o)
            self.g_list.append(g)
            self.c_list.append(c.copy())
            self.h_list.append(h.copy())

        return np.stack(self.h_list[1:], axis=1)


    def backward(self, dh_last):
        N, H = dh_last.shape
        T = len(self.x_list)

        dWx = np.zeros_like(self.Wx)
        dWh = np.zeros_like(self.Wh)
        db  = np.zeros_like(self.b)

        dx = np.zeros((N, T, self.in_dim), dtype=np.float32)

        dh_next = dh_last
        dc_next = np.zeros((N, H), dtype=np.float32)

        for t in reversed(range(T)):
            x_t = self.x_list[t]
            i = self.i_list[t]
            f = self.f_list[t]
            o = self.o_list[t]
            g = self.g_list[t]
            c_t = self.c_list[t+1]
            c_prev = self.c_list[t]
            h_prev = self.h_list[t]

            tanh_c = np.tanh(c_t)

            do = dh_next * tanh_c
            dc = dh_next * o * (1 - tanh_c**2) + dc_next

            df = dc * c_prev
            di = dc * g
            dg = dc * i
            dc_prev = dc * f

            di_in = di * i * (1 - i)
            df_in = df * f * (1 - f)
            do_in = do * o * (1 - o)
            dg_in = dg * (1 - g**2)

            da = np.concatenate([di_in, df_in, do_in, dg_in], axis=1)

            dWx += x_t.T @ da
            dWh += h_prev.T @ da
            db  += da.sum(axis=0)

            dx[:, t, :] = da @ self.Wx.T
            dh_prev = da @ self.Wh.T

            dh_next = dh_prev
            dc_next = dc_prev

        self.dWx, self.dWh, self.db = dWx, dWh, db
        return dx

# Dense
class Dense:
    def __init__(self, in_dim, out_dim, rng):
        lim = np.sqrt(6.0/(in_dim + out_dim))
        self.W = rng.uniform(-lim, lim, (in_dim, out_dim)).astype(np.float32)
        self.b = np.zeros((out_dim,), dtype=np.float32)

        self.mW = np.zeros_like(self.W)
        self.vW = np.zeros_like(self.W)
        self.mb = np.zeros_like(self.b)
        self.vb = np.zeros_like(self.b)

    def forward(self, x):
        self.x = x
        return x @ self.W + self.b

class SimpleLSTMPlus:

    def __init__(self, T, D, H=256, num_classes=12):
        rng = np.random.RandomState(42)

        self.lstm = LSTM(D, H, rng)
        self.norm = LayerNorm(H)

        # attention parameters
        self.att_w = rng.uniform(-0.1, 0.1, (H, 1)).astype(np.float32)

        self.fc1 = Dense(H, 128, rng)
        self.fc2 = Dense(128, num_classes, rng)

        self.lr = 7e-4
        self.beta1=0.9; self.beta2=0.999
        self.eps = 1e-8
        self.t = 0

        self.dropout = 0.25
        self.training = True


    def _adam(self, param, grad, m, v):
        self.t += 1
        m[:] = self.beta1*m + (1-self.beta1)*grad
        v[:] = self.beta2*v + (1-self.beta2)*(grad*grad)
        mhat = m / (1 - self.beta1**self.t)
        vhat = v / (1 - self.beta2**self.t)
        param -= self.lr * mhat / (np.sqrt(vhat) + self.eps)


    def forward(self, X):
        hs = self.lstm.forward(X)                 # (N,T,H)

        # LayerNorm
        N,T,H = hs.shape
        hs_norm = self.norm.forward(hs.reshape(N*T, H)).reshape(N, T, H)

        # attention score
        score = hs_norm @ self.att_w              # (N,T,1)
        score = np.exp(score - score.max(axis=1,keepdims=True))
        att = score / (score.sum(axis=1, keepdims=True)+1e-12)

        # weighted pooling
        h_att = (hs_norm * att).sum(axis=1)        # (N,H)

        if self.training:
            mask = (np.random.rand(*h_att.shape) > self.dropout).astype(np.float32)
            h_att = h_att * mask / (1 - self.dropout)

        z1 = self.fc1.forward(h_att)
        h1 = np.maximum(z1, 0)

        logits = self.fc2.forward(h1)
        return logits, h1, h_att, hs_norm, att


    def step(self):
        clip = 5.0
        for g in [self.fc2.dW, self.fc2.db, self.fc1.dW, self.fc1.db,
                  self.lstm.dWx, self.lstm.dWh, self.lstm.db]:
            np.clip(g, -clip, clip, out=g)

        # FC updates
        self._adam(self.fc2.W, self.fc2.dW, self.fc2.mW, self.fc2.vW)
        self._adam(self.fc2.b, self.fc2.db, self.fc2.mb, self.fc2.vb)
        self._adam(self.fc1.W, self.fc1.dW, self.fc1.mW, self.fc1.vW)
        self._adam(self.fc1.b, self.fc1.db, self.fc1.mb, self.fc1.vb)

        # LSTM updates
        self._adam(self.lstm.Wx, self.lstm.dWx, np.zeros_like(self.lstm.Wx), np.zeros_like(self.lstm.Wx))
        self._adam(self.lstm.Wh, self.lstm.dWh, np.zeros_like(self.lstm.Wh), np.zeros_like(self.lstm.Wh))
        self._adam(self.lstm.b,  self.lstm.db,  np.zeros_like(self.lstm.b),  np.zeros_like(self.lstm.b))


    def fit(self, Xtr, ytr, Xte, yte, epochs=15, batch=64):
        N = Xtr.shape[0]
        idxs = np.arange(N)

        for ep in range(1, epochs+1):
            np.random.shuffle(idxs)
            self.training = True
            losses = []

            for i in range(0, N, batch):
                b_idx = idxs[i:i+batch]
                xb = Xtr[b_idx]
                yb = ytr[b_idx]
                yb_oh = one_hot(yb, 12)

                logits, h1, h_att, hs_norm, att = self.forward(xb)
                probs = softmax(logits)
                loss = cross_entropy(probs, yb_oh)
                losses.append(loss)

                # backward: Dense2
                dlogits = (probs - yb_oh) / batch
                self.fc2.dW = h1.T @ dlogits
                self.fc2.db = dlogits.sum(axis=0)

                # backward: Dense1 (ReLU)
                dh1 = dlogits @ self.fc2.W.T
                drelu = dh1 * (h1 > 0)

                self.fc1.dW = self.fc1.x.T @ drelu
                self.fc1.db = drelu.sum(axis=0)

                # backward: LSTM ← h_att
                dh_att = drelu @ self.fc1.W.T
                dhs = np.repeat(dh_att[:,None,:], Xtr.shape[1], axis=1)
                self.lstm.backward(dhs.mean(axis=1))

                self.step()

            self.training = False
            pred = self.predict(Xte)
            f1 = macro_f1(yte, pred, 12)
            print(f"[Epoch {ep}] loss={np.mean(losses):.4f} | macroF1(te)={f1:.4f}")

        return f1


    def predict(self, X):
        logits, *_ = self.forward(X)
        return np.argmax(softmax(logits), axis=1) + 1


In [7]:
def run_lstm_crossval(X_all, y_all, s_all, pairs, epochs=15, batch=64, lr=1e-3):
    f1s = []
    for i, pair in enumerate(pairs, 1):
        X_tr, y_tr, X_te, y_te = split_by_subject(X_all, y_all, s_all, pair)
        X_tr, X_te, mean, std = standardize(X_tr, X_te)

        model = SimpleLSTMPlus(T=X_tr.shape[1], D=X_tr.shape[2], H=256)
        model.lr = lr

        print(f"\n=== Fold {i} | Test subjects={pair} ===")
        f1 = model.fit(X_tr, y_tr, X_te, y_te, epochs=epochs, batch=batch)
        f1s.append(f1)

        acc = np.mean(model.predict(X_te) == y_te)
        print(f"Fold {i} done. Acc={acc:.4f}, Macro-F1={f1:.4f}")

    print("\n=== Summary ===")
    print("Per-fold:", [f"{v:.4f}" for v in f1s])
    print("Mean:", np.mean(f1s))
    return f1s, float(np.mean(f1s))


In [8]:
f1s_lstm, mean_f1_lstm = run_lstm_crossval(
    X_all, y_all, s_all, pairs,
    epochs=15, batch=64, lr=1e-3
)

print("\n최종 평균 Macro-F1 (LSTM):", mean_f1_lstm)


=== Fold 1 | Test subjects=(1, 2) ===
[Epoch 1] loss=0.5831 | macroF1(te)=0.6180
[Epoch 2] loss=0.1533 | macroF1(te)=0.7459
[Epoch 3] loss=0.1914 | macroF1(te)=0.7669
[Epoch 4] loss=0.1420 | macroF1(te)=0.7074
[Epoch 5] loss=0.1322 | macroF1(te)=0.8128
[Epoch 6] loss=0.1189 | macroF1(te)=0.6490
[Epoch 7] loss=0.2355 | macroF1(te)=0.6849
[Epoch 8] loss=0.1017 | macroF1(te)=0.7758
[Epoch 9] loss=0.1646 | macroF1(te)=0.6168
[Epoch 10] loss=0.1664 | macroF1(te)=0.6910
[Epoch 11] loss=0.1246 | macroF1(te)=0.7191
[Epoch 12] loss=0.1607 | macroF1(te)=0.8207
[Epoch 13] loss=0.1852 | macroF1(te)=0.6941
[Epoch 14] loss=0.1549 | macroF1(te)=0.8318
[Epoch 15] loss=0.1191 | macroF1(te)=0.7954
Fold 1 done. Acc=0.7881, Macro-F1=0.7954

=== Fold 2 | Test subjects=(3, 4) ===
[Epoch 1] loss=0.5737 | macroF1(te)=0.8051
[Epoch 2] loss=0.1929 | macroF1(te)=0.8436
[Epoch 3] loss=0.1668 | macroF1(te)=0.8177
[Epoch 4] loss=0.3267 | macroF1(te)=0.7680
[Epoch 5] loss=0.1590 | macroF1(te)=0.8093
[Epoch 6] loss=