In [1]:
# ==== Cell 1: Config & Imports ====
import os, math, time, gc, random
import numpy as np
import pandas as pd
from collections import Counter, defaultdict

# 평가/로그 도구(모델 아님)
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Reproducibility: seed both the stdlib and the NumPy global RNGs
SEED = 42
random.seed(SEED); np.random.seed(SEED)

# Path / data settings ({sid} is replaced by the subject id when loading)
DATA_PATH_TEMPLATE = "/content/drive/MyDrive/Colab Notebooks/mHealth_subject{sid}.log"

SAMPLE_RATE_HZ = 50
WIN = 64            # window length (time steps)
STRIDE = 64         # stride (equal to WIN, so windows do not overlap)
MAJ_THRESH = 0.80   # minimum share of a window that the majority label must cover

# Channels / classes
IN_CHANNELS = 23    # columns 1..23 (the 2 ECG channels are kept; the paper states ECG is not used, but they can be used as-is here)
N_CLASSES  = 12     # labels 1..12 (label 0 is excluded)

# Model hyperparameters
CONV_FILTERS = 8
KERNEL_SIZE  = 3
POOL_SIZE    = 2

HIDDEN_UNITS = 32   # hidden size of each BiLSTM direction
LABEL_SMOOTHING = 0.0

# Training
EPOCHS = 3          # with pure NumPy, starting with 2-5 epochs is recommended
BATCH_SIZE = 16
LR = 0.005
MOMENTUM = 0.9
GRAD_CLIP = 5.0
WEIGHT_DECAY = 0.0  # L2 penalty (optional)

# CV folds (subject-based): each tuple lists the held-out test subjects of one fold
FOLDS = [(1,2), (3,4), (5,6), (7,8), (9,10)]


In [None]:
# ==== Cell 2: Load & Merge ====
def load_subject_df(sid, path_tmpl=DATA_PATH_TEMPLATE):
    """Read one subject's log file into a DataFrame.

    The file is parsed as header-less, whitespace-separated values. Its
    columns are renamed to ``f1``..``f23`` followed by ``label``, and a
    ``subject`` column holding ``sid`` is appended.

    Args:
        sid: Subject id substituted into ``path_tmpl`` as ``{sid}``.
        path_tmpl: Format string producing the file path.

    Returns:
        The parsed DataFrame with the renamed columns plus ``subject``.
    """
    # Per the dataset documentation: columns 1..23 are features, 24 is the label.
    column_names = [f"f{i}" for i in range(1, 24)]
    column_names.append("label")

    frame = pd.read_csv(
        path_tmpl.format(sid=sid),
        sep=r'\s+',
        header=None,
        engine='python',
    )
    frame.columns = column_names
    frame["subject"] = sid
    return frame

# Load subjects 1..10 one by one and stack them into a single table.
dfs = []
for sid in range(1, 11):
    df = load_subject_df(sid)
    dfs.append(df)
full_df = pd.concat(dfs, axis=0, ignore_index=True)

# Drop rows whose label is 0
full_df = full_df[full_df["label"] != 0].reset_index(drop=True)

# Report the row count of the merged table (label != 0) and preview 3 rows
print("통합 테이블(라벨!=0) 총 행 수:", len(full_df))
print(full_df.head(3))


In [None]:
# ==== Cell 3: Normalize & Windowing ====
# Per-subject standardization (z-score): every feature column is normalized
# with the mean / standard deviation of that subject's own rows. Label
# values are not used.
def zscore_per_subject(df, feature_cols):
    """Return a copy of ``df`` with ``feature_cols`` z-scored per subject.

    Rows are grouped by the ``subject`` column; the groups are concatenated
    in group order and the result gets a fresh 0..n-1 index.
    """
    normalized_parts = []
    for _, group in df.groupby("subject"):
        values = group[feature_cols].values
        center = values.mean(axis=0, keepdims=True)
        # The small constant keeps constant-valued columns from dividing by zero.
        spread = values.std(axis=0, keepdims=True) + 1e-8
        scaled = group.copy()
        scaled[feature_cols] = (values - center) / spread
        normalized_parts.append(scaled)
    return pd.concat(normalized_parts, axis=0, ignore_index=True)

FEATURE_COLS = [f"f{i}" for i in range(1,24)]
full_df_norm = zscore_per_subject(full_df, FEATURE_COLS)

def make_windows_for_subject(df_subj, win=WIN, stride=STRIDE, maj_thresh=MAJ_THRESH):
    """Cut one subject's rows into fixed-length, majority-labelled windows.

    A window is kept only when its most frequent label covers at least
    ``maj_thresh`` of its ``win`` rows. Kept labels are shifted by -1
    (1..12 -> 0..11).

    Returns:
        ``(X, y)``: ``X`` is float32 with shape (n_windows, win, n_features)
        and ``y`` is int64 with shape (n_windows,). Both are empty when no
        window qualifies.
    """
    features = df_subj[FEATURE_COLS].values   # (T, n_features)
    targets = df_subj["label"].values         # (T,)
    n_rows = len(df_subj)

    kept_x, kept_y = [], []
    for lo in range(0, n_rows - win + 1, stride):
        hi = lo + win
        top_label, top_count = Counter(targets[lo:hi]).most_common(1)[0]
        if top_count / win >= maj_thresh:
            kept_x.append(features[lo:hi, :])   # (win, n_features)
            kept_y.append(top_label - 1)

    if kept_x:
        return np.stack(kept_x).astype(np.float32), np.array(kept_y, dtype=np.int64)
    return (
        np.empty((0, win, features.shape[1]), dtype=np.float32),
        np.empty((0,), dtype=np.int64),
    )

def build_dataset_for_subjects(subject_ids):
    """Build the windowed dataset for the given subjects.

    Each subject's rows are taken from ``full_df_norm`` and windowed
    separately, so no window spans two subjects. Subjects that yield no
    window are skipped.

    Returns:
        ``(X, y)`` concatenated over subjects, or empty arrays shaped
        (0, WIN, n_features) and (0,) when nothing was produced.
    """
    per_subject = []
    for sid in subject_ids:
        rows = full_df_norm[full_df_norm["subject"] == sid].reset_index(drop=True)
        win_x, win_y = make_windows_for_subject(rows)
        if len(win_y) > 0:
            per_subject.append((win_x, win_y))

    if not per_subject:
        return (
            np.empty((0, WIN, len(FEATURE_COLS)), dtype=np.float32),
            np.empty((0,), dtype=np.int64),
        )

    all_x = [pair[0] for pair in per_subject]
    all_y = [pair[1] for pair in per_subject]
    return np.concatenate(all_x, axis=0), np.concatenate(all_y, axis=0)

# Quick sanity check: build a small train / test split and print the shapes
trX, trY = build_dataset_for_subjects([1,2,3])
tsX, tsY = build_dataset_for_subjects([4])
print("예시 shapes:", trX.shape, trY.shape, tsX.shape, tsY.shape)


In [None]:
# ==== Cell 4: Utils (one-hot, batching, loss etc.) ====
def one_hot(y, num_classes):
    """Encode integer class ids ``y`` as a float32 (len(y), num_classes) matrix."""
    n_samples = len(y)
    encoded = np.zeros((n_samples, num_classes), dtype=np.float32)
    row_ids = np.arange(n_samples)
    encoded[row_ids, y] = 1.0   # one 1.0 per row, at that row's class column
    return encoded

def smooth_labels(y_onehot, eps=LABEL_SMOOTHING):
    """Apply label smoothing to one-hot targets.

    With ``eps <= 0`` the input is returned unchanged (same object).
    Otherwise every row becomes ``(1 - eps) * onehot + eps / K``, where
    ``K`` is the number of columns.
    """
    if eps <= 0.0:
        return y_onehot
    n_classes = y_onehot.shape[1]
    uniform_share = eps / n_classes
    return (1 - eps) * y_onehot + uniform_share

def iterate_minibatches(X, y, batch_size, shuffle=True):
    """Yield ``(X_batch, y_batch)`` pairs covering the dataset once.

    When ``shuffle`` is true the sample order is permuted with NumPy's
    global RNG. The last batch may be smaller than ``batch_size``.
    """
    total = len(y)
    order = np.arange(total)
    if shuffle:
        np.random.shuffle(order)
    for offset in range(0, total, batch_size):
        picked = order[offset:offset + batch_size]
        yield X[picked], y[picked]

def softmax(z):
    """Row-wise softmax of a 2-D array of logits.

    The row maximum is subtracted before exponentiating so large logits do
    not overflow; a tiny constant guards the denominator.
    """
    shifted = z - z.max(axis=1, keepdims=True)
    exps = np.exp(shifted)
    denom = exps.sum(axis=1, keepdims=True) + 1e-12
    return exps / denom

def cross_entropy(pred, target_onehot):
    """Mean cross-entropy between probabilities and (soft) one-hot targets.

    Args:
        pred: (B, K) softmax probabilities.
        target_onehot: (B, K) target distribution per sample.
    """
    tiny = 1e-12   # keeps log() finite when a probability is exactly 0
    log_probs = np.log(pred + tiny)
    per_sample = -np.sum(target_onehot * log_probs, axis=1)
    return per_sample.mean()

def accuracy(pred_class, y):
    """Return the fraction of positions where ``pred_class`` equals ``y``."""
    matches = pred_class == y
    return matches.mean()


In [None]:
# ==== Cell 5: Conv1D & MaxPool1D (from scratch) ====
class Conv1D:
    """NumPy 1-D convolution over the time axis with padding='valid'.

    Input is (B, T, C_in) and output is (B, T - ksize + 1, C_out). Weights
    are stored as ``W[out_ch, in_ch, ksize]`` with a per-filter bias ``b``;
    ``gW`` / ``gb`` hold the gradients from the latest ``backward`` call.

    Both passes loop over the ``ksize`` kernel taps only and use batched
    matrix products for everything else, instead of Python loops over
    every (batch, time, filter) position.
    """

    def __init__(self, in_ch, out_ch, ksize):
        self.in_ch = in_ch
        self.out_ch = out_ch
        self.ksize = ksize
        # Xavier-like uniform init
        lim = math.sqrt(6.0 / (in_ch*ksize + out_ch))
        self.W = np.random.uniform(-lim, lim, size=(out_ch, in_ch, ksize)).astype(np.float32)
        self.b = np.zeros((out_ch,), dtype=np.float32)
        # grads
        self.gW = np.zeros_like(self.W)
        self.gb = np.zeros_like(self.b)
        # input of the latest forward pass (needed by backward)
        self.cache = None

    def forward(self, x):
        """Convolve ``x`` (B, T, C_in) and return float32 (B, T-K+1, C_out).

        Raises:
            ValueError: if the sequence is shorter than the kernel.
        """
        B, T, C = x.shape
        K = self.ksize
        out_T = T - K + 1
        if out_T < 0:
            raise ValueError(
                f"sequence length {T} is shorter than kernel size {K}"
            )
        y = np.zeros((B, out_T, self.out_ch), dtype=np.float32)
        # y[b, t, oc] = sum_k sum_c x[b, t+k, c] * W[oc, c, k] + b[oc]
        for k in range(K):
            y += x[:, k:k + out_T, :] @ self.W[:, :, k].T   # (B,out_T,C)@(C,OC)
        y += self.b
        self.cache = x
        return y

    def backward(self, dy):
        """Back-propagate ``dy`` (B, T_out, C_out).

        Overwrites ``gW`` and ``gb`` in place and returns the gradient with
        respect to the cached input, shaped like that input.
        """
        x = self.cache
        K = self.ksize
        T_out = dy.shape[1]
        dx = np.zeros_like(x)

        # Bias gradient: sum over batch and time.
        self.gb[...] = dy.sum(axis=(0, 1))
        for k in range(K):
            x_k = x[:, k:k + T_out, :]                       # (B, T_out, C)
            # gW[oc, c, k] = sum_{b,t} dy[b, t, oc] * x[b, t+k, c]
            self.gW[:, :, k] = np.tensordot(dy, x_k, axes=([0, 1], [0, 1]))
            # dx[b, t+k, c] += sum_oc dy[b, t, oc] * W[oc, c, k]
            dx[:, k:k + T_out, :] += dy @ self.W[:, :, k]    # (B,T_out,OC)@(OC,C)
        return dx

class ReLU:
    """Element-wise rectifier that remembers which inputs were positive."""

    def __init__(self):
        self.mask = None

    def forward(self, x):
        """Zero out the non-positive entries of ``x``."""
        positive = x > 0
        self.mask = positive
        return x * positive

    def backward(self, dy):
        """Pass ``dy`` through only where the forward input was positive."""
        return dy * self.mask

class MaxPool1D:
    """Non-overlapping max pooling along the time axis of (B, T, C) input.

    When several positions of one pooling window share the maximum, only
    the first one receives the gradient in ``backward``. An equality-based
    mask would hand the full upstream gradient to every tied position and
    so over-count it.
    """

    def __init__(self, pool_size):
        self.pool = pool_size
        # (input, boolean mask of the selected positions) from the last forward
        self.cache = None

    def forward(self, x):
        """Pool ``x`` (B, T, C) down to float32 (B, T // pool, C).

        Raises:
            ValueError: if ``T`` is not a multiple of the pool size.
        """
        B, T, C = x.shape
        P = self.pool
        if T % P != 0:
            raise ValueError("Time length must be divisible by pool size")
        out_T = T // P

        windows = x.reshape(B, out_T, P, C)
        winner = windows.argmax(axis=2)                     # (B, out_T, C), first max
        # One-hot over the pooling axis: True only at the selected position.
        selected = np.arange(P).reshape(1, 1, P, 1) == winner[:, :, None, :]
        self.cache = (x, selected.reshape(B, T, C))
        return windows.max(axis=2).astype(np.float32)

    def backward(self, dy):
        """Scatter ``dy`` (B, T_out, C) back to the selected input positions."""
        x, mask = self.cache
        dx = np.zeros_like(x)
        # Repeat each pooled gradient over its window, keep it only at the winner.
        dx += np.repeat(dy, self.pool, axis=1) * mask
        return dx


In [None]:
# ==== Cell 6: LSTM & BiLSTM (from scratch) ====
def sigmoid(x):
    """Numerically stable logistic function ``1 / (1 + exp(-x))``.

    Only ``exp(-|x|)`` is evaluated, so large-magnitude inputs cannot
    overflow. The two branches are algebraically identical to the plain
    formula. A scalar input returns a NumPy scalar.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))                      # always in (0, 1]
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return out[()] if out.ndim == 0 else out
def dsigmoid(y):
    """Derivative of the sigmoid expressed via its output ``y = sigmoid(x)``."""
    return (1 - y) * y
def dtanh(y):
    """Derivative of tanh expressed via its output ``y = tanh(x)``."""
    squared = y * y
    return 1 - squared

class LSTM:
    """
    Unidirectional many-to-many LSTM: input (B, T, D_in) -> output (B, T, H).

    The gate blocks are packed along the last weight axis in the order
    input (i), forget (f), output (o), candidate (g). ``forward`` records
    every state needed by ``backward`` (back-propagation through time).
    """

    def __init__(self, input_dim, hidden_dim):
        self.D = input_dim
        self.H = hidden_dim

        # Packed weights: x -> [i, f, o, g] and h -> [i, f, o, g]
        bound = math.sqrt(1.0 / (input_dim + hidden_dim))
        gate_width = 4 * hidden_dim
        self.Wx = np.random.uniform(-bound, bound, size=(input_dim, gate_width)).astype(np.float32)
        self.Wh = np.random.uniform(-bound, bound, size=(hidden_dim, gate_width)).astype(np.float32)
        self.b = np.zeros((gate_width,), dtype=np.float32)

        # Gradient buffers, one per parameter
        self.gWx = np.zeros_like(self.Wx)
        self.gWh = np.zeros_like(self.Wh)
        self.gb = np.zeros_like(self.b)

        # Filled by forward(), consumed by backward()
        self.cache = None

    def forward(self, x):
        """Run the sequence ``x`` (B, T, D) and return all hidden states (B, T, H)."""
        B, T, D = x.shape
        H = self.H
        h_state = np.zeros((B, H), dtype=np.float32)
        c_state = np.zeros((B, H), dtype=np.float32)
        outputs = np.zeros((B, T, H), dtype=np.float32)

        # Index 0 of the state histories is the zero initial state.
        h_hist = [h_state.copy()]
        c_hist = [c_state.copy()]
        gate_hist = []
        self.cache = {"x": x, "h": h_hist, "c": c_hist, "gates": gate_hist}

        for t in range(T):
            pre = x[:, t, :] @ self.Wx + h_state @ self.Wh + self.b   # (B, 4H)
            pre_i, pre_f, pre_o, pre_g = np.split(pre, 4, axis=1)
            gate_i = sigmoid(pre_i)
            gate_f = sigmoid(pre_f)
            gate_o = sigmoid(pre_o)
            gate_g = np.tanh(pre_g)

            c_state = gate_f * c_state + gate_i * gate_g
            h_state = gate_o * np.tanh(c_state)
            outputs[:, t, :] = h_state

            h_hist.append(h_state.copy())
            c_hist.append(c_state.copy())
            gate_hist.append((gate_i, gate_f, gate_o, gate_g, pre_i, pre_f, pre_o, pre_g))
        return outputs

    def backward(self, dhs):
        """Back-propagate through time.

        Args:
            dhs: (B, T, H) gradient with respect to every output step.

        Returns:
            Gradient with respect to the cached input, shaped like it.
            ``gWx``, ``gWh`` and ``gb`` are reset and then accumulated in place.
        """
        cache = self.cache
        x = cache["x"]
        B, T, D = x.shape
        H = self.H

        for grad in (self.gWx, self.gWh, self.gb):
            grad.fill(0.0)
        dx = np.zeros_like(x)
        carry_h = np.zeros((B, H), dtype=np.float32)   # dL/dh arriving from step t+1
        carry_c = np.zeros((B, H), dtype=np.float32)   # dL/dc arriving from step t+1

        for t in range(T - 1, -1, -1):
            gate_i, gate_f, gate_o, gate_g = cache["gates"][t][:4]
            h_before = cache["h"][t]
            c_before = cache["c"][t]
            tanh_c = np.tanh(cache["c"][t + 1])

            grad_h = dhs[:, t, :] + carry_h
            grad_c = grad_h * gate_o * (1 - tanh_c**2) + carry_c

            # Gradients w.r.t. the pre-activations, packed in [i, f, o, g] order.
            grad_pre = np.concatenate(
                [
                    (grad_c * gate_g) * dsigmoid(gate_i),
                    (grad_c * c_before) * dsigmoid(gate_f),
                    (grad_h * tanh_c) * dsigmoid(gate_o),
                    (grad_c * gate_i) * dtanh(gate_g),
                ],
                axis=1,
            )                                           # (B, 4H)

            self.gb += grad_pre.sum(axis=0)
            self.gWx += x[:, t, :].T @ grad_pre
            self.gWh += h_before.T @ grad_pre

            dx[:, t, :] = grad_pre @ self.Wx.T
            carry_h = grad_pre @ self.Wh.T
            carry_c = grad_c * gate_f

        return dx

class BiLSTM:
    """Two independent LSTMs, one reading the sequence forward and one reversed.

    Their per-step outputs are concatenated along the feature axis, giving
    (B, T, 2H) for an input of (B, T, D).
    """

    def __init__(self, input_dim, hidden_dim):
        self.fwd = LSTM(input_dim, hidden_dim)
        self.bwd = LSTM(input_dim, hidden_dim)
        self.H = hidden_dim

    def forward(self, x):
        """Return the concatenated forward / backward hidden states (B, T, 2H)."""
        out_fwd = self.fwd.forward(x)                       # (B, T, H)
        out_bwd_reversed = self.bwd.forward(x[:, ::-1, :])  # run on the time-reversed input
        out_bwd = out_bwd_reversed[:, ::-1, :]              # flip back to the original time order
        return np.concatenate([out_fwd, out_bwd], axis=2)

    def backward(self, dy):
        """Split ``dy`` (B, T, 2H) between both directions and sum the input grads."""
        B, T, _ = dy.shape
        H = self.H
        grad_fwd_out = dy[:, :, :H]
        grad_bwd_out = dy[:, :, H:]

        grad_in_fwd = self.fwd.backward(grad_fwd_out)       # (B, T, D)
        # The backward-direction LSTM saw the reversed sequence, so its output
        # gradient is reversed going in and its input gradient coming out.
        grad_in_bwd = self.bwd.backward(grad_bwd_out[:, ::-1, :])[:, ::-1, :]
        return grad_in_fwd + grad_in_bwd

    @property
    def params_and_grads(self):
        """(parameter, gradient) pairs: forward cell first, then backward cell."""
        pairs = []
        for cell in (self.fwd, self.bwd):
            pairs.append((cell.Wx, cell.gWx))
            pairs.append((cell.Wh, cell.gWh))
            pairs.append((cell.b, cell.gb))
        return pairs


In [None]:
# ==== Cell 7: Dense & Model Compose ====
class Dense:
    """Affine layer applied independently at every time step.

    Maps (B, T, D_in) to (B, T, D_out) with weights ``W`` (D_in, D_out) and
    bias ``b``; ``gW`` / ``gb`` hold the gradients from the latest backward.
    """

    def __init__(self, in_dim, out_dim):
        bound = math.sqrt(6.0 / (in_dim + out_dim))
        self.W = np.random.uniform(-bound, bound, size=(in_dim, out_dim)).astype(np.float32)
        self.b = np.zeros((out_dim,), dtype=np.float32)
        self.gW = np.zeros_like(self.W)
        self.gb = np.zeros_like(self.b)
        self.cache = None

    def forward(self, x):
        """Return ``x @ W + b`` and remember ``x`` for the backward pass."""
        out = x @ self.W + self.b
        self.cache = x
        return out

    def backward(self, dy):
        """Back-propagate ``dy`` (B, T, D_out); returns the gradient w.r.t. the input."""
        x = self.cache
        B, T, D = x.shape
        n_rows = B * T

        # Flatten batch and time so the parameter gradients sum over both.
        flat_x = x.reshape(n_rows, D)
        flat_dy = dy.reshape(n_rows, -1)

        self.gW.fill(0.0)
        self.gb.fill(0.0)
        self.gW += flat_x.T @ flat_dy
        self.gb += flat_dy.sum(axis=0)
        return dy @ self.W.T

class CNN_BiLSTM_Classifier:
    """Conv1D -> ReLU -> MaxPool1D -> BiLSTM -> time-distributed Dense.

    ``forward`` returns per-time-step logits. The class also owns the
    SGD-with-momentum state for all of its parameters.
    """

    def __init__(self, in_channels, conv_filters, kernel_size, pool_size, lstm_hidden, n_classes):
        self.conv = Conv1D(in_channels, conv_filters, kernel_size)
        self.relu = ReLU()
        self.pool = MaxPool1D(pool_size)
        # The BiLSTM consumes the conv output channels as its input features.
        self.bilstm = BiLSTM(conv_filters, lstm_hidden)
        self.fc = Dense(2*lstm_hidden, n_classes)  # time-distributed

        # Optimizer state (SGD + momentum): one velocity buffer per parameter,
        # keyed by the parameter array's id(). Parameters are only ever
        # updated in place, so the ids stay valid.
        self.opt_v = {
            id(param): np.zeros_like(param) for param, _ in self.params_and_grads
        }

    @property
    def params_and_grads(self):
        """All (parameter, gradient) pairs in conv, BiLSTM fwd/bwd, dense order."""
        pairs = [(self.conv.W, self.conv.gW), (self.conv.b, self.conv.gb)]
        for cell in (self.bilstm.fwd, self.bilstm.bwd):
            pairs.append((cell.Wx, cell.gWx))
            pairs.append((cell.Wh, cell.gWh))
            pairs.append((cell.b, cell.gb))
        pairs.append((self.fc.W, self.fc.gW))
        pairs.append((self.fc.b, self.fc.gb))
        return pairs

    def forward(self, x):
        """Map ``x`` (B, T, C_in) to per-step logits (B, T'', n_classes)."""
        out = x
        # conv: (B, T-K+1, F) -> relu -> pool: (B, T'', F) -> bilstm: (B, T'', 2H) -> fc
        for layer in (self.conv, self.relu, self.pool, self.bilstm, self.fc):
            out = layer.forward(out)
        return out

    def backward(self, dlogits):
        """Back-propagate per-step logit gradients; returns the input gradient."""
        grad = dlogits
        for layer in (self.fc, self.bilstm, self.pool, self.relu, self.conv):
            grad = layer.backward(grad)
        return grad

    def sgd_step(self, lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY, clip=GRAD_CLIP):
        """Apply one SGD-with-momentum update to every parameter, in place.

        Per parameter: optional L2 weight decay (arrays with ndim >= 2
        only), then element-wise clipping of the gradient to
        [-clip, clip] (skipped when ``clip`` is None), then the momentum
        update. The gradient arrays themselves are modified by the first
        two steps.
        """
        apply_decay = weight_decay > 0
        for param, grad in self.params_and_grads:
            if apply_decay and param.ndim >= 2:
                grad += weight_decay * param
            if clip is not None:
                np.clip(grad, -clip, clip, out=grad)
            velocity = self.opt_v[id(param)]
            velocity[:] = momentum * velocity - lr * grad
            param += velocity

    def zero_grads(self):
        """Reset every gradient buffer to zero."""
        for _, grad in self.params_and_grads:
            grad.fill(0.0)


In [None]:
# ==== Cell 8: Train & Evaluate ====
def train_one_epoch(model, X, y, batch_size=BATCH_SIZE):
    """Train ``model`` for one shuffled pass over ``(X, y)``.

    Each window is classified from the logits of the LAST time step only,
    so the loss gradient is non-zero only at that step.

    NOTE(review): with the BiLSTM in this file, the backward-direction
    features at the last step have processed just that single step.
    Confirm that a last-step readout is what is intended.

    Returns:
        ``(mean_loss, mean_accuracy)`` averaged over the mini-batches.
    """
    model.zero_grads()
    batch_losses, batch_accs = [], []
    for xb, yb in iterate_minibatches(X, y, batch_size, shuffle=True):
        # Forward pass
        seq_logits = model.forward(xb)                 # (B, T_out, K)
        probs = softmax(seq_logits[:, -1, :])
        targets = smooth_labels(one_hot(yb, N_CLASSES), LABEL_SMOOTHING)

        batch_losses.append(cross_entropy(probs, targets))
        batch_accs.append(accuracy(np.argmax(probs, axis=1), yb))

        # Backward pass: d(mean CE)/d(logits) = (probs - targets) / batch size,
        # placed at the last time step of an otherwise-zero gradient tensor.
        grad_seq = np.zeros_like(seq_logits)
        grad_seq[:, -1, :] = (probs - targets) / len(yb)

        model.zero_grads()
        model.backward(grad_seq)
        model.sgd_step()

    return float(np.mean(batch_losses)), float(np.mean(batch_accs))

def evaluate(model, X, y, batch_size=256, verbose=False):
    """Predict ``X`` in batches and score the predictions against ``y``.

    Predictions use the last time step's logits, as in training. With
    ``verbose`` a classification report and confusion matrix are printed.

    Returns:
        ``(accuracy, y_pred)``.
    """
    batch_preds = []
    for start in range(0, len(y), batch_size):
        seq_logits = model.forward(X[start:start + batch_size])
        last_probs = softmax(seq_logits[:, -1, :])
        batch_preds.append(np.argmax(last_probs, axis=1))
    y_pred = np.concatenate(batch_preds)

    acc = accuracy_score(y, y_pred)
    if verbose:
        print(classification_report(y, y_pred, digits=4))
        print("Confusion matrix:\n", confusion_matrix(y, y_pred))
    return acc, y_pred


In [None]:
# ==== Cell 9: 5-Fold Subject-wise CV ====
def fit_fold(train_sids, test_sids,
             epochs=EPOCHS,
             conv_filters=CONV_FILTERS,
             kernel_size=KERNEL_SIZE,
             pool_size=POOL_SIZE,
             hidden_units=HIDDEN_UNITS):
    """Train a fresh model on ``train_sids`` and track accuracy on ``test_sids``.

    Args:
        train_sids: Subject ids used for training.
        test_sids: Subject ids held out for evaluation.
        epochs: Number of training epochs.
        conv_filters, kernel_size, pool_size, hidden_units: Model sizes
            forwarded to ``CNN_BiLSTM_Classifier``.

    Returns:
        ``(model, (Xte, Yte))``, or ``None`` when either split has no windows.
    """
    # Build the datasets
    Xtr, Ytr = build_dataset_for_subjects(train_sids)
    Xte, Yte = build_dataset_for_subjects(test_sids)

    print(f"Train windows: {len(Ytr)} | Test windows: {len(Yte)}")
    if len(Ytr) == 0 or len(Yte) == 0:
        print("데이터가 비어 fold를 건너뜁니다.")
        return None

    # Initialize the model
    model = CNN_BiLSTM_Classifier(IN_CHANNELS, conv_filters, kernel_size,
                                  pool_size, hidden_units, N_CLASSES)

    # Training loop
    for ep in range(1, epochs+1):
        t0 = time.time()
        loss, tr_acc = train_one_epoch(model, Xtr, Ytr, BATCH_SIZE)
        te_acc, _ = evaluate(model, Xte, Yte, verbose=False)
        dt = time.time()-t0
        # The elapsed time used to be measured but never shown; report it so
        # the cost of each pure-NumPy epoch (training + evaluation) is visible.
        print(f"  Epoch {ep:02d} | loss={loss:.4f} | train_acc={tr_acc*100:.2f}% | test_acc={te_acc*100:.2f}% | time={dt:.1f}s")
    return model, (Xte, Yte)

# Subject-wise cross-validation driven by the FOLDS config constant, so that
# changing the fold layout in the config cell actually takes effect (the fold
# list used to be hard-coded here, duplicating FOLDS).
all_results = []
for fi, test_pair in enumerate(FOLDS, start=1):
    test_sids = list(test_pair)
    # Train on every loaded subject (1..10) that is not held out in this fold.
    train_sids = [sid for sid in range(1,11) if sid not in test_sids]
    print("\n==============================")
    print(f"Fold {fi} | Test subjects = {tuple(test_sids)}")
    out = fit_fold(train_sids, test_sids)
    if out is None:
        # Keep a placeholder so fold numbering in the summary stays aligned.
        all_results.append(np.nan)
        continue
    model, (Xte, Yte) = out
    te_acc, ypred = evaluate(model, Xte, Yte, verbose=True)
    print(f"Fold {fi} final Test Acc = {te_acc*100:.2f}%")
    all_results.append(te_acc)
    # Free memory before the next fold
    del model, Xte, Yte; gc.collect()

print("\n==== 5-Fold Summary ====")
for i, a in enumerate(all_results, 1):
    print(f"Fold {i}: {a*100:.2f}%" if not np.isnan(a) else f"Fold {i}: N/A")
# nanmean ignores the folds that were skipped for lack of data
print(f"Mean Acc: {np.nanmean(all_results)*100:.2f}%")


In [None]:
# ==== Cell 10: Tuning Tips (Optional) ====
# 1) Faster runs: make HIDDEN_UNITS and CONV_FILTERS smaller (e.g. 16, 6)
# 2) Generalization: try LABEL_SMOOTHING = 0.05 ~ 0.1
# 3) Shorter windows: WIN=48 (the post-conv length WIN - KERNEL_SIZE + 1 must
#    stay divisible by POOL_SIZE, as MaxPool1D requires)
# 4) Training stability: LR=0.003, keep MOMENTUM=0.9, keep GRAD_CLIP=5.0
# 5) Sequence normalization: currently a per-subject z-score; can be switched
#    to a z-score over the whole log if needed
pass