In [1]:
# =============================
# 셀 1: 설정 (경로, 폴드, 하이퍼파라미터)
# =============================

DATA_PATH_TEMPLATE = "/content/drive/MyDrive/Colab Notebooks/mHealth_subject{sid}.log"

SUBJECT_IDS = [1,2,3,4,5,6,7,8,9,10]
FOLDS = [(1,2), (3,4), (5,6), (7,8), (9,10)]

SAMPLE_RATE_HZ = 50
WIN = 64
STRIDE = 64
MAJ_THRESH = 0.80

N_CLASSES = 12
IN_CHANNELS = 23

HIDDEN = 128
EPOCHS = 5
LR = 0.003
LR_DECAY = 0.90
CLIP = 1.0
SEED = 12345

# 진행 로그
PRINT_PROGRESS = True
PRINT_EVERY = 400   # 출력 자주하면 느려짐 -> 400으로 완화

# ==== 속도 옵션 ====
USE_TBPTT = True    # True면 최근 TBPTT_K 스텝만 역전파
TBPTT_K   = 32      # 16~48 권장 (WIN=64 기준)


In [2]:
# =============================
# 셀 2: 유틸 (수학 함수, RNG, 벡터/행렬 연산)
# =============================

PI = 3.141592653589793
E  = 2.718281828459045

def _exp(x):
    if x > 40.0: x = 40.0
    if x < -40.0: x = -40.0
    return (E ** x)

def _tanh(x):
    ex = _exp(x); em = _exp(-x)
    return (ex - em) / (ex + em)

def _sigmoid(x):
    if x >= 0:
        z = _exp(-x); return 1.0 / (1.0 + z)
    else:
        z = _exp(x);  return z / (1.0 + z)

def _ln(y):
    if y <= 0: y = 1e-12
    lo, hi = -40.0, 40.0
    for _ in range(50):
        mid = (lo+hi)/2.0
        em = _exp(mid)
        if em < y: lo = mid
        else: hi = mid
    return (lo+hi)/2.0

def _log_safe(p):
    if p < 1e-12: p = 1e-12
    return _ln(p)

def argmax(v):
    if not v: return -1
    mi, mv = 0, v[0]
    for i in range(1, len(v)):
        if v[i] > mv: mv, mi = v[i], i
    return mi

# RNG (LCG)
class LCG:
    def __init__(self, seed=1):
        self.state = seed & 0xFFFFFFFF
    def rnd(self):
        self.state = (1664525 * self.state + 1013904223) & 0xFFFFFFFF
        return self.state / 4294967296.0
    def uniform(self, a, b): return a + (b - a) * self.rnd()
    def choice(self, n): return int(self.rnd() * n)
    def shuffle(self, arr):
        for i in range(len(arr)-1, 0, -1):
            j = int(self.rnd() * (i+1))
            arr[i], arr[j] = arr[j], arr[i]

_rng = LCG(SEED)

# 벡터/행렬 도우미
def zeros(n): return [0.0]*n
def zeros2(n, m): return [[0.0]*m for _ in range(n)]

def randn(n, m, scale=0.01):
    W = []
    for i in range(n):
        row = []
        for j in range(m):
            s = 0.0
            for _ in range(12): s += _rng.rnd()
            g = (s - 6.0)  # ~N(0,1)
            row.append(g * scale)
        W.append(row)
    return W

def add_inplace(a, b):
    for i in range(len(a)): a[i] += b[i]

def add2_inplace(A, B):
    for i in range(len(A)):
        for j in range(len(A[0])): A[i][j] += B[i][j]

def clip_inplace(v, clip):
    s = 0.0
    for x in v: s += x*x
    if s <= clip*clip: return
    scale = clip / (s ** 0.5)
    for i in range(len(v)): v[i] *= scale

def clip2_inplace(M, clip):
    s = 0.0
    for i in range(len(M)):
        for j in range(len(M[0])): s += M[i][j]*M[i][j]
    if s <= clip*clip: return
    scale = clip / (s ** 0.5)
    for i in range(len(M)):
        for j in range(len(M[0])): M[i][j] *= scale


In [3]:
# =============================
# 셀 3: 데이터 파싱/로딩/표준화/윈도
# =============================

def parse_row(line):
    parts, cur = [], ''
    for ch in line:
        if ch in ('\t', ' '):
            if cur != '': parts.append(cur); cur = ''
        else:
            cur += ch
    if cur != '': parts.append(cur)
    if len(parts) < 24: return None
    feats = [float(parts[i]) for i in range(23)]
    label = int(float(parts[23]))
    return feats, label

def load_all_subjects():
    data = {}
    for sid in SUBJECT_IDS:
        path = DATA_PATH_TEMPLATE.replace('{sid}', str(sid))
        rows = []
        try:
            with open(path, 'r') as f:
                for line in f:
                    pr = parse_row(line.strip())
                    if pr is None: continue
                    x, y = pr
                    if y == 0: continue  # null 제거
                    rows.append((x, y))
        except Exception as e:
            print("[WARN] 읽기 실패:", path, "-", e)
        data[sid] = rows
    return data

def compute_mean_std(examples):
    n = len(examples)
    if n == 0: return [0.0]*IN_CHANNELS, [1.0]*IN_CHANNELS
    mean = [0.0]*IN_CHANNELS
    for x in examples:
        for i in range(IN_CHANNELS): mean[i] += x[i]
    for i in range(IN_CHANNELS): mean[i] /= n
    var = [0.0]*IN_CHANNELS
    for x in examples:
        for i in range(IN_CHANNELS):
            d = x[i]-mean[i]; var[i] += d*d
    for i in range(IN_CHANNELS):
        var[i] = var[i]/n
    std = [ (v**0.5) if v>1e-12 else 1.0 for v in var ]
    return mean, std

def standardize_inplace(examples, mean, std):
    for x in examples:
        for i in range(IN_CHANNELS):
            x[i] = (x[i] - mean[i]) / std[i]

def window_subject(rows, win=WIN, stride=STRIDE, maj_thresh=MAJ_THRESH):
    X, Y = [], []
    n, i = len(rows), 0
    while i + win <= n:
        counts = [0]*(N_CLASSES+1)  # 1..12 사용
        for k in range(i, i+win): counts[rows[k][1]] += 1
        maj_lab, maj_cnt = 0, 0
        for lab in range(1, N_CLASSES+1):
            if counts[lab] > maj_cnt: maj_cnt, maj_lab = counts[lab], lab
        if maj_cnt >= int(maj_thresh*win):
            seq = [rows[k][0][:] for k in range(i, i+win)]
            X.append(seq); Y.append(maj_lab-1)
        i += stride
    return X, Y

def build_dataset(rows_by_subject, test_subjects):
    train_rows, test_rows = [], []
    for sid, rows in rows_by_subject.items():
        (test_rows if sid in test_subjects else train_rows).extend([(x[:], y) for x,y in rows])

    train_feats = [x for (x,_) in train_rows]
    mean, std = compute_mean_std(train_feats)
    standardize_inplace(train_feats, mean, std)
    for i in range(len(test_rows)):
        x,y = test_rows[i]
        for j in range(IN_CHANNELS): x[j] = (x[j]-mean[j])/std[j]
        test_rows[i] = (x,y)

    train_X, train_Y = window_subject(train_rows, WIN, STRIDE, MAJ_THRESH)
    test_X,  test_Y  = window_subject(test_rows,  WIN, STRIDE, MAJ_THRESH)
    return train_X, train_Y, test_X, test_Y

def build_combined_table(rows_by_subject):
    combined = []
    for sid in SUBJECT_IDS:
        for x,y in rows_by_subject.get(sid, []):
            combined.append((x[:], y, sid))
    return combined


In [4]:
# =============================
# 셀 4: LSTM 분류기 (from scratch) + Adam + 최적화 커널 + (선택)TBPTT
# =============================

class LSTMClassifier:
    def __init__(self, input_dim, hidden_dim, num_classes, lr=0.001, clip=1.0):
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_classes = num_classes
        self.lr = lr
        self.clip = clip
        H, D, C = hidden_dim, input_dim, num_classes

        # 가중치
        self.Wx = randn(D, 4*H, scale=(1.0/(D**0.5)))
        self.Wh = randn(H, 4*H, scale=(1.0/(H**0.5)))
        self.b  = [0.0]*(4*H)
        # Forget 게이트 바이어스 +1.0
        for k in range(H, 2*H): self.b[k] = 1.0

        self.Wy = randn(H, C, scale=(1.0/(H**0.5)))
        self.by = [0.0]*C

        # Adam 상태
        self.t = 0
        self.m_Wx = zeros2(D, 4*H); self.v_Wx = zeros2(D, 4*H)
        self.m_Wh = zeros2(H, 4*H); self.v_Wh = zeros2(H, 4*H)
        self.m_b  = zeros(4*H);     self.v_b  = zeros(4*H)
        self.m_Wy = zeros2(H, C);   self.v_Wy = zeros2(H, C)
        self.m_by = zeros(C);       self.v_by = zeros(C)

    # ---- 빠른 커널: v^T M (i-outer) ----
    # out[j] = sum_i v[i]*M[i][j]
    def _vecmat_fast(self, v, M):
        n_rows = len(M)
        if n_rows == 0: return []
        n_cols = len(M[0])
        out = [0.0]*n_cols
        for i in range(n_rows):
            vi = v[i]
            row = M[i]
            for j in range(n_cols):
                out[j] += vi * row[j]
        return out

    def _vecadd_inplace(self, a, b):
        for i in range(len(a)): a[i] += b[i]

    def _softmax(self, logits):
        mx = logits[0]
        for x in logits[1:]:
            if x > mx: mx = x
        exps = [_exp(x - mx) for x in logits]
        s = 0.0
        for e in exps: s += e
        return [e/s for e in exps]

    def forward(self, seq):
        H, T = self.hidden_dim, len(seq)
        caches, h, c = [], [0.0]*H, [0.0]*H
        for t in range(T):
            x = seq[t]
            # z = x@Wx + h@Wh + b
            z = self._vecmat_fast(x, self.Wx)
            add_inplace(z, self._vecmat_fast(h, self.Wh))
            self._vecadd_inplace(z, self.b)

            # 게이트
            i_t = [0.0]*H; f_t=[0.0]*H; o_t=[0.0]*H; g_t=[0.0]*H
            for k in range(H):
                i_t[k] = _sigmoid(z[k])
                f_t[k] = _sigmoid(z[H + k])
                o_t[k] = _sigmoid(z[2*H + k])
                g_t[k] = _tanh(   z[3*H + k])
            # 상태
            c = [ f_t[k]*c[k] + i_t[k]*g_t[k] for k in range(H) ]
            tanh_c = [_tanh(c[k]) for k in range(H)]
            h = [ o_t[k]*tanh_c[k] for k in range(H) ]
            caches.append((x, i_t, f_t, o_t, g_t, c, tanh_c, h))
        # 출력
        logits = self._vecmat_fast(h, self.Wy)
        self._vecadd_inplace(logits, self.by)
        probs = self._softmax(logits)
        return probs, logits, caches

    def backward(self, caches, probs, target_idx):
        H, D, C, T = self.hidden_dim, self.input_dim, self.num_classes, len(caches)

        dWx = zeros2(D, 4*H); dWh = zeros2(H, 4*H); db = zeros(4*H)
        dWy = zeros2(H, C);   dby = zeros(C)

        dlogits = probs[:]; dlogits[target_idx] -= 1.0

        # dWy, dby, dh_next
        last_h = caches[-1][-1]
        for i in range(H):
            hi = last_h[i]
            row = dWy[i]
            for j in range(C): row[j] += hi * dlogits[j]
        for j in range(C): dby[j] += dlogits[j]

        dh_next = [0.0]*H
        for i in range(H):
            s = 0.0
            rowWy = self.Wy[i]
            for j in range(C): s += rowWy[j] * dlogits[j]
            dh_next[i] = s
        dc_next = [0.0]*H

        # ---- Truncated BPTT (옵션) ----
        t_start = 0
        if USE_TBPTT:
            t_start = max(0, T - TBPTT_K)

        for t in range(T-1, t_start-1, -1):
            x, i_t, f_t, o_t, g_t, c_t, tanh_c, h_t = caches[t]
            c_prev = [0.0]*H if t==0 else caches[t-1][5]
            h_prev = [0.0]*H if t==0 else caches[t-1][7]

            # h_t = o_t * tanh(c_t)
            do = [ dh_next[k]*tanh_c[k] for k in range(H) ]
            dtanh_c = [ dh_next[k]*o_t[k] for k in range(H) ]
            dc = [ dtanh_c[k]*(1.0 - tanh_c[k]*tanh_c[k]) + dc_next[k] for k in range(H) ]

            # c_t = f_t*c_{t-1} + i_t*g_t
            df = [ dc[k]*c_prev[k] for k in range(H) ]
            di = [ dc[k]*g_t[k] for k in range(H) ]
            dg = [ dc[k]*i_t[k] for k in range(H) ]
            dc_next = [ dc[k]*f_t[k] for k in range(H) ]

            # pre-act
            di_pre = [ di[k]*i_t[k]*(1.0 - i_t[k]) for k in range(H) ]
            df_pre = [ df[k]*f_t[k]*(1.0 - f_t[k]) for k in range(H) ]
            do_pre = [ do[k]*o_t[k]*(1.0 - o_t[k]) for k in range(H) ]
            dg_pre = [ dg[k]*(1.0 - g_t[k]*g_t[k]) for k in range(H) ]
            dz = di_pre + df_pre + do_pre + dg_pre  # 4H

            # dWx (i-outer) : dWx[i][k] += x[i]*dz[k]
            for i in range(D):
                xi = x[i]; row = dWx[i]
                for k in range(4*H): row[k] += xi * dz[k]
            # dWh : dWh[i][k] += h_prev[i]*dz[k]
            for i in range(H):
                hip = h_prev[i]; row = dWh[i]
                for k in range(4*H): row[k] += hip * dz[k]
            # db
            for k in range(4*H): db[k] += dz[k]

            # dh_prev = Wh * dz  (k-outer → i-accumulate)
            dh_prev = [0.0]*H
            # for k in range(4*H):
            #     val = dz[k]
            #     for i in range(H): dh_prev[i] += val * self.Wh[i][k]
            # 위 루프보다 약간 더 캐시 친화적으로:
            for i in range(H):
                s = 0.0
                rowWh = self.Wh[i]
                for k in range(4*H): s += rowWh[k] * dz[k]
                dh_prev[i] = s
            dh_next = dh_prev

        # 클리핑
        clip2_inplace(dWx, self.clip); clip2_inplace(dWh, self.clip); clip_inplace(db, self.clip)
        clip2_inplace(dWy, self.clip); clip_inplace(dby, self.clip)

        return dWx, dWh, db, dWy, dby

    # Adam
    def _adam_update(self, W, dW, m, v, lr, t, beta1=0.9, beta2=0.999, eps=1e-8):
        nb = len(W); mb = len(W[0]) if nb>0 else 0
        b1t = (1.0 - (beta1**t)); b2t = (1.0 - (beta2**t))
        for i in range(nb):
            rowW, rowG, rowM, rowV = W[i], dW[i], m[i], v[i]
            for j in range(mb):
                g = rowG[j]
                rowM[j] = beta1*rowM[j] + (1.0-beta1)*g
                rowV[j] = beta2*rowV[j] + (1.0-beta2)*g*g
                mhat = rowM[j] / b1t
                vhat = rowV[j] / b2t
                rowW[j] -= lr * (mhat / ((vhat**0.5) + eps))

    def _adam_update_vec(self, w, dw, m, v, lr, t, beta1=0.9, beta2=0.999, eps=1e-8):
        b1t = (1.0 - (beta1**t)); b2t = (1.0 - (beta2**t))
        for j in range(len(w)):
            g = dw[j]
            mj = m[j] = beta1*m[j] + (1.0-beta1)*g
            vj = v[j] = beta2*v[j] + (1.0-beta2)*g*g
            mhat = mj / b1t
            vhat = vj / b2t
            w[j] -= lr * (mhat / ((vhat**0.5) + eps))

    def step(self, grads):
        dWx, dWh, db, dWy, dby = grads
        self.t += 1
        lr = self.lr
        self._adam_update(self.Wx, dWx, self.m_Wx, self.v_Wx, lr, self.t)
        self._adam_update(self.Wh, dWh, self.m_Wh, self.v_Wh, lr, self.t)
        self._adam_update_vec(self.b,  db,  self.m_b,  self.v_b,  lr, self.t)
        self._adam_update(self.Wy, dWy, self.m_Wy, self.v_Wy, lr, self.t)
        self._adam_update_vec(self.by, dby, self.m_by, self.v_by, lr, self.t)

    def train_epoch(self, X, Y):
        idxs = list(range(len(X)))
        _rng.shuffle(idxs)
        total_loss = 0.0
        hit = 0
        for it, idx in enumerate(idxs, start=1):
            seq, y = X[idx], Y[idx]
            probs, _, caches = self.forward(seq)
            loss = -_log_safe(probs[y])
            total_loss += loss
            if argmax(probs) == y: hit += 1

            grads = self.backward(caches, probs, y)
            self.step(grads)

            if PRINT_PROGRESS and (it % PRINT_EVERY == 0 or it == len(idxs)):
                running_loss = total_loss / it
                running_acc = (hit / it) * 100.0
                print(f"  progress: {it} / {len(idxs)} | loss= {running_loss:.4f} | acc= {running_acc:.2f}%")
        epoch_acc = (hit / len(X)) if len(X) else 0.0
        return (total_loss / max(1, len(X))), epoch_acc

    def evaluate(self, X, Y):
        correct = 0
        for i in range(len(X)):
            probs, _, _ = self.forward(X[i])
            if argmax(probs) == Y[i]: correct += 1
        return (correct / len(X)) if len(X) else 0.0


In [None]:
# =============================
# 셀 5: 학습 루프 & 5-폴드 교차검증
# =============================

def run_cv():
    print("데이터 로딩...")
    rows_by_subject = load_all_subjects()

    combined = build_combined_table(rows_by_subject)
    print("통합 테이블(라벨!=0) 총 행 수:", len(combined))

    fold_results = []
    for fold_idx, test_pair in enumerate(FOLDS):
        print("\n==============================")
        print("Fold", fold_idx+1, "| Test subjects =", test_pair)
        train_X, train_Y, test_X, test_Y = build_dataset(rows_by_subject, set(test_pair))
        print("Train windows:", len(train_X), "| Test windows:", len(test_X))

        model = LSTMClassifier(IN_CHANNELS, HIDDEN, N_CLASSES, lr=LR, clip=CLIP)
        cur_lr = LR

        for ep in range(1, EPOCHS+1):
            model.lr = cur_lr
            loss, tr_acc = model.train_epoch(train_X, train_Y)
            te_acc = model.evaluate(test_X, test_Y)
            print(f"  Epoch {ep:02d} | lr={cur_lr:.5f} | loss={loss:.4f} | train_acc={tr_acc*100:.2f}% | test_acc={te_acc*100:.2f}%")
            cur_lr *= LR_DECAY  # 스케줄

        fold_results.append(te_acc)

    avg = sum(fold_results)/len(fold_results) if fold_results else 0.0
    print("\nCV test accuracies:")
    for i,a in enumerate(fold_results):
        print(f"  Fold {i+1}: {a*100:.2f}%")
    print(f"Average: {avg*100:.2f}%")

# 실행
run_cv()


데이터 로딩...
통합 테이블(라벨!=0) 총 행 수: 343195

Fold 1 | Test subjects = (1, 2)
Train windows: 4195 | Test windows: 1094
  progress: 400 / 4195 | loss= 1.1963 | acc= 59.50%
  progress: 800 / 4195 | loss= 0.9314 | acc= 71.38%
  progress: 1200 / 4195 | loss= 0.8312 | acc= 74.58%
  progress: 1600 / 4195 | loss= 0.7042 | acc= 78.62%
  progress: 2000 / 4195 | loss= 0.6571 | acc= 80.60%
  progress: 2400 / 4195 | loss= 0.6237 | acc= 81.96%
