In [1]:
# ================================================
# 0) Imports
# ================================================
import os, re, random
import numpy as np
import pandas as pd
from collections import Counter

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight

import tensorflow as tf
from tensorflow.keras import layers, models, regularizers

SEED = 42
random.seed(SEED); np.random.seed(SEED); tf.random.set_seed(SEED)

In [2]:
# ================================================
# 1) 데이터 로드 & 기본 전처리
# ================================================
cols = ["subject", "label", "timestamp", "x", "y", "z"]
df = pd.read_csv(
    "/content/drive/MyDrive/data/WISDM_ar_v1.1_raw.txt",
    header=None, names=cols, on_bad_lines="skip"
).dropna()

df["z"] = df["z"].astype(str).str.replace(";", "", regex=False).astype(float)
df["x"] = df["x"].astype(float)
df["y"] = df["y"].astype(float)

df["subject"] = pd.to_numeric(df["subject"], errors="coerce").astype("Int64")
df["timestamp"] = pd.to_numeric(df["timestamp"], errors="coerce")
for c in ["x","y","z"]:
    df[c] = pd.to_numeric(df[c], errors="coerce")

df = df.dropna().sort_values(["subject","label","timestamp"]).reset_index(drop=True)

print("샘플:\n", df.head(), "\n")
print("고유 subject:", df["subject"].nunique())
print("라벨 분포:", Counter(df["label"]))

샘플:
    subject       label      timestamp     x      y     z
0        1  Downstairs  6552942304000 -0.15   9.15 -0.34
1        1  Downstairs  6552992292000  0.11   9.19  2.76
2        1  Downstairs  6553042310000 -4.06   7.40  4.02
3        1  Downstairs  6553092298000 -2.87   7.93  3.21
4        1  Downstairs  6553142347000 -0.19  10.04  4.82 

고유 subject: 36
라벨 분포: Counter({'Walking': 418393, 'Jogging': 336445, 'Upstairs': 122869, 'Downstairs': 100425, 'Sitting': 59939, 'Standing': 48394})


In [3]:
# ================================================
# 2) get_frames (문자 라벨 대응: np.unique로 최빈값)
# ================================================
Fs = 20
frame_size = 200           # 200 timestep (10초 분량)
hop_size   = frame_size//2 # 50% overlap = 100

def get_frames(df, frame_size, hop_size):
    N_FEATURES = 3
    frames, labels = [], []
    xv = df["x"].to_numpy(); yv = df["y"].to_numpy(); zv = df["z"].to_numpy()
    lv = df["label"].to_numpy()
    for i in range(0, len(df) - frame_size, hop_size):
        x = xv[i:i+frame_size]; y = yv[i:i+frame_size]; z = zv[i:i+frame_size]
        seg = lv[i:i+frame_size]
        vals, counts = np.unique(seg, return_counts=True)
        label = vals[np.argmax(counts)]
        frames.append([x,y,z])
        labels.append(label)
    frames = np.asarray(frames).reshape(-1, frame_size, N_FEATURES)
    labels = np.asarray(labels)
    return frames, labels


In [4]:
# ================================================
# 3) 프레임 생성 + magnitude 채널 추가
# ================================================
X_raw, y_raw = get_frames(df, frame_size, hop_size)
print("X_raw:", X_raw.shape, "라벨:", Counter(y_raw))

# magnitude 채널 추가
mag = np.linalg.norm(X_raw, axis=2, keepdims=True) # (N,T,1)
X_raw = np.concatenate([X_raw, mag], axis=2)       # (N,T,4)

# 라벨 인코딩
le = LabelEncoder()
y = le.fit_transform(y_raw)
num_classes = len(le.classes_)
print("Classes:", list(le.classes_))

# train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.2, random_state=SEED, stratify=y
)

X_raw: (10863, 200, 3) 라벨: Counter({np.str_('Walking'): 4185, np.str_('Jogging'): 3363, np.str_('Upstairs'): 1229, np.str_('Downstairs'): 1001, np.str_('Sitting'): 601, np.str_('Standing'): 484})
Classes: [np.str_('Downstairs'), np.str_('Jogging'), np.str_('Sitting'), np.str_('Standing'), np.str_('Upstairs'), np.str_('Walking')]


In [5]:
# ================================================
# 4) 표준화 (Train 기준으로 fit)
# ================================================
scaler = StandardScaler()
X_train_2d = X_train.reshape(-1, X_train.shape[-1])  # (N*T, 4)
X_test_2d  = X_test.reshape(-1,  X_test.shape[-1])

scaler.fit(X_train_2d)
X_train = scaler.transform(X_train_2d).reshape(X_train.shape)
X_test  = scaler.transform(X_test_2d ).reshape(X_test.shape)


In [6]:
# ================================================
# 5) CNN + LSTM 모델 정의
# ================================================
def build_cnn_lstm(input_shape, num_classes):
    inp = layers.Input(shape=input_shape)   # (T, 4)

    # CNN 블록
    x = layers.Conv1D(64, kernel_size=5, activation="relu", padding="same")(inp)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling1D(2)(x)
    x = layers.Dropout(0.3)(x)

    x = layers.Conv1D(128, kernel_size=5, activation="relu", padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling1D(2)(x)
    x = layers.Dropout(0.3)(x)

    # LSTM 블록
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Bidirectional(layers.LSTM(64))(x)
    x = layers.Dropout(0.3)(x)

    # Dense 블록
    x = layers.Dense(128, activation="relu", kernel_regularizer=regularizers.l2(1e-4))(x)
    x = layers.Dropout(0.3)(x)
    out = layers.Dense(num_classes, activation="softmax")(x)

    model = models.Model(inp, out)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(1e-3),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model

model = build_cnn_lstm((frame_size, 4), num_classes)
model.summary()

In [7]:
# ================================================
# 6) class_weight 적용
# ================================================
cw = compute_class_weight(class_weight="balanced",
                          classes=np.unique(y_train),
                          y=y_train)
class_weight = {i: w for i, w in enumerate(cw)}
print("class_weight:", class_weight)

class_weight: {0: np.float64(1.8081564710778193), 1: np.float64(0.5384138785625775), 2: np.float64(3.011088011088011), 3: np.float64(3.7424633936261844), 4: np.float64(1.4733808070532384), 5: np.float64(0.43259657506969335)}


In [8]:
# ================================================
# 7) 학습
# ================================================

history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=60,
    batch_size=128,
    class_weight=class_weight,
    verbose=1
)


Epoch 1/60
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 44ms/step - accuracy: 0.5341 - loss: 1.1445 - val_accuracy: 0.5138 - val_loss: 1.0975
Epoch 2/60
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 28ms/step - accuracy: 0.7736 - loss: 0.5921 - val_accuracy: 0.3832 - val_loss: 1.7176
Epoch 3/60
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 32ms/step - accuracy: 0.8460 - loss: 0.4227 - val_accuracy: 0.3389 - val_loss: 1.8586
Epoch 4/60
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 34ms/step - accuracy: 0.9033 - loss: 0.3073 - val_accuracy: 0.4925 - val_loss: 1.1882
Epoch 5/60
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - accuracy: 0.9214 - loss: 0.2824 - val_accuracy: 0.6398 - val_loss: 0.9172
Epoch 6/60
[1m55/55[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - accuracy: 0.9408 - loss: 0.2119 - val_accuracy: 0.7434 - val_loss: 0.6899
Epoch 7/60
[1m55/55[0m [32m━━━

In [9]:
# ================================================
# 8) 평가
# ================================================
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"[Test] loss={test_loss:.4f}  acc={test_acc:.4f}")

y_pred = np.argmax(model.predict(X_test, verbose=0), axis=1)
print("\nClassification Report")
print(classification_report(y_test, y_pred, target_names=list(le.classes_)))

cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix (rows=true, cols=pred):\n", cm)

[Test] loss=0.0872  acc=0.9811

Classification Report
              precision    recall  f1-score   support

  Downstairs       0.93      0.99      0.96       200
     Jogging       0.99      0.99      0.99       673
     Sitting       0.99      0.99      0.99       120
    Standing       0.99      0.98      0.98        97
    Upstairs       0.97      0.91      0.94       246
     Walking       0.99      0.99      0.99       837

    accuracy                           0.98      2173
   macro avg       0.98      0.98      0.98      2173
weighted avg       0.98      0.98      0.98      2173


Confusion Matrix (rows=true, cols=pred):
 [[198   0   0   0   0   2]
 [  4 666   0   0   3   0]
 [  0   0 119   1   0   0]
 [  0   0   1  95   1   0]
 [  9   3   0   0 225   9]
 [  3   1   0   0   4 829]]
