In [13]:
# ================================================
# 0) Imports
# ================================================
import numpy as np
import pandas as pd
from collections import Counter

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.utils import to_categorical

In [14]:
# ================================================
# 1) 데이터 로드 & 기본 전처리
# ================================================
cols = ["subject", "label", "timestamp", "x", "y", "z"]
df = pd.read_csv(
    "/content/drive/MyDrive/data/WISDM_ar_v1.1_raw.txt",
    header=None, names=cols, on_bad_lines="skip"
).dropna()

# z 끝 세미콜론 제거 + float 변환
df["z"] = df["z"].astype(str).str.replace(";", "", regex=False).astype(float)
df["x"] = df["x"].astype(float)
df["y"] = df["y"].astype(float)

print("샘플:\n", df.head(), "\n")
print("라벨 분포:", Counter(df["label"]))


샘플:
    subject    label       timestamp         x          y         z
0       33  Jogging  49105962326000 -0.694638  12.680544  0.503953
1       33  Jogging  49106062271000  5.012288  11.264028  0.953424
2       33  Jogging  49106112167000  4.903325  10.882658 -0.081722
3       33  Jogging  49106222305000 -0.612916  18.496431  3.023717
4       33  Jogging  49106332290000 -1.184970  12.108489  7.205164 

라벨 분포: Counter({'Walking': 418393, 'Jogging': 336445, 'Upstairs': 122869, 'Downstairs': 100425, 'Sitting': 59939, 'Standing': 48394})


In [23]:
# ================================================
# 2) Sliding Window (window=80, hop=100)
#    최빈 라벨로 프레임 라벨 부여
# ================================================
Fs = 20
frame_size = Fs*4 # 80
hop_size = Fs*2 # 40 -> 50% 오버래핑

N_FEATURES = 3

def get_frames(df, frame_size, hop_size):
    frames, labels = [], []
    lab = df["label"].to_numpy()
    xv = df["x"].to_numpy(); yv = df["y"].to_numpy(); zv = df["z"].to_numpy()

    for i in range(0, len(df) - frame_size, hop_size):
        x = xv[i:i+frame_size]
        y = yv[i:i+frame_size]
        z = zv[i:i+frame_size]

        seg = lab[i:i+frame_size]
        vals, counts = np.unique(seg, return_counts=True)
        label = vals[np.argmax(counts)]  # 최빈값

        frames.append([x, y, z])
        labels.append(label)

    frames = np.asarray(frames).reshape(-1, frame_size, N_FEATURES)
    labels = np.asarray(labels)
    return frames, labels

X, y_raw = get_frames(df, frame_size, hop_size)
print("X shape:", X.shape)
print("프레임 기준 라벨 분포:", Counter(y_raw))

X shape: (27160, 80, 3)
프레임 기준 라벨 분포: Counter({np.str_('Walking'): 10462, np.str_('Jogging'): 8412, np.str_('Upstairs'): 3068, np.str_('Downstairs'): 2513, np.str_('Sitting'): 1494, np.str_('Standing'): 1211})


In [24]:
# ================================================
# 3) 라벨 인코딩 (원-핫)
# ================================================
le = LabelEncoder()
y_int = le.fit_transform(y_raw)
num_classes = len(le.classes_)
print("Classes:", list(le.classes_))
y = to_categorical(y_int, num_classes=num_classes)  # one-hot

Classes: [np.str_('Downstairs'), np.str_('Jogging'), np.str_('Sitting'), np.str_('Standing'), np.str_('Upstairs'), np.str_('Walking')]


In [25]:
# ================================================
# 4) Train/Test Split (랜덤, stratify)
# ================================================
X_train, X_test, y_train, y_test, y_int_train, y_int_test = train_test_split(
    X, y, y_int, test_size=0.2, random_state=42, stratify=y_int
)

In [26]:
# ================================================
# 5) 표준화 (채널별, Train으로 fit → Train/Test transform)
# ================================================
scaler = StandardScaler()
X_train_2d = X_train.reshape(-1, X_train.shape[-1])  # (N*T, 3)
X_test_2d  = X_test.reshape(-1,  X_test.shape[-1])

scaler.fit(X_train_2d)
X_train = scaler.transform(X_train_2d).reshape(X_train.shape)
X_test  = scaler.transform(X_test_2d ).reshape(X_test.shape)

In [27]:
# ================================================
# 6) LSTM 모델 (Sequential)
#    LSTM(128)->Dropout(0.5)->LSTM(64)->Dropout(0.5)
#    Dense(128)->Dropout(0.5)->Dense(num_classes, softmax)
#    loss=categorical_crossentropy (원-핫)
# ================================================
def build_lstm(input_shape, num_classes):
    model = Sequential()
    model.add(LSTM(128, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.5))
    model.add(LSTM(64))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation="relu"))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation="softmax"))
    model.compile(
        optimizer=tf.keras.optimizers.Adam(1e-3),
        loss="categorical_crossentropy",  # one-hot 라벨
        metrics=["accuracy"]
    )
    return model

model = build_lstm((frame_size, N_FEATURES), num_classes)
model.summary()

  super().__init__(**kwargs)


In [28]:
# ================================================
# 7) 학습
# ================================================

history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=30,
    batch_size=128,   # LSTM은 연산량↑ → 128 권장
    verbose=1
)


Epoch 1/30
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 19ms/step - accuracy: 0.4825 - loss: 1.3601 - val_accuracy: 0.7248 - val_loss: 0.8241
Epoch 2/30
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.7208 - loss: 0.8421 - val_accuracy: 0.7145 - val_loss: 0.7911
Epoch 3/30
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.7492 - loss: 0.7227 - val_accuracy: 0.7918 - val_loss: 0.6014
Epoch 4/30
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.7768 - loss: 0.6382 - val_accuracy: 0.8088 - val_loss: 0.5403
Epoch 5/30
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step - accuracy: 0.7894 - loss: 0.6072 - val_accuracy: 0.8113 - val_loss: 0.5341
Epoch 6/30
[1m136/136[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.8001 - loss: 0.5733 - val_accuracy: 0.8173 - val_loss: 0.5020
Epoch 7/30
[1m136/136

In [29]:
# ================================================
# 8) 평가
# ================================================
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"[Test] loss={test_loss:.4f}  acc={test_acc:.4f}")

y_pred_int = np.argmax(model.predict(X_test, verbose=0), axis=1)

print("\nClassification Report")
print(classification_report(y_int_test, y_pred_int, target_names=list(le.classes_)))

cm = confusion_matrix(y_int_test, y_pred_int)
print("\nConfusion Matrix (rows=true, cols=pred):\n", cm)

[Test] loss=0.1720  acc=0.9439

Classification Report
              precision    recall  f1-score   support

  Downstairs       0.80      0.85      0.82       503
     Jogging       0.98      0.98      0.98      1682
     Sitting       0.99      0.94      0.97       299
    Standing       0.93      0.99      0.96       242
    Upstairs       0.87      0.80      0.84       614
     Walking       0.96      0.97      0.97      2092

    accuracy                           0.94      5432
   macro avg       0.92      0.92      0.92      5432
weighted avg       0.94      0.94      0.94      5432


Confusion Matrix (rows=true, cols=pred):
 [[ 428    0    0    1   43   31]
 [  12 1656    0    0    5    9]
 [   3    0  282   14    0    0]
 [   1    0    1  240    0    0]
 [  58   26    1    2  492   35]
 [  35    4    0    1   23 2029]]
