In [1]:
# ================================================
# 0) Imports
# ================================================
import numpy as np
import pandas as pd
from collections import Counter

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, BatchNormalization, Dropout, Flatten, Dense
from tensorflow.keras.utils import to_categorical

In [2]:
# ================================================
# 1) 데이터 로드 & 기본 전처리
# ================================================
cols = ["subject", "label", "timestamp", "x", "y", "z"]

df = pd.read_csv(
    "/content/drive/MyDrive/data/WISDM_ar_v1.1_raw.txt",
    header=None,
    names=cols,
    on_bad_lines="skip"
).dropna()

df["z"] = df["z"].astype(str).str.replace(";", "", regex=False).astype(float)
df["x"] = df["x"].astype(float)
df["y"] = df["y"].astype(float)

print("샘플:\n", df.head(), "\n")
print("라벨 분포:", Counter(df["label"]))

샘플:
    subject    label       timestamp         x          y         z
0       33  Jogging  49105962326000 -0.694638  12.680544  0.503953
1       33  Jogging  49106062271000  5.012288  11.264028  0.953424
2       33  Jogging  49106112167000  4.903325  10.882658 -0.081722
3       33  Jogging  49106222305000 -0.612916  18.496431  3.023717
4       33  Jogging  49106332290000 -1.184970  12.108489  7.205164 

라벨 분포: Counter({'Walking': 418393, 'Jogging': 336445, 'Upstairs': 122869, 'Downstairs': 100425, 'Sitting': 59939, 'Standing': 48394})


In [3]:
df

Unnamed: 0,subject,label,timestamp,x,y,z
0,33,Jogging,49105962326000,-0.694638,12.680544,0.503953
1,33,Jogging,49106062271000,5.012288,11.264028,0.953424
2,33,Jogging,49106112167000,4.903325,10.882658,-0.081722
3,33,Jogging,49106222305000,-0.612916,18.496431,3.023717
4,33,Jogging,49106332290000,-1.184970,12.108489,7.205164
...,...,...,...,...,...,...
1086461,19,Sitting,131623331483000,9.000000,-1.570000,1.690000
1086462,19,Sitting,131623371431000,9.040000,-1.460000,1.730000
1086463,19,Sitting,131623411592000,9.080000,-1.380000,1.690000
1086464,19,Sitting,131623491487000,9.000000,-1.460000,1.730000


In [4]:
# ================================================
# 2) Sliding Window (window=200, hop=100)
# ================================================
Fs = 20
frame_size = Fs*4 # 80
hop_size = Fs*2 # 40 -> 50% 오버래핑

N_FEATURES = 3

def get_frames(df, frame_size, hop_size):
    frames, labels = [], []
    lab = df['label'].to_numpy()
    xv = df['x'].to_numpy(); yv = df['y'].to_numpy(); zv = df['z'].to_numpy()

    for i in range(0, len(df) - frame_size, hop_size):
        x = xv[i: i + frame_size]
        y = yv[i: i + frame_size]
        z = zv[i: i + frame_size]

        seg = lab[i: i + frame_size]
        vals, counts = np.unique(seg, return_counts=True)
        label = vals[np.argmax(counts)]  # 최빈값

        frames.append([x, y, z])
        labels.append(label)

    frames = np.asarray(frames).reshape(-1, frame_size, N_FEATURES)
    labels = np.asarray(labels)
    return frames, labels

X, y_raw = get_frames(df, frame_size, hop_size)
print("X shape:", X.shape)
print("라벨 분포(프레임 기준):", Counter(y_raw))

X shape: (27160, 80, 3)
라벨 분포(프레임 기준): Counter({np.str_('Walking'): 10462, np.str_('Jogging'): 8412, np.str_('Upstairs'): 3068, np.str_('Downstairs'): 2513, np.str_('Sitting'): 1494, np.str_('Standing'): 1211})


In [5]:
# ================================================
# 3) 라벨 인코딩 (원-핫 인코딩)
# ================================================
le = LabelEncoder()
y = le.fit_transform(y_raw)
num_classes = len(le.classes_)
print("Classes:", list(le.classes_))

y_cat = to_categorical(y, num_classes=num_classes)  # one-hot

Classes: [np.str_('Downstairs'), np.str_('Jogging'), np.str_('Sitting'), np.str_('Standing'), np.str_('Upstairs'), np.str_('Walking')]


In [6]:
# ================================================
# 4) Train/Test Split
# ================================================
X_train, X_test, y_train, y_test = train_test_split(
    X, y_cat, test_size=0.2, random_state=42, stratify=y
)

In [7]:
# ================================================
# 5) 표준화
# ================================================
scaler = StandardScaler()
X_train_2d = X_train.reshape(-1, X_train.shape[-1])
X_test_2d  = X_test.reshape(-1,  X_test.shape[-1])

scaler.fit(X_train_2d)
X_train = scaler.transform(X_train_2d).reshape(X_train.shape)
X_test  = scaler.transform(X_test_2d ).reshape(X_test.shape)

In [8]:
# ================================================
# 6) CNN 모델 정의 (Sequential, Flatten)
# ================================================
def build_cnn(input_shape, num_classes):
    model = Sequential()
    model.add(Conv1D(64, kernel_size=5, activation='relu', input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))

    model.add(Conv1D(128, kernel_size=5, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))

    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(optimizer=tf.keras.optimizers.Adam(1e-3),
                  loss='categorical_crossentropy',  # one-hot
                  metrics=['accuracy'])
    return model

model = build_cnn((frame_size, N_FEATURES), num_classes)
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
# ================================================
# 7) 학습
# ================================================
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=30,
    batch_size=256,
    verbose=1
)

Epoch 1/30
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 109ms/step - accuracy: 0.6009 - loss: 1.3994 - val_accuracy: 0.1910 - val_loss: 6.2860
Epoch 2/30
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7813 - loss: 0.6144 - val_accuracy: 0.0909 - val_loss: 11.1003
Epoch 3/30
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8096 - loss: 0.5217 - val_accuracy: 0.1493 - val_loss: 6.6724
Epoch 4/30
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8304 - loss: 0.4645 - val_accuracy: 0.3813 - val_loss: 2.6762
Epoch 5/30
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.8430 - loss: 0.4244 - val_accuracy: 0.6588 - val_loss: 1.2618
Epoch 6/30
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8670 - loss: 0.3766 - val_accuracy: 0.7835 - val_loss: 0.7653
Epoch 7/30
[1m68/68[0m [32m━━━━━━

In [10]:
# ================================================
# 8) 평가
# ================================================
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"[Test] loss={test_loss:.4f}  acc={test_acc:.4f}")

y_pred = np.argmax(model.predict(X_test, verbose=0), axis=1)
y_true = np.argmax(y_test, axis=1)

print("\nClassification Report")
print(classification_report(y_true, y_pred, target_names=list(le.classes_)))

cm = confusion_matrix(y_true, y_pred)
print("\nConfusion Matrix (rows=true, cols=pred):\n", cm)

[Test] loss=0.2114  acc=0.9542

Classification Report
              precision    recall  f1-score   support

  Downstairs       0.88      0.81      0.84       503
     Jogging       1.00      0.97      0.98      1682
     Sitting       0.99      0.94      0.97       299
    Standing       0.89      1.00      0.94       242
    Upstairs       0.84      0.92      0.88       614
     Walking       0.98      0.99      0.98      2092

    accuracy                           0.95      5432
   macro avg       0.93      0.94      0.93      5432
weighted avg       0.96      0.95      0.95      5432


Confusion Matrix (rows=true, cols=pred):
 [[ 406    0    1   10   67   19]
 [   8 1626    0    0   26   22]
 [   0    0  282   16    1    0]
 [   0    0    1  241    0    0]
 [  34    4    0    5  565    6]
 [  16    1    0    0   12 2063]]
