In [2]:
# 01_label_merge.py
import pandas as pd
import os

# === 1. Names of the CSV files logged by the ESP32 ===
# Make sure every name/path matches the files you actually have.
DATA_FILES = {
    "diam": "idle_0002.csv",     # recording while standing still
    "jalan": "walk_0003.csv",    # recording while walking
    "jogging": "jog_0005.csv"    # recording while jogging
}

# Columns kept from each ESP32 log, in the order they are written to the merged file.
EXPECTED_COLS = ['timestamp_ms', 'accX', 'accY', 'accZ', 'gyroX', 'gyroY', 'gyroZ']


def merge_labeled_csvs(data_files, expected_cols=EXPECTED_COLS):
    """Read every CSV in ``data_files`` and stack them into one labelled frame.

    Parameters
    ----------
    data_files : dict[str, str]
        Maps an activity label to the path of the CSV recorded for it.
    expected_cols : sequence of str
        Columns to keep from each file, in output order.

    Returns
    -------
    pandas.DataFrame
        The selected columns of every file that exists, in ``data_files``
        order, with an added ``label`` column and a fresh 0..n-1 index.
        An empty DataFrame when none of the files exist.

    Raises
    ------
    KeyError
        If a file that exists lacks one of ``expected_cols``.
    """
    frames = []
    for label, path in data_files.items():
        if not os.path.exists(path):
            print(f"[!] File {path} tidak ditemukan, lewati.")
            continue

        # Lines starting with '#' are treated as comments and skipped.
        raw = pd.read_csv(path, comment='#')

        # Select/reorder the sensor columns and tag every row with its activity.
        # .assign returns a new frame, so the freshly read data is never mutated.
        labelled = raw[list(expected_cols)].assign(label=label)

        print(f"[OK] {label:<8} -> {len(labelled)} baris")
        frames.append(labelled)

    if not frames:
        return pd.DataFrame()

    # One concat at the end instead of growing the frame inside the loop:
    # avoids repeated copying and concatenating with an empty DataFrame.
    return pd.concat(frames, ignore_index=True)


# === 2. Merge all CSVs and attach labels ===
df_all = merge_labeled_csvs(DATA_FILES)

# === 3. Save the merged dataset ===
if not df_all.empty:
    out_path = "dataset_all.csv"
    df_all.to_csv(out_path, index=False)
    print(f"\n✅ Dataset gabungan disimpan sebagai: {out_path}")
    print(f"Total baris: {len(df_all)}")
    print(df_all.head())
else:
    print("[!] Tidak ada data yang digabungkan.")


[OK] diam     -> 15000 baris
[OK] jalan    -> 15000 baris
[OK] jogging  -> 15000 baris

✅ Dataset gabungan disimpan sebagai: dataset_all.csv
Total baris: 45000
   timestamp_ms   accX   accY   accZ  gyroX  gyroY  gyroZ label
0           806  0.179 -0.014  1.015 -3.586  1.251 -0.885  diam
1           826  0.185 -0.004  1.019 -3.937  0.122 -0.473  diam
2           846  0.197  0.008  1.023 -3.906 -1.297 -1.984  diam
3           866  0.171  0.015  1.003 -3.281 -2.090 -3.281  diam
4           886  0.159  0.004  0.995 -2.625 -0.595 -2.670  diam


In [3]:
# 02_split_dataset.py
import pandas as pd
from sklearn.model_selection import train_test_split

# === 1. Load the merged dataset ===
df = pd.read_csv("dataset_all.csv")

print("Jumlah total data:", len(df))
print("Distribusi label:\n", df['label'].value_counts())

# === 2. Split into Train / Validation / Test ===
# Ratio: 64% train, 16% val, 20% test, applied separately to every label.
#
# The rows are a time series that is later cut into windows of consecutive
# samples. A shuffled row-level split would scatter neighbouring samples across
# the three subsets, so those windows would no longer be contiguous in time and
# near-identical adjacent samples would leak between train and test.
# Each label's rows are therefore cut into three contiguous blocks in file
# order (train -> val -> test) with shuffle=False. Handling each label on its
# own keeps the class balance that `stratify` provided before.
train_parts, val_parts, test_parts = [], [], []
for _, recording in df.groupby('label', sort=False):
    head, test_part = train_test_split(recording, test_size=0.2, shuffle=False)
    train_part, val_part = train_test_split(head, test_size=0.2, shuffle=False)
    train_parts.append(train_part)
    val_parts.append(val_part)
    test_parts.append(test_part)

train = pd.concat(train_parts, ignore_index=True)
val = pd.concat(val_parts, ignore_index=True)
test = pd.concat(test_parts, ignore_index=True)

print("\nTrain:", len(train))
print("Val  :", len(val))
print("Test :", len(test))

# === 3. Save the split ===
train.to_csv("train.csv", index=False)
val.to_csv("val.csv", index=False)
test.to_csv("test.csv", index=False)

print("\n✅ Dataset dibagi dan disimpan:")
print("  train.csv  ->", len(train))
print("  val.csv    ->", len(val))
print("  test.csv   ->", len(test))


Jumlah total data: 45000
Distribusi label:
 label
diam       15000
jalan      15000
jogging    15000
Name: count, dtype: int64

Train: 28800
Val  : 7200
Test : 9000

✅ Dataset dibagi dan disimpan:
  train.csv  -> 28800
  val.csv    -> 7200
  test.csv   -> 9000


In [4]:
# 03_make_windows.py
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import LabelEncoder

# ==== Parameters ====
WINDOW_SIZE = 50     # samples per window (1 second @ 50Hz)
STEP_SIZE = 25       # hop between window starts (50% overlap)
FEATURES = ['accX', 'accY', 'accZ', 'gyroX', 'gyroY', 'gyroZ']

# ==== Window builder ====
def make_windows(df, label_encoder, window_size=None, step_size=None):
    """Slice a single-activity frame into fixed-length sliding windows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the ``FEATURES`` columns and a ``label`` column. All rows
        are expected to share one label; only the first row's label is encoded.
    label_encoder : object
        Anything with a ``transform(labels)`` method returning integer codes
        (e.g. a fitted ``LabelEncoder``).
    window_size : int, optional
        Rows per window. Defaults to ``WINDOW_SIZE``.
    step_size : int, optional
        Row offset between consecutive window starts. Defaults to ``STEP_SIZE``.

    Returns
    -------
    X : numpy.ndarray, shape (n_windows, window_size, len(FEATURES))
    y : numpy.ndarray, shape (n_windows,)
        The encoded label repeated once per window.

    When ``df`` has fewer than ``window_size`` rows (including zero rows), both
    arrays are empty but keep the shapes above, so the result can still be
    stacked with windows from other frames.
    """
    if window_size is None:
        window_size = WINDOW_SIZE
    if step_size is None:
        step_size = STEP_SIZE

    data = df[FEATURES].values
    starts = range(0, len(data) - window_size + 1, step_size)

    if len(starts) == 0:
        # Not enough rows for a single window: return correctly shaped empties
        # rather than a 1-D array that cannot be stacked with 3-D windows.
        empty_X = np.empty((0, window_size, len(FEATURES)), dtype=data.dtype)
        empty_y = np.empty((0,), dtype=np.int64)
        return empty_X, empty_y

    # Encode just the first label instead of the whole column.
    label_num = label_encoder.transform(df['label'].values[:1])[0]

    X = np.stack([data[start:start + window_size] for start in starts])
    y = np.full(len(starts), label_num)
    return X, y

# ==== Process every subset (train/val/test) ====
all_X, all_y = {}, {}
encoder = LabelEncoder()

# Fit the encoder on the labels of the full merged dataset so that every
# subset shares the same label -> integer mapping.
df_all = pd.read_csv("dataset_all.csv")
encoder.fit(df_all['label'])
print("Kelas label:", list(encoder.classes_))

for subset in ['train', 'val', 'test']:
    filename = f"{subset}.csv"
    if not os.path.exists(filename):
        print(f"[!] File {filename} tidak ditemukan.")
        continue

    print(f"\n[+] Memproses {filename} ...")
    df = pd.read_csv(filename)

    # Window each activity on its own so no window straddles two activities.
    # NOTE(review): make_windows slices consecutive rows, so this assumes the
    # rows of each subset file are stored in recording order; confirm upstream.
    X_list, y_list = [], []
    for label in df['label'].unique():
        df_label = df[df['label'] == label]
        X_part, y_part = make_windows(df_label, encoder)
        if len(X_part) == 0:
            # Too few rows for even one window; stacking an empty part with
            # real windows would fail on mismatched shapes, so leave it out.
            print(f"  [!] Label '{label}' terlalu pendek untuk satu window, lewati.")
            continue
        X_list.append(X_part)
        y_list.append(y_part)

    if not X_list:
        # Nothing to stack or save for this subset.
        print(f"  [!] Tidak ada window yang dapat dibuat dari {filename}.")
        continue

    X = np.vstack(X_list)
    y = np.concatenate(y_list)

    all_X[subset] = X
    all_y[subset] = y

    np.save(f"X_{subset}.npy", X)
    np.save(f"y_{subset}.npy", y)
    print(f"  -> {X.shape[0]} window dibuat, bentuk tiap window {X.shape[1:]}")

print("\n✅ Semua subset selesai diproses dan disimpan (X_*.npy, y_*.npy)")


Kelas label: ['diam', 'jalan', 'jogging']

[+] Memproses train.csv ...
  -> 1149 window dibuat, bentuk tiap window (50, 6)

[+] Memproses val.csv ...
  -> 285 window dibuat, bentuk tiap window (50, 6)

[+] Memproses test.csv ...
  -> 357 window dibuat, bentuk tiap window (50, 6)

✅ Semua subset selesai diproses dan disimpan (X_*.npy, y_*.npy)
