In [11]:
# Import dependencies
import re 
import random
import numpy as np
import pandas as pd
from scipy.io import arff
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_text

# Seed every RNG used in this cell so the split and the fitted tree are reproducible.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)

# Load labels and features from ARFF
# NOTE(review): absolute, machine-specific path; consider deriving it from a
# configurable data directory so the notebook runs on other machines.
FEATURES_ARFF_PATH = "/Users/moonseopkim/Downloads/ST_MLC/STMems_Machine_Learning_Core/MLCtest/reference_ver/features.arff"
arff_data = arff.loadarff(FEATURES_ARFF_PATH)
df = pd.DataFrame(arff_data[0])

# The "class" column holds bytes values; decode each one exactly once and
# reuse the decoded names for both the label vocabulary and the targets.
class_names = [raw_label.decode() for raw_label in df["class"]]
LABELS = sorted(set(class_names))
label_to_int = {lbl: idx for idx, lbl in enumerate(LABELS)}

# Drop the target column once; the remaining columns are the features.
features_df = df.drop("class", axis=1)
X = features_df.values.astype(np.float32)
# Encode each class name as its index in LABELS via the dict built above
# (constant-time lookup instead of a linear LABELS.index scan per row).
y = np.array([label_to_int[name] for name in class_names])
feature_names = list(features_df.columns)

# Split dataset into train set and test set (70/30, stratified on the label)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=SEED)

# Train decision tree classifier
clf = DecisionTreeClassifier(
    max_depth=128,
    criterion="entropy",
    min_impurity_decrease=1e-3,
    ccp_alpha=1e-2,
    class_weight="balanced",
    random_state=SEED,
)
clf.fit(X_train, y_train)

# Define utility function for weka format conversion
def convert_sklearn_to_weka(clf, feature_names, labels=None, decimals=6):
    """Render a fitted scikit-learn decision tree as Weka-style tree text.

    The tree is first dumped with ``export_text``; the ``|--- `` branch
    markers are then removed and every ``class: <int>`` leaf line is folded
    into the split-condition line directly above it as ``<condition>: <label>``.

    Args:
        clf: Fitted decision tree whose class names are printed by
            ``export_text`` as non-negative integers (as is the case when it
            was trained on integer-encoded targets).
        feature_names: Feature names, in the column order used for training.
        labels: Optional sequence mapping the integer class id printed by
            ``export_text`` to its human-readable label. Defaults to the
            notebook-level ``LABELS`` list.
        decimals: Number of decimal digits used for split thresholds.

    Returns:
        str: The tree, one node per line, joined with ``"\\n"``. A tree that
        consists of a single leaf is rendered as ``": <label>"``.
    """
    if labels is None:
        labels = LABELS  # fall back to the notebook-level class-name list

    # export_text only prints the first 10 levels and rounds thresholds to
    # 2 decimals by default. Request the full depth of the fitted tree so no
    # branch is replaced by a "truncated branch" placeholder, and ask for
    # more digits so thresholds are not coarsened in the exported model.
    report = export_text(
        clf,
        feature_names=feature_names,
        max_depth=clf.get_depth(),
        decimals=decimals,
    )
    lines = re.sub(r"\|--- ", r"", report).splitlines()

    weka_lines = []
    for line in lines:
        leaf = re.search(r"class: (\d+)$", line)
        if leaf is None:
            # Split condition: keep as-is (a leaf label may be appended later).
            weka_lines.append(line)
            continue
        label = labels[int(leaf.group(1))]
        if weka_lines:
            # A leaf always follows the condition that leads to it.
            weka_lines[-1] = f"{weka_lines[-1]}: {label}"
        else:
            # Root is itself a leaf (e.g. the tree was pruned to one node):
            # emit the bare label instead of silently returning nothing.
            weka_lines.append(f": {label}")
    return "\n".join(weka_lines)

# Convert the trained tree to Weka-style text and save it as "dectree.txt"
# in the current working directory (an existing file is overwritten).
dectree = convert_sklearn_to_weka(clf=clf, feature_names=feature_names)
with open("dectree.txt", mode="w") as tree_file:
    tree_file.write(dectree)

In [7]:
import pandas as pd
import numpy as np

# 1) Read the CSV log; lines starting with "#" are skipped as comments.
df = pd.read_csv(
    "/Users/moonseopkim/Downloads/ST_MLC/STMems_Machine_Learning_Core/MLCtest/personalized_ver/high_personalized.csv",
    comment="#",
    sep=",",
    engine="python",
)
# Strip stray whitespace from the header names before looking columns up.
df.columns = df.columns.str.strip()

# 2) Scale each accelerometer axis from milli-g to g.
x_g, y_g, z_g = (
    df[column] / 1000.0
    for column in ("acc_x[mg]", "acc_y[mg]", "acc_z[mg]")
)

# 3) Squared magnitude of the acceleration vector (g²).
v2 = x_g.pow(2) + y_g.pow(2) + z_g.pow(2)

# Number of samples per analysis window.
WINDOW = 240

# 4) Peak-to-peak (max - min) of v2 over consecutive, non-overlapping windows.
#    Trailing samples that do not fill a complete window are left out.
p2p_v2 = []
for start in range(0, len(v2) - WINDOW + 1, WINDOW):
    chunk = v2[start:start + WINDOW]
    p2p_v2.append(chunk.max() - chunk.min())

# 5) Report one line per window, numbered from 1.
for idx, val in enumerate(p2p_v2, start=1):
    print(f"Window {idx}: {val:.5f} g²")


Window 1: 13.10600 g²
Window 2: 14.08967 g²
Window 3: 13.18100 g²
Window 4: 10.34776 g²
Window 5: 11.53041 g²
