In [3]:
import tensorflow as tf
print(tf.__version__)

2.16.2


In [4]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

PATH = "../data_csv/health_timeseries_core_state.csv"
LOOKBACK = 14
HORIZON = 7

df = pd.read_csv(PATH)
df["date"] = pd.to_datetime(df["date"])

df = (
    df.groupby(["user_id", "date"], as_index=False)
      .mean(numeric_only=True)
)

df = df.sort_values(["user_id", "date"]).reset_index(drop=True)

print("rows:", len(df), "users:", df["user_id"].nunique())
print("missing health_state_index:", df["health_state_index"].isna().sum())
df.head()

rows: 940 users: 33
missing health_state_index: 461


Unnamed: 0,user_id,date,mean_hr,hr_std,steps,calories,sleep_minutes,health_state_level,health_state_speed,health_state_index
0,1503960366,2016-04-12,,,13162.0,1985.0,327.0,-0.002802,0.0,-0.002802
1,1503960366,2016-04-13,,,10735.0,1797.0,384.0,0.054147,0.056949,0.45279
2,1503960366,2016-04-14,,,10460.0,1776.0,,,,
3,1503960366,2016-04-15,,,9762.0,1745.0,412.0,0.064168,,
4,1503960366,2016-04-16,,,12669.0,1863.0,340.0,0.005229,,


Dataset 생성 함수 (X=과거 state, y=Δstate)

In [5]:
def make_supervised_delta(df, lookback=14, horizon=7):
    X_list, y_list = [], []
    meta = []  # (user_id, t_date, target_date)

    for uid, g in df.groupby("user_id"):
        g = g.sort_values("date").copy()

        # state가 없는 날은 학습 불가 → 제거
        g = g.dropna(subset=["health_state_index"])

        s = g["health_state_index"].values
        dates = g["date"].values

        if len(s) < lookback + horizon + 1:
            continue

        # t는 "입력 윈도우의 마지막 시점"
        # y는 t+horizon의 state - t의 state
        for t in range(lookback - 1, len(s) - horizon):
            x = s[t - lookback + 1 : t + 1]                 
            y = s[t + horizon] - s[t]                       

            X_list.append(x)
            y_list.append(y)
            meta.append((uid, dates[t], dates[t + horizon]))

    X = np.array(X_list)  
    y = np.array(y_list)  
    meta = pd.DataFrame(meta, columns=["user_id", "t_date", "target_date"])

    return X, y, meta

X, y, meta = make_supervised_delta(df, LOOKBACK, HORIZON)

print("Supervised samples:", len(y))
print("X shape:", X.shape, "y shape:", y.shape)
meta.head()

Supervised samples: 119
X shape: (119, 14) y shape: (119,)


Unnamed: 0,user_id,t_date,target_date
0,1624580081,2016-04-25,2016-05-02
1,1624580081,2016-04-26,2016-05-03
2,1624580081,2016-04-27,2016-05-04
3,1624580081,2016-04-28,2016-05-05
4,1624580081,2016-04-29,2016-05-06


Time-based Train/Val split

In [6]:
order = np.argsort(meta["target_date"].values)
X = X[order]
y = y[order]
meta = meta.iloc[order].reset_index(drop=True)

split = int(len(y) * 0.8)

X_train, y_train = X[:split], y[:split]
X_val, y_val = X[split:], y[split:]

print("train:", len(y_train), "val:", len(y_val))

train: 95 val: 24


Model 1 — Linear Regression + 평가

In [8]:
lr = LinearRegression()
lr.fit(X_train, y_train)

pred_train = lr.predict(X_train)
pred_val = lr.predict(X_val)

def eval_reg(y_true, y_pred, name="model"):
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))
    print(f"[{name}] MAE={mae:.4f} RMSE={rmse:.4f}")
    return mae, rmse

eval_reg(y_train, pred_train, "Linear(train)")
eval_reg(y_val, pred_val, "Linear(val)")

# 샘플 출력
for i in range(5):
    print("true:", y_val[i], "pred:", pred_val[i], "| user:", meta.loc[split+i, "user_id"])

[Linear(train)] MAE=0.4041 RMSE=0.5031
[Linear(val)] MAE=0.4633 RMSE=0.5724
true: 0.1779976278372695 pred: -0.2614488503864092 | user: 1624580081
true: -1.160326742571763 pred: -0.0015609972331679794 | user: 6962181067
true: 1.172329251408335 pred: -0.1138968523935345 | user: 6290855005
true: 0.1171181401954833 pred: 0.2676581960115344 | user: 2022484408
true: 0.41478817829104336 pred: 0.902094260821762 | user: 2320127002


Model 2 — LSTM (얕게, Keras) + 평가

In [9]:
import tensorflow as tf
from tensorflow.keras import layers, models

# LSTM 입력 형태로 reshape
X_train_lstm = X_train[..., None]  # (N, lookback, 1)
X_val_lstm = X_val[..., None]

model = models.Sequential([
    layers.Input(shape=(LOOKBACK, 1)),
    layers.LSTM(16),               # 얕게
    layers.Dense(1)
])

model.compile(optimizer="adam", loss="mse")

hist = model.fit(
    X_train_lstm, y_train,
    validation_data=(X_val_lstm, y_val),
    epochs=15,
    batch_size=32,
    verbose=1
)

pred_train_lstm = model.predict(X_train_lstm).reshape(-1)
pred_val_lstm = model.predict(X_val_lstm).reshape(-1)

eval_reg(y_train, pred_train_lstm, "LSTM(train)")
eval_reg(y_val, pred_val_lstm, "LSTM(val)")

for i in range(5):
    print("true:", y_val[i], "pred:", pred_val_lstm[i], "| user:", meta.loc[split+i, "user_id"])

2025-12-25 22:13:56.609930: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2025-12-25 22:13:56.610032: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2025-12-25 22:13:56.610052: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
2025-12-25 22:13:56.610137: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-12-25 22:13:56.610196: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/15


2025-12-25 22:13:57.636415: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 370ms/step - loss: 0.6371 - val_loss: 0.2821
Epoch 2/15
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 0.6314 - val_loss: 0.2818
Epoch 3/15
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.6258 - val_loss: 0.2819
Epoch 4/15
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - loss: 0.6200 - val_loss: 0.2819
Epoch 5/15
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step - loss: 0.6144 - val_loss: 0.2818
Epoch 6/15
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 0.6094 - val_loss: 0.2821
Epoch 7/15
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 0.6054 - val_loss: 0.2827
Epoch 8/15
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 0.5993 - val_loss: 0.2828
Epoch 9/15
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2

“대충 비교” 출력(마무리)

In [12]:
print("\n=== Core 2 summary ===")
print(f"LOOKBACK={LOOKBACK}, HORIZON={HORIZON}")
print("Linear vs LSTM: val MAE/RMSE만 보고 Core 3에서 MLflow로 확장")


=== Core 2 summary ===
LOOKBACK=14, HORIZON=7
Linear vs LSTM: val MAE/RMSE만 보고 Core 3에서 MLflow로 확장


결과 정리

In [13]:
df_linear = pd.DataFrame({
    "user_id": meta.loc[split:, "user_id"].values,
    "date": pd.to_datetime(meta.loc[split:, "target_date"]).dt.date,
    "model_type": "linear",
    "y_true": y_val,
    "y_pred": pred_val,
})

df_linear["abs_error"] = np.abs(df_linear["y_true"] - df_linear["y_pred"])
df_linear["run_tag"] = "core2_baseline"

In [14]:
df_lstm = pd.DataFrame({
    "user_id": meta.loc[split:, "user_id"].values,
    "date": pd.to_datetime(meta.loc[split:, "target_date"]).dt.date,
    "model_type": "lstm",
    "y_true": y_val,
    "y_pred": pred_val_lstm,
})

df_lstm["abs_error"] = np.abs(df_lstm["y_true"] - df_lstm["y_pred"])
df_lstm["run_tag"] = "core2_baseline"

In [15]:
df_all = pd.concat([df_linear, df_lstm], ignore_index=True)
df_all.head()

Unnamed: 0,user_id,date,model_type,y_true,y_pred,abs_error,run_tag
0,1624580081,2016-05-10,linear,0.177998,-0.261449,0.439446,core2_baseline
1,6962181067,2016-05-10,linear,-1.160327,-0.001561,1.158766,core2_baseline
2,6290855005,2016-05-10,linear,1.172329,-0.113897,1.286226,core2_baseline
3,2022484408,2016-05-10,linear,0.117118,0.267658,0.15054,core2_baseline
4,2320127002,2016-05-10,linear,0.414788,0.902094,0.487306,core2_baseline


In [19]:
pip install sqlalchemy pymysql

Collecting sqlalchemy
  Using cached sqlalchemy-2.0.45-cp310-cp310-macosx_11_0_arm64.whl.metadata (9.5 kB)
Collecting pymysql
  Using cached pymysql-1.1.2-py3-none-any.whl.metadata (4.3 kB)
Using cached sqlalchemy-2.0.45-cp310-cp310-macosx_11_0_arm64.whl (2.2 MB)
Using cached pymysql-1.1.2-py3-none-any.whl (45 kB)
Installing collected packages: sqlalchemy, pymysql
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [pymysql]m1/2[0m [pymysql]
[1A[2KSuccessfully installed pymysql-1.1.2 sqlalchemy-2.0.45
Note: you may need to restart the kernel to use updated packages.


In [21]:
from sqlalchemy import create_engine


df_all = df_all.rename(columns={
    "model_type": "mode_type"
})

engine = create_engine(
    "mysql+pymysql://health_user:strong_password_here@localhost:3306/HEALTH"
)

df_all.to_sql(
    "prediction_results",
    con=engine,
    if_exists="append",
    index=False
)

print("✅ Core 2 prediction_results 저장 완료")

✅ Core 2 prediction_results 저장 완료
