In [126]:
import mlflow

mlflow.set_tracking_uri(
    "sqlite:////Users/mac/Desktop/HW/State_Data/mlflow.db"
)
mlflow.set_experiment("core3_degradation_hierarchical")

<Experiment: artifact_location='/Users/mac/Desktop/HW/State_Data/core_3/mlruns/1', creation_time=1766689258057, experiment_id='1', last_update_time=1766689258057, lifecycle_stage='active', name='core3_degradation_hierarchical', tags={'mlflow.experimentKind': 'custom_model_development'}>

In [127]:
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

import mlflow.sklearn
import mlflow.keras

공통 supervised 생성기

In [None]:
# 공통 supervised 생성기 (meta 포함)
def make_supervised_delta(df, lookback=14, horizon=7):
    X_list, y_list = [], []
    asset_list, tindex_list = [], []

    for asset_id, g in df.groupby("asset_id"):
        g = g.sort_values("t_index").reset_index(drop=True)
        s = pd.to_numeric(g["state_value"], errors="coerce").values

        if len(s) < lookback + horizon:
            continue

        for t in range(lookback - 1, len(s) - horizon):
            x = s[t - lookback + 1 : t + 1]
            y = s[t + horizon] - s[t]

            if np.isnan(x).any() or np.isnan(y):
                continue

            X_list.append(x)
            y_list.append(y)

            # ✅ 이 y(=delta)는 "현재 시점 t"에 매핑
            asset_list.append(asset_id)
            tindex_list.append(int(g.loc[t, "t_index"]))

    return (
        np.array(X_list),
        np.array(y_list),
        np.array(asset_list),
        np.array(tindex_list),
    )

 Linear Regression 학습 루틴

In [129]:
def train_linear(X_train, y_train, X_val, y_val):
    model = LinearRegression()
    model.fit(X_train, y_train)

    pred = model.predict(X_val)

    mae = mean_absolute_error(y_val, pred)
    mse = mean_squared_error(y_val, pred)
    rmse = np.sqrt(mse)
    error_std = np.std(y_val - pred)

    return model, mae, rmse, error_std

LSTM 학습 루틴 (얕게, 고정)

In [130]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

def train_lstm(X_train, y_train, X_val, y_val, lookback):
    X_train = X_train[..., None]
    X_val = X_val[..., None]

    model = Sequential([
        LSTM(32, input_shape=(lookback, 1)),
        Dense(1)
    ])

    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss="mse"
    )

    model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=20,
        batch_size=32,
        verbose=0
    )

    pred = model.predict(X_val).reshape(-1)

    mae = mean_absolute_error(y_val, pred)
    mse = mean_squared_error(y_val, pred)
    rmse = np.sqrt(mse)
    error_std = np.std(y_val - pred)

    return model, mae, rmse, error_std

Stage A (NASA) 실행: Linear

In [131]:
# Stage A (NASA) 실행: Linear + LSTM
LOOKBACK = 14
HORIZON = 7

df_a = pd.read_csv("../data_csv/nasa_core.csv")

X_a, y_a = make_supervised_delta(df_a, LOOKBACK, HORIZON)

split_a = int(len(y_a) * 0.8)
Xa_train, Xa_val = X_a[:split_a], X_a[split_a:]
ya_train, ya_val = y_a[:split_a], y_a[split_a:]

with mlflow.start_run(run_name="A_nasa_linear"):
    mlflow.log_param("stage", "A")
    mlflow.log_param("dataset", "nasa")
    mlflow.log_param("model_type", "linear")
    mlflow.log_param("lookback", LOOKBACK)
    mlflow.log_param("horizon", HORIZON)

    model, mae, rmse, error_std = train_linear(
        Xa_train, ya_train, Xa_val, ya_val
    )

    mlflow.log_metric("val_MAE", mae)
    mlflow.log_metric("val_RMSE", rmse)
    mlflow.log_metric("error_std", error_std)

    mlflow.sklearn.log_model(model, "model")




Stage A (NASA) LSTM 실행

In [132]:
with mlflow.start_run(run_name="A_nasa_lstm"):
    mlflow.log_param("stage", "A")
    mlflow.log_param("dataset", "nasa")
    mlflow.log_param("model_type", "lstm")
    mlflow.log_param("lookback", LOOKBACK)
    mlflow.log_param("horizon", HORIZON)

    model, mae, rmse, error_std = train_lstm(
        Xa_train, ya_train, Xa_val, ya_val, LOOKBACK
    )

    mlflow.log_metric("val_MAE", mae)
    mlflow.log_metric("val_RMSE", rmse)
    mlflow.log_metric("error_std", error_std)

    mlflow.keras.log_model(model, "model")

  super().__init__(**kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step




In [133]:
# =========================
# Stage B (liBattery)
# =========================

df_b = pd.read_csv("../data_csv/libattery_core.csv")

X_b, y_b = make_supervised_delta(df_b, LOOKBACK, HORIZON)

split_b = int(len(y_b) * 0.8)
Xb_train, Xb_val = X_b[:split_b], X_b[split_b:]
yb_train, yb_val = y_b[:split_b], y_b[split_b:]

# ---- Linear ----
with mlflow.start_run(run_name="B_libattery_linear"):
    mlflow.log_param("stage", "B")
    mlflow.log_param("dataset", "libattery")
    mlflow.log_param("model_type", "linear")
    mlflow.log_param("lookback", LOOKBACK)
    mlflow.log_param("horizon", HORIZON)

    model, mae, rmse, error_std = train_linear(
        Xb_train, yb_train, Xb_val, yb_val
    )

    mlflow.log_metric("val_MAE", mae)
    mlflow.log_metric("val_RMSE", rmse)
    mlflow.log_metric("error_std", error_std)

    mlflow.sklearn.log_model(model, "model")

# ---- LSTM ----
with mlflow.start_run(run_name="B_libattery_lstm"):
    mlflow.log_param("stage", "B")
    mlflow.log_param("dataset", "libattery")
    mlflow.log_param("model_type", "lstm")
    mlflow.log_param("lookback", LOOKBACK)
    mlflow.log_param("horizon", HORIZON)

    model, mae, rmse, error_std = train_lstm(
        Xb_train, yb_train, Xb_val, yb_val, LOOKBACK
    )

    mlflow.log_metric("val_MAE", mae)
    mlflow.log_metric("val_RMSE", rmse)
    mlflow.log_metric("error_std", error_std)

    mlflow.keras.log_model(model, "model")

  super().__init__(**kwargs)


[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step




Stage C용 Stress Transform (정리본)

In [134]:
def stress_transform(df, gap=2, noise_sigma=0.05):
    out = []
    for _, g in df.groupby("asset_id"):
        g = g.sort_values("t_index").iloc[::gap].copy()
        g["state_value"] += np.random.normal(0, noise_sigma, len(g))
        out.append(g)
    return pd.concat(out, ignore_index=True)

Stage C (Synthetic, Linear) 실행

In [135]:
# Stage C (Synthetic, Linear) 실행
df_c = pd.read_csv("../data_csv/synthetic_degradation_core.csv")
df_c = stress_transform(df_c, gap=5, noise_sigma=0.1)

X_c, y_c = make_supervised_delta(df_c, LOOKBACK, HORIZON)

split_c = int(len(y_c) * 0.8)
Xc_train, Xc_val = X_c[:split_c], X_c[split_c:]
yc_train, yc_val = y_c[:split_c], y_c[split_c:]

with mlflow.start_run(run_name="C_synth_linear_gap5_noise0p1"):
    mlflow.log_param("stage", "C")
    mlflow.log_param("dataset", "synthetic")
    mlflow.log_param("model_type", "linear")
    mlflow.log_param("lookback", LOOKBACK)
    mlflow.log_param("horizon", HORIZON)
    mlflow.log_param("sampling_gap", 5)
    mlflow.log_param("noise_sigma", 0.1)

    model, mae, rmse, error_std = train_linear(
        Xc_train, yc_train, Xc_val, yc_val
    )

    mlflow.log_metric("val_MAE", mae)
    mlflow.log_metric("val_RMSE", rmse)
    mlflow.log_metric("error_std", error_std)

    mlflow.sklearn.log_model(model, "model")

