In [11]:
import mlflow
import mlflow.sklearn
import mlflow.keras

import numpy as np
import pandas as pd
import os

# Directory that receives every prediction CSV written by this notebook.
os.makedirs("../core3_output", exist_ok=True)

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

# MLflow tracking store. The default is the original machine-specific SQLite
# file; exporting MLFLOW_TRACKING_URI before starting the kernel overrides it,
# so the notebook can run on another machine without editing this cell.
mlflow.set_tracking_uri(
    os.environ.get(
        "MLFLOW_TRACKING_URI",
        "sqlite:////Users/mac/Desktop/HW/State_Data/mlflow.db",
    )
)
mlflow.set_experiment("core3_degradation_hierarchical")

# Window geometry shared by every stage: each sample uses LOOKBACK consecutive
# state values as features and predicts the change HORIZON steps ahead.
LOOKBACK = 14
HORIZON = 7

In [12]:
# Shared supervised-sample generator (also returns per-sample metadata)
def make_supervised_delta(df, lookback=14, horizon=7):
    """Turn per-asset state series into sliding-window regression samples.

    For every asset the rows are ordered by ``t_index`` and ``state_value`` is
    coerced to numeric (unparseable entries become NaN). For each anchor
    position ``t`` a sample is built from:

    * features: the ``lookback`` values ending at ``t`` (inclusive)
    * target:   ``state[t + horizon] - state[t]``

    Windows or targets containing NaN are skipped, as are assets with fewer
    than ``lookback + horizon`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``asset_id``, ``t_index`` and ``state_value``.
    lookback : int, default 14
        Number of past values per feature window (must be >= 1).
    horizon : int, default 7
        Number of steps ahead used for the target delta (must be >= 1).

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y, asset_ids, t_indices)`` where ``X`` has shape
        ``(n_samples, lookback)`` and the other three have length
        ``n_samples``. ``t_indices`` holds the ``t_index`` of each anchor row.
        When no sample can be built, ``X`` has shape ``(0, lookback)``.

    Raises
    ------
    ValueError
        If ``lookback`` or ``horizon`` is smaller than 1.
    """
    if lookback < 1 or horizon < 1:
        raise ValueError(
            f"lookback and horizon must be >= 1 (got lookback={lookback}, horizon={horizon})"
        )

    X_list, y_list = [], []
    asset_list, tindex_list = [], []

    for asset_id, g in df.groupby("asset_id"):
        g = g.sort_values("t_index").reset_index(drop=True)
        s = pd.to_numeric(g["state_value"], errors="coerce").values
        # Read the ordered t_index column once instead of a .loc lookup per sample.
        t_values = g["t_index"].values

        if len(s) < lookback + horizon:
            continue

        for t in range(lookback - 1, len(s) - horizon):
            x = s[t - lookback + 1 : t + 1]
            y = s[t + horizon] - s[t]

            if np.isnan(x).any() or np.isnan(y):
                continue

            X_list.append(x)
            y_list.append(y)
            asset_list.append(asset_id)
            tindex_list.append(int(t_values[t]))

    X = np.array(X_list)
    if X.size == 0:
        # Keep the feature matrix 2-D even when empty so that downstream
        # slicing and shape checks see (0, lookback) rather than (0,).
        X = X.reshape(0, lookback)

    return (
        X,
        np.array(y_list),
        np.array(asset_list),
        np.array(tindex_list),
    )

def train_linear(X_train, y_train, X_val, y_val):
    """Fit an ordinary least-squares baseline and score it on the validation set.

    Returns
    -------
    tuple
        ``(model, pred, mae, rmse, error_std)`` where ``pred`` are the
        validation predictions, ``mae`` / ``rmse`` are the validation errors
        and ``error_std`` is the standard deviation of the validation
        residuals ``y_val - pred`` as a Python float.
    """
    regressor = LinearRegression().fit(X_train, y_train)
    val_pred = regressor.predict(X_val)

    val_mae = mean_absolute_error(y_val, val_pred)
    val_rmse = np.sqrt(mean_squared_error(y_val, val_pred))
    residual_std = float(np.std(y_val - val_pred))

    return regressor, val_pred, val_mae, val_rmse, residual_std

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam

def train_lstm(X_train, y_train, X_val, y_val, lookback,
               units=32, epochs=20, batch_size=32, learning_rate=0.001, seed=None):
    """Train a single-layer LSTM regressor and score it on the validation set.

    Parameters
    ----------
    X_train, X_val : numpy.ndarray
        Feature windows; a trailing channel axis is appended before they are
        fed to the network, which expects inputs of shape ``(lookback, 1)``.
    y_train, y_val : numpy.ndarray
        Regression targets.
    lookback : int
        Window length declared as the model's input length.
    units : int, default 32
        Number of LSTM units.
    epochs : int, default 20
        Training epochs.
    batch_size : int, default 32
        Mini-batch size.
    learning_rate : float, default 0.001
        Adam learning rate.
    seed : int or None, default None
        When given, the Keras/backend random generators are seeded before the
        model is built. ``None`` keeps the original unseeded behaviour.

    Returns
    -------
    tuple
        ``(model, pred, mae, rmse, error_std)`` where ``pred`` is the
        flattened validation prediction vector and ``error_std`` is the
        standard deviation of the validation residuals as a Python float.
    """
    # Imported locally so this function does not depend on edits to the
    # notebook's import cell.
    from tensorflow.keras.layers import Input

    if seed is not None:
        from tensorflow.keras.utils import set_random_seed
        set_random_seed(seed)

    X_train_l = X_train[..., None]
    X_val_l = X_val[..., None]

    # An explicit Input layer replaces the `input_shape=` argument on the LSTM
    # layer, which newer Keras versions warn about.
    model = Sequential([
        Input(shape=(lookback, 1)),
        LSTM(units),
        Dense(1)
    ])
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss="mse")

    model.fit(
        X_train_l, y_train,
        validation_data=(X_val_l, y_val),
        epochs=epochs,
        batch_size=batch_size,
        verbose=0
    )

    # predict() returns a column; flatten it so it lines up with y_val.
    pred = model.predict(X_val_l).reshape(-1)
    resid = y_val - pred

    mae = mean_absolute_error(y_val, pred)
    rmse = np.sqrt(mean_squared_error(y_val, pred))
    error_std = float(np.std(resid))

    return model, pred, mae, rmse, error_std

def make_df_pred(asset_id_val, t_index_val, y_pred_val, error_std_val):
    """Assemble the per-sample prediction table that is exported to CSV.

    The frame has the columns ``asset_id``, ``t_index``, ``y_pred`` and
    ``error_std`` (in that order); ``error_std`` repeats the single scalar
    ``error_std_val`` once per prediction.
    """
    n_rows = len(y_pred_val)

    columns = {}
    columns["asset_id"] = asset_id_val
    columns["t_index"] = t_index_val
    columns["y_pred"] = y_pred_val
    columns["error_std"] = np.full(n_rows, error_std_val)

    return pd.DataFrame(columns)

In [13]:
# ---- Stage A: NASA dataset ----
df_a = pd.read_csv("../data_csv/nasa_core.csv")

X_a, y_a, aid_a, tid_a = make_supervised_delta(df_a, LOOKBACK, HORIZON)

# Positional 80/20 hold-out. make_supervised_delta emits samples asset by
# asset, so the validation slice is the tail of that ordering, not a random
# sample.
split_a = int(len(y_a) * 0.8)
Xa_train, Xa_val = X_a[:split_a], X_a[split_a:]
ya_train, ya_val = y_a[:split_a], y_a[split_a:]
aid_a_val, tid_a_val = aid_a[split_a:], tid_a[split_a:]

# ---- A Linear ----
with mlflow.start_run(run_name="A_nasa_linear"):
    # Batched logging: one call for the run parameters, one for the metrics.
    mlflow.log_params({
        "stage": "A",
        "dataset": "nasa",
        "model_type": "linear",
        "lookback": LOOKBACK,
        "horizon": HORIZON,
    })

    model_A_linear, pred_A_linear, mae_A_linear, rmse_A_linear, error_std_A_linear = train_linear(
        Xa_train, ya_train, Xa_val, ya_val
    )

    mlflow.log_metrics({
        "val_MAE": mae_A_linear,
        "val_RMSE": rmse_A_linear,
        "error_std": error_std_A_linear,
    })

    mlflow.sklearn.log_model(model_A_linear, "model")

# Export validation predictions together with the residual spread.
df_pred_A_linear = make_df_pred(aid_a_val, tid_a_val, pred_A_linear, error_std_A_linear)
df_pred_A_linear.to_csv("../core3_output/A_nasa_linear_pred.csv", index=False)

# ---- A LSTM ----
with mlflow.start_run(run_name="A_nasa_lstm"):
    mlflow.log_params({
        "stage": "A",
        "dataset": "nasa",
        "model_type": "lstm",
        "lookback": LOOKBACK,
        "horizon": HORIZON,
    })

    model_A_lstm, pred_A_lstm, mae_A_lstm, rmse_A_lstm, error_std_A_lstm = train_lstm(
        Xa_train, ya_train, Xa_val, ya_val, LOOKBACK
    )

    mlflow.log_metrics({
        "val_MAE": mae_A_lstm,
        "val_RMSE": rmse_A_lstm,
        "error_std": error_std_A_lstm,
    })

    mlflow.keras.log_model(model_A_lstm, "model")

df_pred_A_lstm = make_df_pred(aid_a_val, tid_a_val, pred_A_lstm, error_std_A_lstm)
df_pred_A_lstm.to_csv("../core3_output/A_nasa_lstm_pred.csv", index=False)

  super().__init__(**kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step




In [14]:
# ---- Stage B: Li-battery dataset ----
df_b = pd.read_csv("../data_csv/libattery_core.csv")

X_b, y_b, aid_b, tid_b = make_supervised_delta(df_b, LOOKBACK, HORIZON)

# Positional 80/20 hold-out. make_supervised_delta emits samples asset by
# asset, so the validation slice is the tail of that ordering, not a random
# sample.
split_b = int(len(y_b) * 0.8)
Xb_train, Xb_val = X_b[:split_b], X_b[split_b:]
yb_train, yb_val = y_b[:split_b], y_b[split_b:]
aid_b_val, tid_b_val = aid_b[split_b:], tid_b[split_b:]

# ---- B Linear ----
with mlflow.start_run(run_name="B_libattery_linear"):
    # Batched logging: one call for the run parameters, one for the metrics.
    mlflow.log_params({
        "stage": "B",
        "dataset": "libattery",
        "model_type": "linear",
        "lookback": LOOKBACK,
        "horizon": HORIZON,
    })

    model_B_linear, pred_B_linear, mae_B_linear, rmse_B_linear, error_std_B_linear = train_linear(
        Xb_train, yb_train, Xb_val, yb_val
    )

    mlflow.log_metrics({
        "val_MAE": mae_B_linear,
        "val_RMSE": rmse_B_linear,
        "error_std": error_std_B_linear,
    })

    mlflow.sklearn.log_model(model_B_linear, "model")

# Export validation predictions together with the residual spread.
df_pred_B_linear = make_df_pred(aid_b_val, tid_b_val, pred_B_linear, error_std_B_linear)
df_pred_B_linear.to_csv("../core3_output/B_libattery_linear_pred.csv", index=False)

# ---- B LSTM ----
with mlflow.start_run(run_name="B_libattery_lstm"):
    mlflow.log_params({
        "stage": "B",
        "dataset": "libattery",
        "model_type": "lstm",
        "lookback": LOOKBACK,
        "horizon": HORIZON,
    })

    model_B_lstm, pred_B_lstm, mae_B_lstm, rmse_B_lstm, error_std_B_lstm = train_lstm(
        Xb_train, yb_train, Xb_val, yb_val, LOOKBACK
    )

    mlflow.log_metrics({
        "val_MAE": mae_B_lstm,
        "val_RMSE": rmse_B_lstm,
        "error_std": error_std_B_lstm,
    })

    mlflow.keras.log_model(model_B_lstm, "model")

df_pred_B_lstm = make_df_pred(aid_b_val, tid_b_val, pred_B_lstm, error_std_B_lstm)
df_pred_B_lstm.to_csv("../core3_output/B_libattery_lstm_pred.csv", index=False)

  super().__init__(**kwargs)


[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step




In [15]:
# ---- Stage C: synthetic data, linear model under several stress settings ----
df_c_raw = pd.read_csv("../data_csv/synthetic_degradation_core.csv")

# (gap, noise_sigma) pairs handed to stress_transform, one MLflow run each.
configs = [
    (3, 0.05),
    (5, 0.10),
    (7, 0.15),
]

for gap, noise in configs:
    # NOTE(review): stress_transform is not defined in the cells visible here;
    # it has to exist in the kernel already or this line raises NameError on a
    # fresh Restart & Run All. Confirm where it is defined.
    df_c = stress_transform(df_c_raw, gap=gap, noise_sigma=noise)

    X_c, y_c, aid_c, tid_c = make_supervised_delta(df_c, LOOKBACK, HORIZON)

    # Positional 80/20 hold-out over the generated sample order.
    split_c = int(len(y_c) * 0.8)
    Xc_train, Xc_val = X_c[:split_c], X_c[split_c:]
    yc_train, yc_val = y_c[:split_c], y_c[split_c:]
    aid_c_val, tid_c_val = aid_c[split_c:], tid_c[split_c:]

    # The decimal point is replaced by "p" in the name (0.05 -> "0p05");
    # note that str(0.10) is "0.1", so that setting is rendered as "0p1".
    run_name = f"C_synth_linear_gap{gap}_noise{str(noise).replace('.','p')}"

    with mlflow.start_run(run_name=run_name):
        # Batched logging: one call for the run parameters, one for the metrics.
        mlflow.log_params({
            "stage": "C",
            "dataset": "synthetic",
            "model_type": "linear",
            "lookback": LOOKBACK,
            "horizon": HORIZON,
            "sampling_gap": gap,
            "noise_sigma": noise,
        })

        model_C_linear, pred_C_linear, mae_C, rmse_C, error_std_C = train_linear(
            Xc_train, yc_train, Xc_val, yc_val
        )

        mlflow.log_metrics({
            "val_MAE": mae_C,
            "val_RMSE": rmse_C,
            "error_std": error_std_C,
        })

        mlflow.sklearn.log_model(model_C_linear, "model")

    # One CSV per configuration. The loop variables are rebound each pass, so
    # after the loop they hold the values of the last configuration only.
    df_pred_C = make_df_pred(aid_c_val, tid_c_val, pred_C_linear, error_std_C)
    df_pred_C.to_csv(f"../core3_output/{run_name}_pred.csv", index=False)



A / Linear

In [19]:
# Rebuild the stage-A linear prediction table from the variables left by the
# training cell and overwrite the CSV that cell already wrote.
df_pred_A_linear = make_df_pred(aid_a_val, tid_a_val, pred_A_linear, error_std_A_linear)
df_pred_A_linear.to_csv("../core3_output/A_nasa_linear_pred.csv", index=False)

A / LSTM

In [21]:
# Rebuild the stage-A LSTM prediction table from the variables left by the
# training cell and overwrite the CSV that cell already wrote.
df_pred_A_lstm = make_df_pred(aid_a_val, tid_a_val, pred_A_lstm, error_std_A_lstm)
df_pred_A_lstm.to_csv("../core3_output/A_nasa_lstm_pred.csv", index=False)

B / Linear

In [22]:
# Rebuild the stage-B linear prediction table from the variables left by the
# training cell and overwrite the CSV that cell already wrote.
df_pred_B_linear = make_df_pred(aid_b_val, tid_b_val, pred_B_linear, error_std_B_linear)
df_pred_B_linear.to_csv("../core3_output/B_libattery_linear_pred.csv", index=False)

B / LSTM

In [23]:
# Rebuild the stage-B LSTM prediction table from the variables left by the
# training cell and overwrite the CSV that cell already wrote.
df_pred_B_lstm = make_df_pred(aid_b_val, tid_b_val, pred_B_lstm, error_std_B_lstm)
df_pred_B_lstm.to_csv("../core3_output/B_libattery_lstm_pred.csv", index=False)

Synthetic

In [24]:
# Re-export the synthetic-stage predictions. The inputs (including run_name)
# are whatever the stage-C loop left behind, i.e. its last configuration only.
df_pred_C = make_df_pred(aid_c_val, tid_c_val, pred_C_linear, error_std_C)
df_pred_C.to_csv(f"../core3_output/{run_name}_pred.csv", index=False)