In [7]:
import mlflow
import mlflow.sklearn
# Tracking client and model registry are both pointed at the same local MLflow server.
MLFLOW_SERVER_URI = "http://127.0.0.1:5001"

mlflow.set_tracking_uri(MLFLOW_SERVER_URI)
mlflow.set_registry_uri(MLFLOW_SERVER_URI)  # must be set as well (original note: "be sure to add this")
# Kept as the last expression so the cell still displays the Experiment object.
mlflow.set_experiment("core3_degradation_prediction")

2025/12/28 00:08:22 INFO mlflow.tracking.fluent: Experiment with name 'core3_degradation_prediction' does not exist. Creating a new experiment.


<Experiment: artifact_location='/backend/mlruns/1', creation_time=1766848102851, experiment_id='1', last_update_time=1766848102851, lifecycle_stage='active', name='core3_degradation_prediction', tags={}>

In [None]:
import pandas as pd
import numpy as np
import os

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Load the raw time series, parse dates, normalise column names, and order rows
# by asset and then chronologically within each asset.
df = (
    pd.read_csv("../data/raw/health_timeseries_core_state.csv")
    .assign(date=lambda frame: pd.to_datetime(frame["date"]))
    .rename(columns={
        "user_id": "asset_id",
        "health_state_index": "state_value"
    })
    .sort_values(["asset_id", "date"])
    .reset_index(drop=True)
)

# Per-asset view of the state series, shared by both derived columns below.
state_by_asset = df.groupby("asset_id")["state_value"]

# delta_state: change from the previous row of the same asset (regression target).
# state_lag1:  previous row's state of the same asset (the single feature).
df = df.assign(
    delta_state=state_by_asset.diff(),
    state_lag1=state_by_asset.shift(1),
)

# The first row of every asset has no predecessor, so both derived columns are NaN there.
train_df = df.dropna(subset=["delta_state", "state_lag1"]).reset_index(drop=True)

X = train_df[["state_lag1"]]
y = train_df["delta_state"]

Run 1: Linear Regression (baseline)

In [5]:
# Run 1 — baseline: fit on every training row and log the in-sample error
# (predictions are made on the same X the model was fitted on).
with mlflow.start_run(run_name="linear_regression_baseline"):

    model = LinearRegression().fit(X, y)

    y_pred = model.predict(X)
    residuals = y - y_pred

    rmse = np.sqrt(mean_squared_error(y, y_pred))
    error_std = np.std(residuals)

    mlflow.log_param("model_type", "LinearRegression")
    # Same two metrics, logged in the same order, one call per metric.
    for metric_name, metric_value in (("rmse", rmse), ("error_std", error_std)):
        mlflow.log_metric(metric_name, metric_value)

    # Store the fitted estimator under the run's "model" artifact path.
    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="model"
    )

    print("Run 1 RMSE:", rmse)
    print("Run 1 error_std:", error_std)



OSError: [Errno 30] Read-only file system: '/mlruns'

Run 2: 동일 모델, 다른 window
- 모델은 같고, 입력 분포만 다름

In [None]:
# Run 2 — same model family as Run 1, fitted only on the most recent half of the
# training rows, so that only the input distribution differs between the runs.
with mlflow.start_run(run_name="linear_regression_short_window"):

    cutoff = int(len(train_df) * 0.5)

    # train_df is ordered by (asset_id, date), so a positional slice would keep the
    # later-sorted assets rather than the latest observations. Rank rows by date
    # (stable sort => deterministic tie-breaking) and keep the newest half instead,
    # which is what the logged data_scope="recent_half" describes.
    recent_idx = train_df["date"].sort_values(kind="stable").index[cutoff:]
    X_short = X.loc[recent_idx]
    y_short = y.loc[recent_idx]

    model = LinearRegression()
    model.fit(X_short, y_short)

    y_pred = model.predict(X_short)

    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6;
    # take the square root explicitly, as Run 1 does.
    rmse = np.sqrt(mean_squared_error(y_short, y_pred))
    error_std = np.std(y_short - y_pred)

    mlflow.log_param("model_type", "LinearRegression")
    mlflow.log_param("data_scope", "recent_half")
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("error_std", error_std)

    # Store the fitted estimator under the run's "model" artifact path.
    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="model"
    )

    print("Run 2 RMSE:", rmse)
    print("Run 2 error_std:", error_std)

### Core 3 결론

- 상태 열화 예측은 단일 성능 수치로 판단할 수 없다.
- 동일한 모델이라도 데이터 조건에 따라 오차 특성이 달라진다.
- 따라서 예측 모델은 **의사결정 입력으로 사용되기 전, 관리·추적되어야 한다**.

이후 Core 단계에서는,
모델 자체보다 **이 예측값을 어떻게 사용하는가**가 더 중요해진다.