In [None]:
import os
from pathlib import Path

import joblib
import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# Resolve the MLflow destination from the environment, falling back to a
# local file store and a default experiment name when the variables are unset.
tracking_uri = os.environ.get("MLFLOW_TRACKING_URI", "file:./mlruns")
experiment_name = os.environ.get(
    "MLFLOW_EXPERIMENT_NAME", "predictive-maintenance-cmapss"
)

# Point the client at the tracking store first, then select the experiment.
mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(experiment_name)

# Echo the effective settings so the notebook output records them.
print(f"MLFLOW_TRACKING_URI = {tracking_uri}")
print(f"Experiment = {experiment_name}")

In [None]:
# Input locations; each one can be overridden through an environment variable.
train_path = Path(os.getenv("TRAIN_CSV", "artifacts/processed/train.csv"))
val_path = Path(os.getenv("VAL_CSV", "artifacts/processed/val.csv"))
model_path = Path(os.getenv("MODEL_PATH", "models/best_model.joblib"))

train_df = pd.read_csv(train_path)
val_df = pd.read_csv(val_path)

# Common target names in CMAPSS pipelines; the first candidate present in the
# training frame is used.
candidate_targets = ["RUL", "rul", "target", "y"]
target_col = next((c for c in candidate_targets if c in train_df.columns), None)
if target_col is None:
    raise ValueError(
        f"Could not infer target column. Expected one of {candidate_targets}. "
        f"Columns: {list(train_df.columns)[:30]}..."
    )

# Exclude non-feature identifier columns commonly present in CMAPSS
id_cols = [c for c in ["unit", "engine_id", "id", "cycle", "time_cycles"] if c in train_df.columns]

# Every remaining training column is treated as a model input. The exclusion
# set is built once instead of re-concatenating a list for each column.
non_feature_cols = set(id_cols) | {target_col}
feature_cols = [c for c in train_df.columns if c not in non_feature_cols]

# The schema above is inferred from the training frame only, so verify that
# the validation frame actually carries it. Without this check a mismatched
# val.csv surfaces as a bare pandas KeyError on the selection below.
missing_in_val = [c for c in [*feature_cols, target_col] if c not in val_df.columns]
if missing_in_val:
    raise ValueError(
        f"Validation data at {val_path} is missing columns inferred from "
        f"{train_path}: {missing_in_val[:30]}"
    )

X_val = val_df[feature_cols]
y_val = val_df[target_col]

print("target_col =", target_col)
print("id_cols =", id_cols)
print("n_features =", len(feature_cols))

In [None]:
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code. MODEL_PATH comes from the environment, so only point it at artifacts
# you trust.
model = joblib.load(model_path)

y_pred = model.predict(X_val)

mae = mean_absolute_error(y_val, y_pred)
# The `squared=False` keyword was deprecated in scikit-learn 1.4 and removed
# in 1.6, where passing it raises TypeError. Taking the square root of the MSE
# works on every version and is equivalent here because y_val is a single
# target column.
rmse = mean_squared_error(y_val, y_pred) ** 0.5

# Record the evaluation as one MLflow run: inputs as params, scores as
# metrics, and the loaded estimator as a logged model.
with mlflow.start_run(run_name="baseline-cmapss"):
    mlflow.log_param("model_path", str(model_path))
    mlflow.log_param("target_col", target_col)
    mlflow.log_param("id_cols", ",".join(id_cols))
    mlflow.log_param("n_features", int(len(feature_cols)))
    mlflow.log_param("features", ",".join(feature_cols[:50]))  # keep logs compact

    # Cast to built-in float so the logged values are plain Python numbers.
    mlflow.log_metric("mae", float(mae))
    mlflow.log_metric("rmse", float(rmse))

    mlflow.sklearn.log_model(model, artifact_path="model")

print({"mae": mae, "rmse": rmse})