In [None]:
pip install lightgbm optuna

: 

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the feature table. The code below relies on three columns being present:
# the binary label "failure_24h", "RUL", and the grouping key "unit".
df = pd.read_csv("train_features.csv")

target = "failure_24h"

# Features exclude the label and "RUL" (as before), and now also "unit".
# "unit" is the key the split below groups on, so every validation unit is
# unseen during training; any tree split learned on that identifier cannot
# transfer to new units and only invites memorising the training ones.
# NOTE(review): inference inputs fed to the saved model must drop "unit" too.
X = df.drop(columns=[target, "RUL", "unit"])
y = df[target]

# Split at the unit level so that all rows of a given unit land on the same
# side of the split (no unit contributes rows to both train and validation).
units = df["unit"].unique()

train_units, val_units = train_test_split(
    units,
    test_size=0.2,
    random_state=42
)

# Boolean row masks built from the full frame; X and y share df's index.
train_idx = df["unit"].isin(train_units)
val_idx   = df["unit"].isin(val_units)

X_train, X_val = X.loc[train_idx], X.loc[val_idx]
y_train, y_val = y.loc[train_idx], y.loc[val_idx]

ModuleNotFoundError: No module named 'pandas'

In [None]:
from lightgbm import LGBMClassifier
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Baseline LightGBM classifier with fixed hyperparameters; class_weight="balanced"
# reweights the classes, and the seed keeps the fit repeatable.
baseline_lgbm = LGBMClassifier(
    n_estimators=500,
    learning_rate=0.05,
    num_leaves=31,
    class_weight="balanced",
    random_state=42,
    n_jobs=-1,
)

# Median-impute missing feature values, then pass the result to the classifier.
lgbm_pipe = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("model", baseline_lgbm),
    ]
)

# Fit on the training units only; the fitted pipeline is the cell's output.
lgbm_pipe.fit(X_train, y_train)

In [None]:
from sklearn.metrics import average_precision_score, precision_score, recall_score

# Positive-class probabilities (column 1 of predict_proba) on the validation rows,
# and the hard labels obtained by cutting them at 0.5.
y_probs = lgbm_pipe.predict_proba(X_val)[:, 1]
y_pred = (y_probs >= 0.5).astype(int)

# PR-AUC is computed from the raw scores; precision and recall from the hard labels.
prauc = average_precision_score(y_val, y_probs)
precision, recall = (
    precision_score(y_val, y_pred),
    recall_score(y_val, y_pred),
)

prauc, precision, recall

In [None]:
import optuna
from sklearn.metrics import average_precision_score

def objective(trial):
    """Optuna objective: validation PR-AUC of one LightGBM configuration.

    Samples a hyperparameter set from ``trial``, fits a median-impute +
    LightGBM pipeline on ``X_train``/``y_train`` (notebook globals) and scores
    it on ``X_val``/``y_val``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters below.

    Returns
    -------
    float
        Average precision (PR-AUC) of the positive-class probabilities on the
        validation split.
    """
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 300, 800),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.2),
        "num_leaves": trial.suggest_int("num_leaves", 16, 128),
        "max_depth": trial.suggest_int("max_depth", 4, 16),
        "min_child_samples": trial.suggest_int("min_child_samples", 20, 100),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        # LightGBM only applies row subsampling when subsample_freq > 0; with
        # the default of 0 the sampled "subsample" value is silently ignored.
        # Sampling the frequency through the trial (instead of hard-coding it)
        # also records it in study.best_params, so a model rebuilt from
        # best_params uses the same bagging setup as the winning trial.
        "subsample_freq": trial.suggest_int("subsample_freq", 1, 7),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
    }

    model = Pipeline([
        ("imputer", SimpleImputer(strategy="median")),
        ("model", LGBMClassifier(
            **params,
            class_weight="balanced",
            random_state=42,
            n_jobs=-1
        ))
    ])

    model.fit(X_train, y_train)

    # Column 1 of predict_proba holds the positive-class probability.
    y_probs = model.predict_proba(X_val)[:, 1]

    prauc = average_precision_score(y_val, y_probs)

    return prauc


# Seed the sampler so the sequence of sampled hyperparameters is repeatable
# across Restart & Run All. TPESampler is Optuna's default sampler, so only
# the seeding changes, not the search algorithm.
study = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=25)

# Best validation PR-AUC found and the hyperparameters that produced it.
study.best_value, study.best_params

In [None]:
# Hyperparameters of the best Optuna trial.
best_params = study.best_params

# Rebuild the classifier with the tuned values; the fixed settings
# (class weighting, seed, parallelism) match those used during tuning.
tuned_lgbm = LGBMClassifier(
    class_weight="balanced",
    random_state=42,
    n_jobs=-1,
    **best_params,
)

final_lgbm = Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="median")),
        ("model", tuned_lgbm),
    ]
)

# Refit on the training split; the fitted pipeline is the cell's output.
final_lgbm.fit(X_train, y_train)

In [None]:
import numpy as np
from sklearn.metrics import precision_score, recall_score

# Positive-class probabilities from the tuned pipeline on the validation rows.
y_probs = final_lgbm.predict_proba(X_val)[:, 1]

# 50 evenly spaced candidate decision thresholds from 0.1 to 0.9 inclusive.
thresholds = np.linspace(0.1, 0.9, num=50)

# One record per threshold: binarise the scores, then measure precision/recall.
results = []
for t in thresholds:
    y_pred = (y_probs >= t).astype(int)
    results.append(
        dict(
            threshold=t,
            precision=precision_score(y_val, y_pred),
            recall=recall_score(y_val, y_pred),
        )
    )

# Show the ten thresholds with the highest validation precision.
results_df = pd.DataFrame(results)
results_df.sort_values("precision", ascending=False).head(10)

In [None]:
import joblib

# Persist the fitted pipeline (imputer + tuned classifier) to disk.
# joblib files are pickle-based: only load this artifact from a trusted source.
joblib.dump(value=final_lgbm, filename="factoryguard_final_model.joblib")
