In [2]:
import joblib
import pandas as pd

model = joblib.load("xgb_model.pkl")

# Load new machine data (must be preprocessed like training data)
new_data = pd.read_csv("incoming_machine_data.csv")

# Clean column names like before (very important)
def clean_column_names(df):
    df = df.copy()
    df.columns = (
        df.columns.str.replace('[\[\]<>]', '', regex=True)
                  .str.replace(' ', '_')
    )
    return df

new_data_clean = clean_column_names(new_data)

# Predict probabilities
probs = model.predict_proba(new_data_clean)

# Assign risk levels
df_results = pd.DataFrame({
    'Failure_Probability': probs[:, 1],
    'Prediction': model.predict(new_data_clean)
})
df_results['Risk_Level'] = pd.qcut(
    df_results['Failure_Probability'],
    q=3,
    labels=['Low', 'Medium', 'High']
)

print(df_results)


    Failure_Probability  Prediction Risk_Level
0              0.000035           0        Low
1              0.001068           0       High
2              0.000814           0       High
3              0.000055           0        Low
4              0.000060           0     Medium
5              0.000052           0        Low
6              0.000001           0        Low
7              0.000051           0        Low
8              0.000395           0     Medium
9              0.001284           0       High
10             0.012743           0       High
11             0.003491           0       High
12             0.000102           0     Medium
13             0.000143           0     Medium
14             0.000059           0     Medium
