# Build a cost-prediction model from the demo dataset,
# then save the trained Random Forest model to a pickle file.

# Import the necessary libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the dataset

In [None]:
# Absolute path of the synthetic maintenance dataset that the notebook reads.
DATA_PATH = "/content/synthetic_maintenance_data.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Display the DataFrame exactly as it was loaded from the CSV.
df

Unnamed: 0,repair_day,warning_sensors,critical_sensors,good_sensors,predicted_rul,sensor_health,anomaly_level,cost
0,today,0,2,13,112,74.50,11.75,1701445
1,before_10_days,0,0,15,12,75.88,8.24,1410281
2,today,2,0,13,31,60.00,17.81,1930452
3,end_cycle,0,0,15,130,79.65,8.51,2022026
4,end_cycle,1,0,14,69,60.00,19.36,2680188
...,...,...,...,...,...,...,...,...
1995,today,2,2,11,102,70.07,17.32,2185712
1996,end_cycle,0,0,15,112,90.52,6.47,1854390
1997,end_cycle,1,2,12,71,79.19,9.59,2464111
1998,today,0,0,15,61,87.48,5.97,1395433


In [None]:
# Convert the categorical repair_day column to a numeric value

In [None]:
def encode_repair_day(row):
    """Translate a row's ``repair_day`` label into a number.

    Encoding:
        'before_10_days' -> -10
        'after_10_days'  ->  10
        'end_cycle'      ->   0
        'today'          ->  the row's own ``predicted_rul`` value

    Any other label falls back to 0, which makes it indistinguishable from
    'end_cycle' after encoding.

    Args:
        row: Object indexable by column name (e.g. a DataFrame row); it must
            provide 'repair_day', and 'predicted_rul' when the label is
            'today'.

    Returns:
        The numeric encoding described above.
    """
    label = row['repair_day']

    # 'today' is the only label whose value depends on another column.
    if label == 'today':
        return row['predicted_rul']

    fixed_codes = {
        'before_10_days': -10,
        'end_cycle': 0,
        'after_10_days': 10,
    }
    # Unrecognised labels share the default of 0.
    return fixed_codes.get(label, 0)

In [None]:
# Run encode_repair_day on every row (axis="columns" hands each row to the
# function) and overwrite the text labels with the numeric result.
# NOTE: the column is replaced in place, so re-running this cell feeds the
# already-encoded numbers back into encode_repair_day, which maps every
# unrecognised value to 0.
encoded_repair_day = df.apply(encode_repair_day, axis="columns")
df['repair_day'] = encoded_repair_day

In [None]:
# Display the DataFrame after repair_day has been replaced by its numeric encoding.
df

Unnamed: 0,repair_day,warning_sensors,critical_sensors,good_sensors,predicted_rul,sensor_health,anomaly_level,cost
0,112,0,2,13,112,74.50,11.75,1701445
1,-10,0,0,15,12,75.88,8.24,1410281
2,31,2,0,13,31,60.00,17.81,1930452
3,0,0,0,15,130,79.65,8.51,2022026
4,0,1,0,14,69,60.00,19.36,2680188
...,...,...,...,...,...,...,...,...
1995,102,2,2,11,102,70.07,17.32,2185712
1996,0,0,0,15,112,90.52,6.47,1854390
1997,0,1,2,12,71,79.19,9.59,2464111
1998,61,0,0,15,61,87.48,5.97,1395433


In [None]:
# Separate the regression target from the predictors: `cost` is the value to
# predict, and every remaining column of df becomes an input feature.
# NOTE(review): the rendered tables show an "Unnamed: 0" header; if that is a
# real column (e.g. a saved row index) rather than a display artefact, it is
# being fed to the model as a feature - confirm against df.columns.
target_column = 'cost'
y = df[target_column]
X = df.drop(columns=[target_column])


In [None]:
# Display the feature matrix (df without the cost column).
X

Unnamed: 0,repair_day,warning_sensors,critical_sensors,good_sensors,predicted_rul,sensor_health,anomaly_level
0,112,0,2,13,112,74.50,11.75
1,-10,0,0,15,12,75.88,8.24
2,31,2,0,13,31,60.00,17.81
3,0,0,0,15,130,79.65,8.51
4,0,1,0,14,69,60.00,19.36
...,...,...,...,...,...,...,...
1995,102,2,2,11,102,70.07,17.32
1996,0,0,0,15,112,90.52,6.47
1997,0,1,2,12,71,79.19,9.59
1998,61,0,0,15,61,87.48,5.97


In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Configure the random-forest regressor: 200 trees, a fixed seed for
# repeatable results, and n_jobs=-1 to use all available processors.
model = RandomForestRegressor(n_estimators=200, n_jobs=-1, random_state=42)

In [None]:
# Fit the forest on the training split.
model.fit(X=X_train, y=y_train)

In [None]:
# Predict the cost for every row of the held-out test split.
y_pred = model.predict(X=X_test)

In [None]:
# Score the test-set predictions against the true costs:
#   rmse - square root of the mean squared error
#   mae  - mean absolute error
#   r2   - coefficient of determination
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

In [None]:
# Report the three metrics, one per line, rounded to three decimals.
print(
    f"✅ RMSE : {rmse:.3f}\n"
    f"✅ MAE  : {mae:.3f}\n"
    f"✅ R²   : {r2:.3f}"
)

✅ RMSE : 123887.423
✅ MAE  : 87640.263
✅ R²   : 0.941


In [None]:
# ✅ Persist the fitted model as a binary pickle named cost_predictor.pkl
# (relative path, so it lands in the current working directory).
# Only unpickle this file from a trusted source: loading a pickle can execute
# arbitrary code.
with open("cost_predictor.pkl", mode="wb") as model_file:
    pickle.dump(model, model_file)