In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib

In [2]:
# xgboost is an optional dependency: record whether it could be imported so
# later cells can decide whether to use XGBRegressor.
try:
    from xgboost import XGBRegressor
except ImportError:
    xgb_installed = False
else:
    xgb_installed = True

In [3]:
# Raw dataset, read from a path relative to the notebook's working directory.
DATA_PATH = "Battery_dataset.csv"
df = pd.read_csv(DATA_PATH)

In [4]:
# Replace the 'battery_id' column with integer codes.
# `le` keeps the label -> code mapping learned from this column.
# NOTE: this overwrites the column in place, so re-running the cell would
# refit the encoder on the already-encoded integers.
le = LabelEncoder().fit(df['battery_id'])
df['battery_id'] = le.transform(df['battery_id'])


In [5]:
# Input columns fed to every model.
features = [
    'battery_id', 'cycle',
    'chI', 'chV', 'chT',
    'disI', 'disV', 'disT',
    'BCt',
]
# Names of the two regression target columns.
target_soh, target_rul = 'SOH', 'RUL'

# Feature matrix and one target series per prediction task.
X = df.loc[:, features]
y_soh = df.loc[:, target_soh]
y_rul = df.loc[:, target_rul]


In [6]:
# A single call splits the features and both targets together, so every
# train/test piece refers to the same rows. 20% is held out; the seed is fixed.
# NOTE(review): this is a random row-wise split; if rows of the same battery
# are correlated across cycles the test scores may be optimistic — confirm.
(
    X_train, X_test,
    y_soh_train, y_soh_test,
    y_rul_train, y_rul_test,
) = train_test_split(X, y_soh, y_rul, random_state=42, test_size=0.2)

In [7]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [8]:
# Candidate regressors keyed by the display name used when printing scores.
models = dict(
    RandomForest=RandomForestRegressor(random_state=42, n_estimators=100),
    GradientBoosting=GradientBoostingRegressor(random_state=42, n_estimators=100),
)

In [9]:
# Register XGBoost as a third candidate only when the import succeeded.
if xgb_installed:
    models.update(XGBoost=XGBRegressor(random_state=42, n_estimators=100))


In [10]:
def evaluate_models(models, X_train, y_train, X_test, y_test):
    """Fit every model on the training split and print its test-set R2 and MSE.

    Parameters
    ----------
    models : dict
        Maps a display name to an estimator exposing ``fit`` and ``predict``.
        Each estimator is (re)fitted in place.
    X_train, y_train
        Training features and training target.
    X_test, y_test
        Held-out features and target used for scoring.

    Returns
    -------
    dict
        ``{name: (r2, mse)}`` in the iteration order of ``models``.
    """
    scores = {}
    for name, model in models.items():
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        r2 = r2_score(y_test, preds)
        mse = mean_squared_error(y_test, preds)
        print(f"{name} - R2: {r2:.4f}, MSE: {mse:.4f}")
        scores[name] = (r2, mse)
    return scores


# The same estimator objects are fitted for SOH first and then refitted for
# RUL, so after this cell the entries in `models` hold the RUL fit.
print("====== Predicting SOH ======")
soh_scores = evaluate_models(models, X_train_scaled, y_soh_train, X_test_scaled, y_soh_test)

print("\n====== Predicting RUL ======")
rul_scores = evaluate_models(models, X_train_scaled, y_rul_train, X_test_scaled, y_rul_test)


RandomForest - R2: 0.9999, MSE: 0.0193
GradientBoosting - R2: 0.9999, MSE: 0.0360
XGBoost - R2: 0.9996, MSE: 0.0954

RandomForest - R2: 0.9970, MSE: 11.2381
GradientBoosting - R2: 0.9983, MSE: 6.4764
XGBoost - R2: 0.9983, MSE: 6.4380


In [11]:
# Dedicated SOH model: a fresh random forest trained on the scaled training
# split (the entries in `models` were refitted on RUL by the evaluation cell).
rf_soh = RandomForestRegressor(random_state=42, n_estimators=100)
rf_soh.fit(X=X_train_scaled, y=y_soh_train)

RandomForestRegressor(random_state=42)

In [12]:
# Dedicated RUL model, trained on the scaled training split.
# xgboost is optional in this notebook (see `xgb_installed`), so only use
# XGBRegressor when the import succeeded. Otherwise fall back to gradient
# boosting so the cell and the later save step still work, instead of failing
# with a NameError. The variable name is kept because later cells use `xgb_rul`.
if xgb_installed:
    xgb_rul = XGBRegressor(n_estimators=100, random_state=42)
else:
    xgb_rul = GradientBoostingRegressor(n_estimators=100, random_state=42)
xgb_rul.fit(X_train_scaled, y_rul_train)

XGBRegressor(base_score=None, booster=None, callbacks=None,
             colsample_bylevel=None, colsample_bynode=None,
             colsample_bytree=None, device=None, early_stopping_rounds=None,
             enable_categorical=False, eval_metric=None, feature_types=None,
             gamma=None, grow_policy=None, importance_type=None,
             interaction_constraints=None, learning_rate=None, max_bin=None,
             max_cat_threshold=None, max_cat_to_onehot=None,
             max_delta_step=None, max_depth=None, max_leaves=None,
             min_child_weight=None, missing=nan, monotone_constraints=None,
             multi_strategy=None, n_estimators=100, n_jobs=None,
             num_parallel_tree=None, random_state=42, ...)

In [16]:
# Persist both trained models and the fitted scaler; the scaler is saved too
# because the models were trained on scaled features.
artifacts = {
    "soh_model.pkl": rf_soh,
    "rul_model.pkl": xgb_rul,
    "scaler.pkl": scaler,
}
for filename, fitted_obj in artifacts.items():
    joblib.dump(fitted_obj, filename)
print("Models and scaler saved successfully!")

Models and scaler saved successfully!


In [17]:
from sklearn.preprocessing import LabelEncoder
import joblib

# Persist the encoder that was actually fitted on df['battery_id'] earlier in
# the notebook (`le`), instead of refitting a new one on a hard-coded id list.
# The models were trained on codes produced by `le`, so saving that same
# object keeps the label -> code mapping consistent when it is loaded later.
# NOTE: `le` must come from a clean top-to-bottom run; the encoding cell
# overwrites the column in place, so re-running it would refit `le` on the
# already-encoded integers.
le_battery = le
# Labels known to the encoder, kept under the original variable name.
battery_ids = list(le_battery.classes_)

joblib.dump(le_battery, "battery_le.pkl")

print("LabelEncoder saved as battery_le.pkl")

LabelEncoder saved as battery_le.pkl
