In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Plot styling. Order matters: the matplotlib style sheet is applied after the
# seaborn style, so for any setting both touch the later call takes effect.
# NOTE(review): 'darkgrid' and 'dark_background' presumably overlap on
# background/grid colours — confirm the combined look is the intended one.
sns.set_style('darkgrid')
plt.style.use('dark_background')

In [2]:
# Read the battery dataset (path is relative to the notebook's working
# directory) and preview the first five rows.
data = pd.read_csv(filepath_or_buffer="Battery_RUL.csv")
data.head(n=5)

Unnamed: 0,Cycle_Index,Discharge Time (s),Decrement 3.6-3.4V (s),Max. Voltage Dischar. (V),Min. Voltage Charg. (V),Time at 4.15V (s),Time constant current (s),Charging time (s),RUL
0,1.0,2595.3,1151.4885,3.67,3.211,5460.001,6755.01,10777.82,1112
1,2.0,7408.64,1172.5125,4.246,3.22,5508.992,6762.02,10500.35,1111
2,3.0,7393.76,1112.992,4.249,3.224,5508.993,6762.02,10420.38,1110
3,4.0,7385.5,1080.320667,4.25,3.225,5502.016,6762.02,10322.81,1109
4,6.0,65022.75,29813.487,4.29,3.398,5480.992,53213.54,56699.65,1107


In [4]:
# --- Feature engineering ---------------------------------------------------
# Engineered columns are derived from the raw measurement columns and added to
# `data`. The raw columns themselves are never overwritten, so `data` stays in
# physical units and this cell can be re-run safely.
#
# NOTE(review): .diff() and .rolling() follow row order. If the CSV holds more
# than one battery, or cycles are missing, these features mix unrelated rows —
# confirm, and group per battery if so.
discharge_time = data["Discharge Time (s)"]
max_voltage = data["Max. Voltage Dischar. (V)"]

# First difference of discharge time. Both names below hold the same series;
# each is kept because both are listed in `features`.
# NOTE(review): one of the two is redundant as a model input — consider
# dropping it once results no longer need to match.
discharge_time_delta = discharge_time.diff()

data["Delta_Discharge_Time"] = discharge_time_delta
data["Rolling_Avg_Voltage"] = max_voltage.rolling(window=5).mean()
data["Rolling_Avg_Discharge_Time"] = discharge_time.rolling(window=5).mean()
data["Voltage_Change_Rate"] = max_voltage.diff()
data["Discharge_Time_Change_Rate"] = discharge_time_delta


features = ["Rolling_Avg_Voltage", "Rolling_Avg_Discharge_Time",
            "Voltage_Change_Rate", "Discharge_Time_Change_Rate",
            "Delta_Discharge_Time", "Discharge Time (s)",
            "Max. Voltage Dischar. (V)", "Min. Voltage Charg. (V)",
            "Time constant current (s)", "Charging time (s)"]
target = "RUL"

# --- Scaling ---------------------------------------------------------------
# Standardize the features into a separate frame instead of writing the scaled
# values back into `data`. `X` has the same values, columns and index as
# before; only `data` differs (it keeps raw units).
# The leading NaNs produced by diff/rolling are passed to the scaler as-is.
# NOTE(review): the scaler is fitted on all rows, before the train/test split,
# so test-set statistics leak into the scaling. Fit on the training rows only
# if this matters for the evaluation.
scaler = StandardScaler()
X = pd.DataFrame(
    scaler.fit_transform(data[features]),
    columns=features,
    index=data.index,
)

# Target vector (the train/test split itself happens in the next cell).
y = data[target]

In [5]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# reproducible.
# NOTE(review): the split is random over rows, while several features are
# built from neighbouring rows (diff / rolling mean). Adjacent rows can land on
# both sides of the split — confirm this is acceptable for the reported scores.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Hyperparameters gathered in one mapping so they can be read and tuned in one
# place.
xgb_params = dict(
    objective="reg:squarederror",  # squared-error regression loss
    n_estimators=500,              # number of boosting rounds
    max_depth=6,                   # maximum depth of each tree
    learning_rate=0.04,            # shrinkage applied to each round
    subsample=0.85,                # fraction of rows sampled per tree
    colsample_bytree=0.85,         # fraction of columns sampled per tree
    reg_lambda=3,                  # L2 regularization strength
    reg_alpha=1,                   # L1 regularization strength
    random_state=42,               # seed for the model's own sampling
)
xgb_model = xgb.XGBRegressor(**xgb_params)

In [6]:
# Train on the training partition; the fitted estimator is the cell's output.
xgb_model.fit(X=X_train, y=y_train)

In [7]:
# Predict the target for the held-out rows.
y_pred = xgb_model.predict(X_test)
# Bare name on the last line so the notebook displays the prediction array.
y_pred

array([1045.515  ,  955.09326,  462.78082, ...,  361.62607,  254.90051,
        456.41605], dtype=float32)

In [8]:
# Held-out evaluation: RMSE (same units as the target) and R².
mse_test = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse_test)
r2 = r2_score(y_test, y_pred)

# Both metrics are printed rounded to two decimals.
print(f"✅ RMSE: {rmse:.2f}")
print(f"✅ R² Score: {r2:.2f}")

✅ RMSE: 28.68
✅ R² Score: 0.99


In [9]:
# Training-set RMSE, for comparison against the held-out RMSE (a large gap
# between the two would point to overfitting).
y_train_pred = xgb_model.predict(X_train)
train_mse = mean_squared_error(y_train, y_train_pred)
train_rmse = np.sqrt(train_mse)
print(f"Train RMSE: {train_rmse:.2f}")

Train RMSE: 16.03


In [10]:
# Pairwise correlations between every feature and the target.
corr_matrix = data[[*features, target]].corr()

# Rank the columns by their correlation with RUL, strongest positive first.
# Values above 0.99 would suggest a feature that nearly duplicates the target.
# Delta_Discharge_Time and Discharge_Time_Change_Rate are both the .diff() of
# the same column, so they are expected to show the same value here.
rul_corr = corr_matrix["RUL"].sort_values(ascending=False)
print(rul_corr)

RUL                           1.000000
Rolling_Avg_Voltage           0.918929
Max. Voltage Dischar. (V)     0.782800
Rolling_Avg_Discharge_Time    0.153242
Time constant current (s)     0.040675
Charging time (s)             0.018299
Discharge Time (s)            0.011957
Voltage_Change_Rate          -0.012016
Discharge_Time_Change_Rate   -0.049944
Delta_Discharge_Time         -0.049944
Min. Voltage Charg. (V)      -0.759805
Name: RUL, dtype: float64
