# **Battery SOH Forecasting**


In [247]:
import pandas as pd
import numpy as np
import os
import joblib
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error

In [248]:
# Read the combined raw CSV export
df_raw = pd.read_csv('./Data/RawData/combined_data.csv')

# Normalise the header: force every label to a stripped string, then keep only
# the first occurrence of any repeated column name
df_raw.columns = [str(col).strip() for col in df_raw.columns]
is_repeated_column = df_raw.columns.duplicated()
df_raw = df_raw.loc[:, ~is_repeated_column]

# Battery-related columns:
# - `BatSOH` → State of Health
# - `BatCycleCount` → Total charge-discharge cycles
# - `BatVolt` → Voltage
# - `BatTemp` → Temperature
# - `BatPercent` → State of Charge
# - `BatCurrent` → Current
# - `Battery Error` → General error flag
# - `Tbox Internal Bat Volt` → May represent auxiliary power system health

# Other useful predictors:
# - `ThrottlePercent` (usage intensity)
# - `MotorTemp`, `InverterTemp` (related to load)
# - `ctime` (timestamp for temporal patterns)
# - `BrakeStatus`, `GearInformation` (driving mode behavior)

# Columns carried forward into the working frame (device id, predictors, target)
features = [
    'tboxId',
    'BatCycleCount',
    'BatVolt',
    'BatTemp',
    'BatPercent',
    'BatCurrent',
    'Battery Error',
    'Tbox Internal Bat Volt',
    'ThrottlePercent',
    'MotorTemp',
    'InverterTemp',
    'ctime',
    'BrakeStatus',
    'GearInformation',
    'BatSOH',
]

# Independent working copy restricted to the selected columns; df_raw stays untouched
df_enhanced = df_raw[features].copy()

In [249]:
# print(df_enhanced['Battery Error'].value_counts())

# Clean the 'Battery Error' column.
# Missing values are turned into empty strings first so that they go through
# the same path as blank labels; previously NaN survived the cleaning even
# though the intent was to map both blank and missing labels to 'Error'.
battery_error = df_enhanced['Battery Error'].fillna('').astype(str).str.strip()

# Replace empty strings (which now include former NaN values) with 'Error'
df_enhanced['Battery Error'] = battery_error.replace('', 'Error')

# Convert Unix timestamp (seconds) to a timezone-aware datetime in
# UTC+5:30 (Asia/Kolkata): parse as UTC, then convert.
df_enhanced['Time_stamp'] = (
    pd.to_datetime(df_enhanced['ctime'], unit='s', utc=True)
    .dt.tz_convert('Asia/Kolkata')
)

# Now count the unique values
# print(df_enhanced['Battery Error'].value_counts())

# Preview the first 20 rows as plain text, without the index column
print(df_enhanced.head(20).to_string(index=False))

         tboxId  BatCycleCount  BatVolt  BatTemp  BatPercent  BatCurrent Battery Error  Tbox Internal Bat Volt  ThrottlePercent  MotorTemp  InverterTemp      ctime  BrakeStatus  GearInformation  BatSOH                Time_stamp
865209069519313              0        0        0           0           0         Error                      80                0          0             0 1738377365            0                0       0 2025-02-01 08:06:05+05:30
865209069519313              0        0        0           0           0         Error                      80                0          0             0 1738377365            0                0       0 2025-02-01 08:06:05+05:30
865209069519313              0        0        0           0           0         Error                      80                0          0             0 1738377365            0                0       0 2025-02-01 08:06:05+05:30
865209069519313              0        0        0           0           0         Error  

In [250]:
# sample_size = min(len(df_enhanced), 50000)
# df_sampled = df_enhanced.sample(n=sample_size, random_state=1)

# Persist the cleaned frame at the path the online-learning cell reads back
# ("data/battery_data.csv"); writing to the working directory left that cell
# without its input on a fresh run. Create the folder first so to_csv cannot
# fail on a missing directory.
os.makedirs('data', exist_ok=True)
df_enhanced.to_csv('data/battery_data.csv', index=False)

In [251]:
# Sanity check on the cleaned labels. dropna=False keeps a NaN bucket in the
# result, so any label that is still missing shows up instead of being
# silently left out of the counts.
df_enhanced['Battery Error'].value_counts(dropna=False)

No Error    46094
Error          18
Name: Battery Error, dtype: int64

In [252]:



# --- Batch training (disabled) -------------------------------------------
# Everything in this cell is commented out. If re-enabled it would fit an
# XGBRegressor on `BatSOH` with a chronological (unshuffled) 80/20 split and
# save it to "model/xgb_model.pkl", the same path the online-learning cell
# loads with joblib.load.
# NOTE(review): `df` is not defined by any earlier cell in this notebook
# (only `df_raw` / `df_enhanced` are) — confirm the intended frame.
# NOTE(review): the `features` list defined in the loading cell contains the
# target 'BatSOH' itself and the string column 'Battery Error'; using it
# unchanged as X would leak the target — verify the feature list before use.

# X = df[features]
# y = df['BatSOH']

# from IPython.display import display

# Show the first 10 rows in a clean table
# display(df.head(10))

# # Train-test split
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# # Train model
# model = XGBRegressor(n_estimators=100)
# model.fit(X_train, y_train)

# # Evaluate
# y_pred = model.predict(X_test)
# print("MSE:", mean_squared_error(y_test, y_pred))

# # Save model
# joblib.dump(model, "model/xgb_model.pkl")

### ✅ Online Learning using River

In [253]:
from river import linear_model, preprocessing
import joblib

# NOTE: this cell needs the third-party `river` package installed in the
# kernel's environment; without it the import above raises ModuleNotFoundError.

# Predicted SOH below this value triggers the alert line
SOH_ALERT_THRESHOLD = 80

# Load batch-trained model
# NOTE(review): joblib.load unpickles the file — only load model files that
# come from a trusted source.
xgb_model = joblib.load("model/xgb_model.pkl")

# River model for online learning: a standard scaler feeding a linear
# regression. It is trained on the residual (actual SOH - batch prediction),
# so its output is added to the batch prediction as a correction.
river_model = preprocessing.StandardScaler() | linear_model.LinearRegression()

# Simulate live data
df = pd.read_csv("data/battery_data.csv")
df.columns = df.columns.str.strip()
df['ctime'] = pd.to_datetime(df['ctime'], unit='s')
df = df.dropna()

features = ['BatTemp', 'BatCycleCount', 'BatVolt', 'ThrottlePercent', 'BatCurrent', 'MotorTemp']

# The batch model never changes inside the loop, so score every row once on a
# float matrix instead of calling predict() per row with a Python list.
feature_matrix = df[features].to_numpy(dtype=float)
if len(feature_matrix):
    xgb_preds = xgb_model.predict(feature_matrix)
else:
    # Nothing to score; skip the model call on an empty frame
    xgb_preds = np.empty(0)

for ctime, actual_soh, feature_row, xgb_pred in zip(
    df['ctime'], df['BatSOH'], feature_matrix, xgb_preds
):
    # River consumes one observation as a {feature name: value} dict of plain floats
    X = dict(zip(features, feature_row.tolist()))

    # Batch-model prediction for this row, as a plain Python float
    xgb_pred = float(xgb_pred)

    # Online correction from River (falls back to 0.0 when the prediction is falsy)
    river_correction = river_model.predict_one(X) or 0.0
    final_pred = xgb_pred + river_correction

    # Print result
    print(f"[{ctime}] Final SOH Prediction: {round(final_pred,2)} (Actual: {actual_soh})")

    # Alert if SOH is too low
    if final_pred < SOH_ALERT_THRESHOLD:
        print(f"\U0001F6A8 ALERT: Battery SOH will drop below {SOH_ALERT_THRESHOLD}!")

    # Update River model with the residual the batch model left behind
    river_model.learn_one(X, actual_soh - xgb_pred)

ModuleNotFoundError: No module named 'river'