In [205]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import joblib
import streamlit as st
import openai 

In [206]:
df = pd.read_csv(r'C:\Users\muzhg\Downloads\data.csv',encoding='latin-1')

In [207]:
df=df.rename(columns={"Unnamed: 8": "Battery Consumed (%)"})

In [208]:
df.head(1)

Unnamed: 0,Trip,Date,Route/Area,Weather,Battery Temperature (Start) [°C],Battery Temperature (End),Battery State of Charge (Start),Battery State of Charge (End),Battery Consumed (%),Ambient Temperature (Start) [°C],Target Cabin Temperature,Distance [km],Duration [min],Unnamed: 13,Fan,Note
0,TripA01,2019-06-25_13-21-14,Munich East,sunny,21.0,22.0,86.3%,80.3%,6.0%,25.5,23.0,7.43,16.82,,"Automatic, Level 1",


Clean all battery-related columns

In [210]:
battery_cols = [
    "Battery State of Charge (Start)",
    "Battery State of Charge (End)",
    "Battery Consumed (%)"
]

for col in battery_cols:
    # 1. Convert to string
    df[col] = df[col].astype(str)
    
    # 2. Remove % if present
    df[col] = df[col].str.replace("%", "", regex=False).str.strip()
    
    # 3. Convert to numeric, set errors to NaN
    df[col] = pd.to_numeric(df[col], errors="coerce")
    
    # 4. Fill missing values with mean
    df[col] = df[col].fillna(df[col].mean())

Model Training: Battery Consumption Prediction

In [227]:

features = [
    "Battery Temperature (Start) [°C]",
    "Battery Temperature (End)",
    "Battery State of Charge (Start)",
    "Battery State of Charge (End)",
    "Ambient Temperature (Start) [°C]",
    "Distance [km]",
    "Duration [min]",
    "Target Cabin Temperature",
]

target = "Battery Consumed (%)"

X = df[features].fillna(0)

y = df[target].fillna(df[target].mean())  

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestRegressor(n_estimators=200, max_depth=10, random_state=42)
model.fit(X_train, y_train)

pred = model.predict(X_test)
print("MAE:", mean_absolute_error(y_test, pred))
print("R2:", r2_score(y_test, pred))
joblib.dump(model, r"C:\Users\muzhg\.streamlit\models\rf_degradation.joblib")
print("Model saved successfully!")


MAE: 5.074409542483648
R2: -1.1217080069093606
Model saved successfully!


In [214]:
st.subheader("Feature Importance")

importances = model.feature_importances_
feature_names = X.columns

fig, ax = plt.subplots()
sns.barplot(x=importances, y=feature_names, ax=ax)
ax.set_xlabel("Importance")
ax.set_ylabel("Feature")
ax.set_title("Which features influence battery consumption the most")

st.pyplot(fig)

DeltaGenerator()