In [205]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import joblib
import streamlit as st
import openai 

In [206]:
df = pd.read_csv(r'C:\Users\muzhg\Downloads\data.csv',encoding='latin-1')

In [207]:
df=df.rename(columns={"Unnamed: 8": "Battery Consumed (%)"})

In [208]:
df.head(1)

Unnamed: 0,Trip,Date,Route/Area,Weather,Battery Temperature (Start) [°C],Battery Temperature (End),Battery State of Charge (Start),Battery State of Charge (End),Battery Consumed (%),Ambient Temperature (Start) [°C],Target Cabin Temperature,Distance [km],Duration [min],Unnamed: 13,Fan,Note
0,TripA01,2019-06-25_13-21-14,Munich East,sunny,21.0,22.0,86.3%,80.3%,6.0%,25.5,23.0,7.43,16.82,,"Automatic, Level 1",


Clean all battery-related columns

In [210]:
battery_cols = [
    "Battery State of Charge (Start)",
    "Battery State of Charge (End)",
    "Battery Consumed (%)"
]

for col in battery_cols:
    # 1. Convert to string
    df[col] = df[col].astype(str)
    
    # 2. Remove % if present
    df[col] = df[col].str.replace("%", "", regex=False).str.strip()
    
    # 3. Convert to numeric, set errors to NaN
    df[col] = pd.to_numeric(df[col], errors="coerce")
    
    # 4. Fill missing values with mean
    df[col] = df[col].fillna(df[col].mean())

Model Training: Battery Consumption Prediction

In [227]:

features = [
    "Battery Temperature (Start) [°C]",
    "Battery Temperature (End)",
    "Battery State of Charge (Start)",
    "Battery State of Charge (End)",
    "Ambient Temperature (Start) [°C]",
    "Distance [km]",
    "Duration [min]",
    "Target Cabin Temperature",
]

target = "Battery Consumed (%)"

X = df[features].fillna(0)

y = df[target].fillna(df[target].mean())  

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestRegressor(n_estimators=200, max_depth=10, random_state=42)
model.fit(X_train, y_train)

pred = model.predict(X_test)
print("MAE:", mean_absolute_error(y_test, pred))
print("R2:", r2_score(y_test, pred))
joblib.dump(model, r"C:\Users\muzhg\.streamlit\models\rf_degradation.joblib")
print("Model saved successfully!")


MAE: 5.074409542483648
R2: -1.1217080069093606
Model saved successfully!


In [229]:
import os
os.path.exists(r"C:\Users\muzhg\.streamlit\models\rf_degradation.joblib")

True

 
MODEL_PATH = 'models/rf_degradation.joblib'
model = joblib.load(MODEL_PATH)

# set OpenAI key as env var
openai.api_key = st.secrets["OPENAI_API_KEY"]

st.title("Battery Design Optimization Assistant - New Dataset")

# Inputs
battery_temp_start = st.number_input("Battery Temperature (Start) [°C]", value=25.0)
battery_temp_end = st.number_input("Battery Temperature (End) [°C]", value=30.0)
soc_start = st.number_input("Battery State of Charge (Start) [%]", value=80.0)
soc_end = st.number_input("Battery State of Charge (End) [%]", value=60.0)
ambient_temp = st.number_input("Ambient Temperature (Start) [°C]", value=22.0)
distance = st.number_input("Distance [km]", value=10.0)
duration = st.number_input("Duration [min]", value=15.0)
target_cabin_temp = st.number_input("Target Cabin Temperature [°C]", value=22.0)


# I Create DataFrame for model prediction
X = pd.DataFrame([{
    "Battery Temperature (Start) [°C]": battery_temp_start,
    "Battery Temperature (End)": battery_temp_end,
    "Battery State of Charge (Start)": soc_start,
    "Battery State of Charge (End)": soc_end,
    "Ambient Temperature (Start) [°C]": ambient_temp,
    "Distance [km]": distance,
    "Duration [min]": duration,
    "Target Cabin Temperature": target_cabin_temp,
 
}])

# Predict
pred = model.predict(X)[0]
st.metric("Predicted Battery Consumed (%)", f"{pred:.3f}%")

# LLM Prompt
prompt = f"""
An ML model predicted battery consumed {pred:.3f}% for a trip with these conditions:
Battery Temperature Start={battery_temp_start}°C, Battery Temperature End={battery_temp_end}°C,
State of Charge Start={soc_start}%, End={soc_end}%, Ambient Temp={ambient_temp}°C, Distance={distance} km,
Duration={duration} min, Target Cabin Temp={target_cabin_temp}°C.
Explain why this might be happening and give 3 concise engineer-ready recommendations to reduce battery consumption.
"""

if st.button("Explain & Recommend"):
    response = openai.ChatCompletion.create(
        model="gpt-4o-mini",  # or your available LLM
        messages=[{"role":"user","content":prompt}],
        max_tokens=300
    )
    text = response["choices"][0]["message"]["content"]
    st.subheader("LLM Explanation & Recommendations")
    st.write(text)


In [214]:
st.subheader("Feature Importance")

importances = model.feature_importances_
feature_names = X.columns

fig, ax = plt.subplots()
sns.barplot(x=importances, y=feature_names, ax=ax)
ax.set_xlabel("Importance")
ax.set_ylabel("Feature")
ax.set_title("Which features influence battery consumption the most")

st.pyplot(fig)

DeltaGenerator()

In [215]:
import sys
print(sys.executable)

C:\Users\muzhg\anaconda3\python.exe


In [216]:
!C:\Users\muzhg\anaconda3\python.exe -m pip install joblib

