In [3]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score
import joblib
from datetime import datetime

# Loading the Data

In [4]:
# -----------------------------------
# Read the raw dataset
# -----------------------------------
csv_path = "combined_data.csv"
df = pd.read_csv(csv_path)

# Sanity check: report the (rows, columns) shape of the freshly loaded frame.
print("✅ Data loaded:", df.shape)

✅ Data loaded: (51110, 68)


# Data Preprocessing

In [5]:
# Convert object columns to numeric
def clean_column(col):
    """Coerce a column of messy text values to numbers.

    Values that already parse as finite numbers (including scientific
    notation such as "1e-3") are converted as-is. Every other value has all
    characters except digits, '.', and '-' removed before parsing, so a
    value with a unit suffix like "12.5 V" still yields 12.5. Anything that
    cannot be parsed either way becomes NaN.

    Parameters
    ----------
    col : pandas.Series
        Column to convert; its elements are stringified first.

    Returns
    -------
    pandas.Series
        Numeric series sharing ``col``'s index.
    """
    text = col.astype(str).str.strip()

    # Parse first, strip second: blindly removing letters would turn "1e5"
    # into "15" and silently corrupt the value.
    direct = pd.to_numeric(text, errors='coerce')

    # Non-finite parses ("inf", "nan") are routed through the stripping path,
    # where they end up as NaN.
    parses_directly = direct.notna() & ~direct.isin([float('inf'), float('-inf')])

    stripped = text.str.replace(r"[^\d.-]", "", regex=True)

    # A single final conversion over the merged strings keeps the result
    # dtype decided by the values themselves (e.g. all-integer stays integer).
    return pd.to_numeric(text.where(parses_directly, stripped), errors='coerce')

# Convert every text-typed column to numeric. Columns are matched on both the
# legacy ``object`` dtype and pandas' dedicated string dtype, so text columns
# are still cleaned when read_csv infers the latter instead of ``object``.
for col in df.columns:
    col_dtype = df[col].dtype
    if pd.api.types.is_object_dtype(col_dtype) or isinstance(col_dtype, pd.StringDtype):
        df[col] = clean_column(df[col])

In [6]:
# Remove columns that carry no information:
# a column with at most one distinct value (as counted by nunique) is treated as constant.
nunique = df.nunique()
constant_cols = [name for name, n_distinct in nunique.items() if n_distinct <= 1]
df = df.drop(columns=constant_cols)
print(f"✅ Dropped {len(constant_cols)} constant columns")

✅ Dropped 25 constant columns


In [7]:
# Only a cell's last expression is rendered automatically, so the preview is
# displayed explicitly; otherwise its result would be silently discarded.
display(df.head())
df.columns

Index(['SOC', 'SOH', 'Voltage', 'Current', 'Cell Volt 1', 'Cell Volt 2',
       'Cell Volt 3', 'Cell Volt 4', 'Cell Volt 5', 'Cell Volt 6',
       'Cell Volt 7', 'Cell Volt 8', 'Cell Volt 9', 'Cell Volt 10',
       'Cell Volt 11', 'Cell Volt 12', 'Cell Volt 13', 'Cell Volt 14',
       'Cell Volt 15', 'Cell Volt 16', 'Temperature Sensor 1',
       'Temperature Sensor 2', 'Temperature Sensor 3', 'Temperature Sensor 4',
       'Temperature Sensor 5', 'Temperature Sensor 6', 'Cycle Count',
       'MCU Voltage', 'MCU Current', 'Throttle', 'Temperature',
       'Controller Temperature', 'Fault Code', 'rpm', 'Minimum cell Voltage',
       'Maximum cell Voltage Index', 'Maximum cell Voltage',
       'Maximum cell Voltage Index.1', 'Ambient Temperature',
       'Minimum Cell Temperature', 'Minimum Cell Temperature Index',
       'Maximum Cell Temperature', 'Maximum Cell Temperature Index'],
      dtype='object')

In [8]:
# -----------------------------------
# Choose model inputs and prediction targets
# -----------------------------------
pack_level_cols = ['SOC', 'SOH', 'Voltage', 'Current']
cell_voltage_cols = [f'Cell Volt {i}' for i in range(1, 17)]
temp_sensor_cols = [f'Temperature Sensor {i}' for i in range(1, 7)]

# Inputs: the four pack-level readings, 16 cell voltages, 6 temperature sensors.
input_columns = [*pack_level_cols, *cell_voltage_cols, *temp_sensor_cols]

# Targets: temperature sensors 1-5 (these also appear among the inputs).
target_columns = [f'Temperature Sensor {i}' for i in range(1, 6)]

# Keep only the input columns and discard rows with any missing value.
df = df.loc[:, input_columns].dropna()

In [9]:
# Snapshot the columns used for training. target_columns is a subset of
# input_columns, so plain concatenation would select Temperature Sensor 1-5
# twice and write duplicated columns to the CSV; dict.fromkeys de-duplicates
# while keeping the original order.
training_columns = list(dict.fromkeys(input_columns + target_columns))
temp_df = df[training_columns].dropna()
temp_df.to_csv("feature_used_for_training.csv", index=False)

# Last expression of the cell, so the exported column list is rendered.
temp_df.columns

## Creating Future Targets

In [10]:
# -----------------------------------
# Build look-ahead targets
# -----------------------------------
shift_steps = 30  # ~5 minutes

# Final input/output columns
feature_cols = input_columns
target_cols = [f"{col}_future" for col in target_columns]

# shift(-shift_steps) moves values up by rows, pairing each row with the
# reading shift_steps rows later.
# NOTE(review): the "~5 minutes" horizon assumes evenly spaced rows — confirm
# the sampling interval.
future_readings = {
    f"{col}_future": df[col].shift(-shift_steps) for col in target_columns
}

# The trailing shift_steps rows have no look-ahead value (NaN) and are dropped.
df = df.assign(**future_readings).dropna()

X = df[feature_cols]
y = df[target_cols]

## Splitting the Data

In [11]:
# -----------------------------------
# Ordered 80/20 split (no shuffling)
# -----------------------------------
# The first 80% of rows train the model; the remaining rows are held out.
split = int(len(X) * 0.8)
X_train, y_train = X.iloc[:split], y.iloc[:split]
X_test, y_test = X.iloc[split:], y.iloc[split:]

# Training the Model

In [12]:
# -----------------------------------
# Fit scaler + random forest as one pipeline
# -----------------------------------
forest = RandomForestRegressor(
    n_estimators=150,
    max_depth=15,
    random_state=42,  # fixed seed for repeatable training
    n_jobs=-1,
)
pipeline = Pipeline(steps=[('scaler', StandardScaler()), ('model', forest)])

# Kept as the cell's last expression so the fitted pipeline is shown as output.
pipeline.fit(X_train, y_train)

0,1,2
,steps,"[('scaler', ...), ('model', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,n_estimators,150
,criterion,'squared_error'
,max_depth,15
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


# Evaluation

In [13]:
# -----------------------------------
# Score the model on the held-out rows
# -----------------------------------
y_pred = pipeline.predict(X_test)

# Both metrics are called with their default settings on the full target frame.
mae, r2 = (score(y_test, y_pred) for score in (mean_absolute_error, r2_score))

print(f"✅ MAE: {mae:.4f}")
print(f"✅ R²: {r2:.4f}")

✅ MAE: 0.9092
✅ R²: 0.8738


# Saving the Model

In [14]:
# -----------------------------------
# Persist the fitted pipeline and its feature order
# -----------------------------------
artifacts = (
    (pipeline, "temperature_model_no_lag.pkl"),
    (feature_cols, "feature_list_no_lag.pkl"),
)
for artifact, filename in artifacts:
    joblib.dump(artifact, filename)
print("✅ Model and features saved.")

✅ Model and features saved.


In [17]:

# ------------------------------
# Inference and Output Formatting
# ------------------------------

# Reload the persisted pipeline and the feature order it was trained with.
# NOTE(review): joblib.load unpickles arbitrary objects — only load trusted files.
model = joblib.load("temperature_model_no_lag.pkl")
features = joblib.load("feature_list_no_lag.pkl")

# Single-row input built from the first test row
# (replace with live input or a row from REPORT-3.csv).
first_test_row = X_test.iloc[0]
sample_input = pd.DataFrame([first_test_row], columns=features)

# Take the prediction for the only input row and round each value to 2 decimals.
raw_prediction = model.predict(sample_input)[0]
predictions = [round(val, 2) for val in raw_prediction]

# Print one labelled value per line, followed by the current wall-clock time.
output_labels = (
    "BMS Temperature",
    "Battery Pack Temperature 1",
    "Battery Pack Temperature 2",
    "Battery Pack Temperature 3",
    "Battery Pack Temperature 4",
)
for label, value in zip(output_labels, predictions):
    print(f"{label}: {value}")
print(f"Timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")


BMS Temperature: 31.0
Battery Pack Temperature 1: 27.0
Battery Pack Temperature 2: 27.0
Battery Pack Temperature 3: 27.0
Battery Pack Temperature 4: 27.0
Timestamp: 2025-07-14 19:42:15
