In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score
import xgboost as xgb
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Load the synthetic 24 V battery dataset.
df = pd.read_csv('/content/synthetic_soc_dataset_24V.csv')

# Target: state of charge. Predictors: the three sensor readings.
y = df['SoC (%)']
X = df[['Voltage (V)', 'Current (A)', 'Temperature (°C)']]

# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits so no test-set statistics leak in.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_test)
)

In [None]:

# rf_model = RandomForestRegressor(random_state=42)

# rf_params = {
#     'n_estimators': [200, 300],
#     'max_depth': [10, 20, None],
#     'min_samples_split': [2, 5],
#     'min_samples_leaf': [1, 2]
# }

# rf_grid = GridSearchCV(rf_model, rf_params, cv=5, scoring='neg_mean_absolute_error', n_jobs=-1)
# rf_grid.fit(X_train_scaled, y_train)

# best_rf = rf_grid.best_estimator_
# y_pred_rf = best_rf.predict(X_test_scaled)

In [None]:

# Base regressor whose hyper-parameters are tuned below.
# - random_state: the grid samples rows/columns (subsample and
#   colsample_bytree < 1), so the seed is pinned explicitly, matching the
#   seed used for the train/test split.
# - n_jobs=1: GridSearchCV already parallelises across candidates/folds with
#   n_jobs=-1; letting every XGBoost fit also grab all cores causes thread
#   contention and slows the search down. The trade-off is that the final
#   refit and later predict calls on best_xgb are single-threaded too.
xgb_model = xgb.XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
    n_jobs=1,
)

# 3 * 3 * 3 * 2 * 2 * 2 = 216 candidates, each evaluated with 5-fold CV.
param_grid = {
    'n_estimators': [300, 500, 700],
    'learning_rate': [0.01, 0.05, 0.1],
    'max_depth': [4, 6, 8],
    'subsample': [0.7, 0.9],
    'colsample_bytree': [0.7, 0.9],
    'gamma': [0, 0.1]
}

# Candidates are ranked by (negated) mean absolute error, so higher is better.
xgb_grid = GridSearchCV(
    xgb_model,
    param_grid,
    cv=5,
    scoring='neg_mean_absolute_error',
    n_jobs=-1,
)
xgb_grid.fit(X_train_scaled, y_train)

# With the default refit=True, best_estimator_ has been retrained on the whole
# training split using the winning parameter combination.
best_xgb = xgb_grid.best_estimator_
y_pred_xgb = best_xgb.predict(X_test_scaled)

In [None]:

# nn_model = Sequential([
#     Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
#     Dropout(0.3),
#     Dense(64, activation='relu'),
#     Dropout(0.2),
#     Dense(32, activation='relu'),
#     Dense(1)
# ])

# nn_model.compile(optimizer='adam', loss='mae', metrics=['mae'])

# early_stop = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# history = nn_model.fit(
#     X_train_scaled, y_train,
#     epochs=200,
#     batch_size=64,
#     validation_data=(X_test_scaled, y_test),
#     callbacks=[early_stop],
#     verbose=0
# )

# y_pred_nn = nn_model.predict(X_test_scaled).flatten()

In [None]:
# def evaluate_model(name, y_true, y_pred):
#     mae = mean_absolute_error(y_true, y_pred)
#     r2 = r2_score(y_true, y_pred)
#     print(f"{name} Performance:")
#     print(f"MAE: {mae:.4f}%")
#     print(f"R² Score: {r2:.4f}")
#     print("-"*40)

#     plt.figure(figsize=(8,6))
#     plt.scatter(y_true, y_pred, alpha=0.3)
#     plt.plot([0,100], [0,100], 'r--')
#     plt.title(f'{name} Prediction vs Actual')
#     plt.xlabel('Actual SoC (%)')
#     plt.ylabel('Predicted SoC (%)')
#     plt.show()

# # Evaluate all models
# evaluate_model("XGBoost", y_test, y_pred_xgb)
# evaluate_model("Random Forest", y_test, y_pred_rf)
# evaluate_model("Neural Network", y_test, y_pred_nn)

In [None]:

# plt.figure(figsize=(10,6))
# xgb.plot_importance(best_xgb)
# plt.title('XGBoost Feature Importance')
# plt.show()

# importance_df = pd.DataFrame({
#     'Feature': X.columns,
#     'Importance': best_xgb.feature_importances_
# }).sort_values('Importance', ascending=False)

# print("Feature Importance Ranking:")
# print(importance_df)

In [None]:
import joblib

# Persist the tuned XGBoost regressor together with the scaler that was fitted
# on the training split, so both can be reloaded for inference later.
final_model = best_xgb
joblib.dump(value=final_model, filename='soc_predictor_xgb.pkl')
joblib.dump(value=scaler, filename='scaler.pkl')




['scaler.pkl']

In [None]:

# Reload the persisted artefacts from disk rather than reusing the in-memory
# training objects, mirroring what a separate inference script would do.
# NOTE: joblib.load unpickles arbitrary objects; only load files you trust.
model = joblib.load('soc_predictor_xgb.pkl')
scaler = joblib.load('scaler.pkl')

# The scaler was fitted on a DataFrame, so it recorded the training column
# names. Feeding it a bare ndarray triggers scikit-learn's
# "X does not have valid feature names" warning and leaves the column order
# unchecked. Wrapping the sample in a DataFrame with the same column names
# lets scikit-learn validate the input against what it was fitted on.
new_data = pd.DataFrame(
    np.array([[29, 3.5, 35]]),
    columns=['Voltage (V)', 'Current (A)', 'Temperature (°C)'],
)
new_data_scaled = scaler.transform(new_data)

# The model was trained on the scaled array, so it is given the scaled sample.
soc_prediction = model.predict(new_data_scaled)
print(f"Predicted SoC: {soc_prediction[0]:.2f}%")

Predicted SoC: 92.86%




In [None]:
# # Load real-time logger data
# logger_df = pd.read_csv('ev_sensor_log.csv')

# # Ensure correct column names
# logger_df.columns = logger_df.columns.str.strip()

# # Select features used in training
# input_features = ['Voltage (V)', 'Current (A)', 'Temperature (°C)']
# logger_inputs = logger_df[input_features]

# # Scale real-time inputs
# logger_scaled = scaler.transform(logger_inputs)

# # Predict SoC for each row
# logger_df['Predicted SoC (%)'] = model.predict(logger_scaled)

# # Output results
# print(logger_df[['Voltage (V)', 'Current (A)', 'Temperature (°C)', 'Predicted SoC (%)']].head())

# # Save to new CSV
# logger_df.to_csv('real_time_soc_predictions.csv', index=False)
# print("Saved predictions to 'real_time_soc_predictions.csv'")

   Voltage (V)  Current (A)  Temperature (°C)  Predicted SoC (%)
0        -0.06         0.53             33.06           2.235659
1        -0.05         0.63             33.12           2.221772
2        25.48        12.74             33.06          94.546822
3        25.37         3.58             33.06          92.881844
4        25.67         3.81             33.12          95.805260
Saved predictions to 'real_time_soc_predictions.csv'
