In [1]:
import joblib
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder



In [2]:
# 1. Load Data
# The path is relative, so the CSV must sit in the notebook's working directory.
data_file = 'ev_battery_charging_data.csv'
df = pd.read_csv(data_file)



In [3]:
# 2. Select Features and Target
# Three input columns (battery type, charging duration, charging cycles)
# are used to predict the degradation-rate column.
feature_columns = ['Battery Type', 'Charging Duration (min)', 'Charging Cycles']
target_column = 'Degradation Rate (%)'

X = df[feature_columns]
y = df[target_column]



In [4]:
# 3. Preprocessing Setup
# Column groups, split by the transformation each one receives
categorical_features = ['Battery Type']
numerical_features = ['Charging Duration (min)', 'Charging Cycles']

# Numeric columns are standardised; the categorical column is one-hot encoded,
# with handle_unknown='ignore' so categories unseen during fit do not raise.
numeric_step = ('num', StandardScaler(), numerical_features)
categorical_step = ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)

# Columns that belong to neither group are passed through unchanged
preprocessor = ColumnTransformer(
    transformers=[numeric_step, categorical_step],
    remainder='passthrough',
)



In [5]:
# 4. Split Data
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts



In [6]:
# 5. Model Training and Evaluation
# Fit each candidate regressor inside its own preprocessing pipeline, score it
# on the held-out split, and remember the candidate with the lowest MAE.
#
# NOTE(review): the winner is chosen with the same test split whose MAE is then
# reported, so the reported score of the selected model is optimistic. Consider
# selecting with cross-validation on the training data instead.

models = {
    "Linear Regression": LinearRegression(),
    "Random Forest Regressor": RandomForestRegressor(n_estimators=100, random_state=42),
    "Gradient Boosting Regressor": GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
}

results = {}
best_model_name = ""
best_mae = float('inf')

for name, model in models.items():
    # Without a `memory` cache, Pipeline.fit fits its steps in place. Reusing the
    # single `preprocessor` object would make every stored pipeline share (and
    # re-fit) the same transformer instance, so each candidate gets its own
    # unfitted copy.
    pipeline = Pipeline(steps=[('preprocessor', clone(preprocessor)),
                               ('regressor', model)])

    # Train on the training split only
    pipeline.fit(X_train, y_train)

    # Predict and evaluate on the held-out split
    y_pred = pipeline.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Keep the fitted pipeline so the winner can be saved later
    results[name] = {"MAE": mae, "R2": r2, "Model_Pipeline": pipeline}

    print(f"--- {name} ---")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"R-squared (R2): {r2:.4f}\n")

    # Choose Best Model (Minimize MAE); strict '<' keeps the earliest candidate on ties
    if mae < best_mae:
        best_mae = mae
        best_model_name = name



--- Linear Regression ---
Mean Absolute Error (MAE): 1.0934
R-squared (R2): 0.7422

--- Random Forest Regressor ---
Mean Absolute Error (MAE): 1.1665
R-squared (R2): 0.7055

--- Gradient Boosting Regressor ---
Mean Absolute Error (MAE): 1.0803
R-squared (R2): 0.7384



In [7]:
# 6. Save the Best Model
# Look up the fitted pipeline recorded for the lowest-MAE candidate and persist it.
best_entry = results[best_model_name]
best_pipeline = best_entry['Model_Pipeline']
joblib.dump(best_pipeline, 'battery_model.pkl')

print(f"Selected Best Model: {best_model_name} with MAE: {best_mae:.4f}")
print("Model saved as battery_model.pkl")

Selected Best Model: Gradient Boosting Regressor with MAE: 1.0803
Model saved as battery_model.pkl
