In [14]:
# 1. Import Libraries
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# 2. Load the raw data
df = pd.read_csv("Electric_Vehicle_Population_By_County.csv")

# 3. Data Preprocessing
# Drop rows that have no County or no State value.
df.dropna(subset=["County", "State"], inplace=True)

# Vehicle-count columns are converted to integers. They may be read as text
# containing thousands separators (e.g. "1,234"), which must be stripped first.
for col in [
    "Battery Electric Vehicles (BEVs)",
    "Plug-In Hybrid Electric Vehicles (PHEVs)",
    "Electric Vehicle (EV) Total",
    "Non-Electric Vehicle Total",
    "Total Vehicles",
]:
    # If none of a column's values contain a comma, pandas parses it as
    # numeric and the .str accessor would raise AttributeError, so only
    # strip separators from columns that are not numeric yet.
    if not pd.api.types.is_numeric_dtype(df[col]):
        # regex=False treats "," as a literal on every pandas version
        # (the default was regex=True before pandas 2.0).
        df[col] = df[col].str.replace(",", "", regex=False)
    df[col] = df[col].astype(int)

# Parse the date column and derive calendar features from it.
df["Date"] = pd.to_datetime(df["Date"])
df["Year"] = df["Date"].dt.year
df["Month"] = df["Date"].dt.month

# One-hot encode the categorical columns; drop_first=True omits the first
# level of each encoded column.
df = pd.get_dummies(df, columns=["County", "State", "Vehicle Primary Use"], drop_first=True)

# Save the preprocessed data
df.to_csv("preprocessed_ev_data.csv", index=False)

# 4. Define features and target
# The target is the EV total; the features are every remaining column except
# the raw "Date" timestamp.
# NOTE(review): X still contains the BEV and PHEV count columns as well as the
# non-electric and total vehicle counts. If the EV total is the sum of BEVs and
# PHEVs, as the column names suggest, the target can be reconstructed directly
# from the features -- confirm this is intended.
y = df["Electric Vehicle (EV) Total"]
X = df.drop(["Electric Vehicle (EV) Total", "Date"], axis=1)

# 5. Train-test split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
# NOTE(review): rows are split at random rather than by date -- verify this
# suits the forecasting use case.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 6. Train Random Forest Regressor
# fit() returns the estimator itself, so construction and training are chained.
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# 7. Evaluation
# Score the held-out rows with mean squared error and R².
y_pred = model.predict(X_test)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

for label, value in (("Mean Squared Error:", mse), ("R² Score:", r2)):
    print(label, value)

# 8. Save the trained model
# Serialise the fitted estimator with pickle, writing in binary mode.
with open("forecasting_ev_model.pkl", mode="wb") as model_file:
    pickle.dump(model, model_file)









Mean Squared Error: 15560.369428213175
R² Score: 0.9988634103281181
