# In [10]:
# WEEK 2 - Forecasting EV Demand Model

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import joblib

# Load the preprocessed CSV file
df = pd.read_csv("preprocessed_ev_data.csv")

# Parse "Date" into datetime objects; values that cannot be parsed become NaT
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')

# Drop rows whose date failed to parse or whose target (EV Total) is missing.
# The explicit .copy() yields an independent frame, so the column assignments
# below cannot emit SettingWithCopyWarning (which a frame produced by
# boolean-mask filtering can do under pandas' non copy-on-write behaviour).
df = df.dropna(subset=['Date', 'Electric Vehicle (EV) Total']).copy()

# Fill missing location labels with a placeholder
df['County'] = df['County'].fillna('Unknown')
df['State'] = df['State'].fillna('Unknown')

# Convert count columns to numbers: cast to text, strip thousands separators
# (","), then parse. Anything that still cannot be parsed becomes NaN.
cols_to_convert = [
    "Battery Electric Vehicles (BEVs)",
    "Plug-In Hybrid Electric Vehicles (PHEVs)",
    "Electric Vehicle (EV) Total",
    "Non-Electric Vehicle Total",
    "Total Vehicles",
]
for col in cols_to_convert:
    df[col] = pd.to_numeric(
        df[col].astype(str).str.replace(',', '', regex=False),
        errors='coerce',
    )

# Drop rows where numeric conversion failed in any of the count columns
df.dropna(subset=cols_to_convert, inplace=True)

# Outlier fences for 'Percent Electric Vehicles', computed on the rows that
# survived cleaning: 1.5 * IQR below the first quartile and above the third.
Q1, Q3 = df['Percent Electric Vehicles'].quantile([0.25, 0.75])
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

# Cap the outliers: values above the upper fence are set to it, values below
# the lower fence are set to it, and everything else is left unchanged.
# np.select takes the first matching condition, so the upper check wins.
pct_ev = df['Percent Electric Vehicles']
df['Percent Electric Vehicles'] = np.select(
    [pct_ev > upper_bound, pct_ev < lower_bound],
    [upper_bound, lower_bound],
    default=pct_ev,
)


# Write the cleaned frame back to the CSV it was loaded from, then read it
# again. read_csv is called without parse_dates, so "Date" comes back as text.
df.to_csv("preprocessed_ev_data.csv", index=False)
df = pd.read_csv("preprocessed_ev_data.csv")

# Features (X) and target (y)
# NOTE(review): if "Electric Vehicle (EV) Total" is simply BEVs + PHEVs, this
# regression learns that sum rather than forecasting demand - confirm intent.
feature_columns = [
    "Battery Electric Vehicles (BEVs)",
    "Plug-In Hybrid Electric Vehicles (PHEVs)",
]
X = df[feature_columns]
y = df["Electric Vehicle (EV) Total"]

# Fit an ordinary least-squares linear regression (fit returns the estimator)
model = LinearRegression().fit(X, y)

# Persist the fitted model to disk
joblib.dump(model, "forecasting_ev_model.pkl")

print("Model training complete and saved as forecasting_ev_model.pkl")

# Output: Model training complete and saved as forecasting_ev_model.pkl
