In [2]:
# Cell 1: Imports
import os
import pickle

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import Ridge  # Changed import
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder



In [3]:
# Cell 2: Load Data
# Relative path: it is resolved against the kernel's current working directory.
data_path = '../../data/processed/model_ready_ev_data.csv'
df = pd.read_csv(data_path)



In [4]:
# Cell 3: Define Features (X) and Target (y) and Split
FEATURE_COLUMNS = ['Postal Code', 'Model Year', 'Prev_Year_EV_Count', 'Year_Delta']
TARGET_COLUMN = 'EV_Count'
# Last model year that belongs to the training split; later years are held out.
TRAIN_CUTOFF_YEAR = 2022

X = df[FEATURE_COLUMNS]
y = df[TARGET_COLUMN]

# Split by 'Model Year' rather than at random. Both comparisons are spelled out
# (instead of negating one mask) so that a row with a missing year lands in
# neither split, exactly as it did before.
is_train = X['Model Year'] <= TRAIN_CUTOFF_YEAR
is_test = X['Model Year'] > TRAIN_CUTOFF_YEAR

X_train, X_test = X[is_train], X[is_test]
y_train, y_test = y[is_train], y[is_test]

# Fail here with a clear message instead of deep inside fit()/predict().
assert len(X_train) > 0, f"No training rows with Model Year <= {TRAIN_CUTOFF_YEAR}"
assert len(X_test) > 0, f"No test rows with Model Year > {TRAIN_CUTOFF_YEAR}"



In [5]:
# Cell 4: Create Preprocessing and Model Pipeline
numeric_features = ['Model Year', 'Prev_Year_EV_Count', 'Year_Delta']
categorical_features = ['Postal Code']

# Numeric columns are forwarded untouched; postal codes are one-hot encoded.
# handle_unknown='ignore' lets transform() accept postal codes that were not
# present when the encoder was fitted instead of raising.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numeric_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ]
)

# Preprocessing and the regressor are chained so they are fitted together.
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', Ridge(alpha=1.0)),  # Changed model
    ]
)



In [6]:
# Cell 5: Train and Evaluate
# Fit on the training split, then predict for the held-out split.
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)

# Score the held-out predictions once and keep the values around for reuse.
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("--- Ridge Regression Evaluation ---")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"R-squared (R2) Score: {r2:.2f}")



--- Ridge Regression Evaluation ---
Mean Absolute Error (MAE): 67.84
R-squared (R2) Score: 0.29


In [7]:
# Cell 6: Save Model
model_path = '../../models/ridge_regression.pkl' # Changed filename

# Create the target directory first so the save also works when the models
# folder does not exist yet; exist_ok=True makes this a no-op when it does.
os.makedirs(os.path.dirname(model_path), exist_ok=True)

# The whole fitted pipeline (preprocessor + regressor) is pickled as one object.
# NOTE: pickle files can execute arbitrary code on load; only unpickle this
# file when it comes from a trusted source.
with open(model_path, 'wb') as file:
    pickle.dump(pipeline, file)
print(f"Model saved to: {model_path}")

Model saved to: ../../models/ridge_regression.pkl
