In [None]:
import pickle
import pathlib

import numpy as np
import pandas as pd

In [None]:
# The data folder is resolved as a sibling of the current working directory.
# NOTE(review): this presumably expects the kernel to be started from a
# sub-folder (e.g. `notebooks/`) of the project root — confirm.
DATA_DIR = pathlib.Path.cwd().parent / 'data'
print(DATA_DIR)

# Read the pickled dataset stored under `<DATA_DIR>/processed/`.
# NOTE(review): unpickling can execute arbitrary code contained in the file,
# so only load pickles that were produced by a trusted step of this project.
clean_data_path = DATA_DIR.joinpath('processed', 'ames_clean_eng.pkl')
with clean_data_path.open('rb') as file:
    data = pickle.load(file)

In [None]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures, PowerTransformer, RobustScaler, StandardScaler, Normalizer
from sklearn.compose import ColumnTransformer

# Fit a polynomial-features + Ridge regression on the loaded dataset, tuning
# the polynomial degree and the Ridge penalty by 5-fold cross-validation, then
# report the error on a held-out test split.

# Work on a copy so the `data` frame loaded earlier is not modified here.
model_data = data.copy()

# Split the frame into the regression target and the predictor columns.
# `drop` already returns a new DataFrame, so no extra copy is needed for X.
y = model_data["SalePrice"].copy()
X = model_data.drop(columns=["SalePrice"])

# Hold out 25% of the rows for the final evaluation; the fixed seed keeps the
# split reproducible across runs.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=12,
)

# All preprocessing lives inside the pipeline so that, during cross-validation,
# the feature expansion and scaling are fitted on the training folds only.
piped_model = Pipeline([
    ("Poly", PolynomialFeatures()),
    ("Scaler", StandardScaler()),
    ("Ridge", Ridge()),
])

param_grid_r = {
    'Ridge__alpha': [0.1, 1.0, 10.0],  # List of alpha values to try
    # Degree 0 is deliberately not searched: it reduces the feature matrix to
    # the constant bias column, which StandardScaler maps to zeros, leaving an
    # intercept-only model and wasting a third of the cross-validation fits.
    'Poly__degree': [1, 2],
}

# No `scoring` is passed, so candidates are ranked with the estimator's default
# score (R^2 for Ridge), not with the RMSE reported below. With the default
# `refit=True`, the best candidate is refitted on the whole training split.
grid_search_r = GridSearchCV(piped_model, param_grid_r, cv=5)

grid_search_r.fit(Xtrain, ytrain)
print(f"Best parameters: {grid_search_r.best_params_}")

# Evaluate the refitted best model on the held-out rows.
ypred = grid_search_r.predict(Xtest)
RMSE = np.sqrt(mean_squared_error(ytest, ypred))
print(f"RMSE = {RMSE}")

# Convert the RMSE into a multiplicative error expressed as a percentage.
# NOTE(review): this is only meaningful if `SalePrice` is stored as
# log10(price) — confirm against the preprocessing step.
error_percent = 100 * (10**RMSE - 1)
print(f'Average error is {error_percent:.2f}%')