In [2]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import cross_val_score, KFold
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

In [4]:
# Evaluate a polynomial linear regression on the California housing data
# using shuffled k-fold cross-validation and report per-fold and mean RMSE.

# Tunable settings, named once so the standalone expansion and the pipeline
# step below cannot drift apart.
POLY_DEGREE = 2
N_SPLITS = 10
SEED = 1

housing = fetch_california_housing()
X, y = housing.data, housing.target

# NOTE(review): X_poly is not consumed by the cross-validation in this cell
# (the pipeline applies its own PolynomialFeatures step to X). It is kept in
# case a later cell reads it; remove if nothing else does.
poly = PolynomialFeatures(degree=POLY_DEGREE, include_bias=False)
X_poly = poly.fit_transform(X)

# Scaling and polynomial expansion live inside the pipeline, so they are
# fitted as part of the estimator handed to cross_val_score.
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('poly', PolynomialFeatures(degree=POLY_DEGREE, include_bias=False)),
    ('regressor', LinearRegression())
])

# Shuffled folds with a fixed seed so the split is reproducible.
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)
cv_scores = cross_val_score(pipeline, X, y, cv=kf, scoring='neg_mean_squared_error')

# The scorer yields negated MSE values; flip the sign before taking the root.
rmse_scores = np.sqrt(-cv_scores)
# Mean of the per-fold RMSEs (not the root of the pooled MSE).
mean_rmse = np.mean(rmse_scores)

print(f'Cross validation RMSE Scores: {rmse_scores}')
print(f'Mean RMSE: {mean_rmse}')


Cross validation RMSE Scores: [0.66940941 0.64942148 0.66551324 0.64590929 2.11241526 0.65395091
 0.65721026 5.78715067 0.64516044 0.76717789]
Mean RMSE: 1.3253318845382183
