In [17]:
#Packages
import pandas as pd
import numpy as np
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.datasets import load_boston
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_squared_error

In [45]:
# Load the California housing dataset. `as_frame=True` asks scikit-learn to
# return pandas objects, so `cali.data` and `cali.target` (consumed by the
# train/test split cell) are a DataFrame and a Series rather than raw arrays.
cali = fetch_california_housing(as_frame=True)

In [46]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# partition identical across kernel restarts.
y = cali.target
X = pd.DataFrame(cali.data)
(
    X_train_cali,
    X_test_cali,
    y_train_cali,
    y_test_cali,
) = train_test_split(X, y, test_size=0.2, random_state=65)

In [None]:
#Fitting a boosting model
# Shallow trees (depth 2) with a small learning rate over 1000 boosting rounds.
# `eval_metric` is configured on the estimator instead of being passed to
# fit(): the fit() keyword was deprecated in XGBoost 1.6 and later removed.
xgb_mod = xgb.XGBRegressor(objective='reg:squarederror',
                           random_state=33,     # scikit-learn-API name for `seed`
                           learning_rate=0.1,   # scikit-learn-API name for `eta`
                           max_depth=2,
                           n_estimators=1000,
                           eval_metric='rmse')

# The test split is passed as eval_set only to record the error after every
# boosting round; no early stopping is configured, so it does not affect the
# fitted model. `verbose` is a fit() argument (not a constructor one); the
# value 1 prints the metric at every round. fit() returns the estimator
# itself, so `xgb_mod_err` is the same fitted model as `xgb_mod`.
xgb_mod_err = xgb_mod.fit(X_train_cali,
                          y_train_cali,
                          eval_set=[(X_test_cali, y_test_cali)],
                          verbose=1)

# evals_result() holds the RMSE on the eval set for each boosting round;
# squaring it yields the per-round test MSE.
x = np.square(xgb_mod_err.evals_result()['validation_0']['rmse'])

In [None]:
#Fitting decision tree using cost-complexity pruning with 5-fold cross-validation
# random_state fixes the tie-breaking between equally good splits so that the
# pruning path and the selected alpha are reproducible across runs.
reg_dt = DecisionTreeRegressor(max_depth=10, random_state=10)

# Candidate alphas: the effective alphas at which the depth-10 tree grown on
# the training data would lose a subtree.
path_ccp = reg_dt.cost_complexity_pruning_path(X_train_cali, y_train_cali)

# Floating-point round-off can produce tiny negative alphas, which ccp_alpha
# validation rejects; clip them to 0 and drop duplicates before the search.
ccp_alphas = np.unique(np.clip(path_ccp.ccp_alphas, 0, None))

# Pick the alpha with the lowest 5-fold CV mean squared error, then refit the
# tree with that alpha on the full training set (refit=True).
kfold = KFold(5, shuffle=True, random_state=10)
grid = GridSearchCV(reg_dt,
                    {'ccp_alpha': ccp_alphas},
                    refit=True,
                    cv=kfold,
                    scoring='neg_mean_squared_error')
reg_fit = grid.fit(X_train_cali, y_train_cali)
best_reg_fit = grid.best_estimator_

# Test-set RMSE of the pruned tree (displayed as the cell output).
reg_predict = best_reg_fit.predict(X_test_cali)
np.sqrt(mean_squared_error(y_true=y_test_cali, y_pred=reg_predict))