In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split,GridSearchCV
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from sklearn.datasets import fetch_california_housing

In [2]:
# Fetch the California housing dataset through scikit-learn's loader.
data = fetch_california_housing()

# Feature matrix as a DataFrame whose columns carry the dataset's feature names.
x = pd.DataFrame(
    data.data,
    columns=data.feature_names,
)

# Regression target, taken from the dataset's `target` field.
y = data.target

In [3]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split the same from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Hyperparameter grid for the XGBoost grid search.
# Each key is an XGBRegressor parameter name; each list holds the candidate
# values to be tried for that parameter.
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.3, 0.5],
    'max_depth': [3, 5, 7],
    'subsample': [0.7, 0.8, 1.0],
    # Upper candidate is 1.0 (use every column), mirroring `subsample`.
    # A value of 0.1 here would restrict each tree to a tenth of the columns
    # and leave the "no column subsampling" baseline out of the search.
    'colsample_bytree': [0.7, 0.8, 1.0],
}

In [5]:
# Base XGBoost regressor: squared-error objective, fixed seed.
xgb_regressor = XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
)

In [None]:
# Grid search over param_grid using 5-fold cross-validation, scored by R^2.
# n_jobs=-1 asks scikit-learn to run the fits in parallel on all processors.
grid_search = GridSearchCV(
    estimator=xgb_regressor,
    param_grid=param_grid,
    cv=5,
    scoring='r2',
    n_jobs=-1,
)
grid_search.fit(x_train, y_train)

In [None]:
# Report the hyperparameter combination selected by the grid search.
print(
    "Best Parameters:",
    grid_search.best_params_,
)

In [None]:
# Take the best model found by the grid search and predict on the held-out test set.
# NOTE: nothing is trained in this cell. The GridSearchCV call does not override
# `refit`, so under its documented default (refit=True) `best_estimator_` has
# already been fitted on the full training data.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(x_test)

In [None]:
# Test-set regression metrics for the best model.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# One value per line, in this order: MAE, MSE, R^2.
print(mae, mse, r2, sep="\n")

In [None]:
# Horizontal bar chart of the best model's feature importances, one bar per
# feature column of x.
importances = best_model.feature_importances_

fig, ax = plt.subplots(figsize=(10, 5))
ax.barh(x.columns, importances)
ax.set_xlabel('Feature Importance')
ax.set_ylabel('Feature')
ax.set_title('Best xg Boost Feature Importance')
plt.show()