# **Gradient Boost Implementation**

Dataset url: https://www.kaggle.com/datasets/avish5787/boston-data-set

In [24]:
# import the required libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

from sklearn.ensemble import GradientBoostingRegressor

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

from sklearn.model_selection import GridSearchCV

import warnings
from warnings import filterwarnings
filterwarnings("ignore")

In [15]:
df = pd.read_csv("/content/boston.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Price
0,0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [16]:
X = df.drop(columns = "Price",axis=1) # Independent features
y = df["Price"] # Dependent Feature

In [19]:
X.head()

Unnamed: 0.1,Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [18]:
y

0      24.0
1      21.6
2      34.7
3      33.4
4      36.2
       ... 
501    22.4
502    20.6
503    23.9
504    22.0
505    11.9
Name: Price, Length: 506, dtype: float64

In [20]:
# Split dataset into training and testing

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.2,random_state = 10)

## **Model training and Evaluation**

In [21]:
# create Gradient Boost classifier object

Gradient_Regressor = GradientBoostingRegressor(max_depth=2,n_estimators=3,learning_rate=1.0)

In [22]:
# train Gradient Boost Regressor
model = Gradient_Regressor.fit(X_train,y_train)

# predict the response for test dataset
y_pred = model.predict(X_test)

In [23]:
print(r2_score(y_pred,y_test))

0.4937262126606021


# **Tune the Hypermater**

In [27]:
param_grid = {
    "learning_rate":[0.15,0.1,0.10,0.05],
    "n_estimators":[100,150,200,150]
}

In [34]:
grid_search = GridSearchCV(estimator=GradientBoostingRegressor(),param_grid=param_grid,cv=5,n_jobs=-1,verbose=3,scoring="r2")

In [35]:
grid_search.fit(X_train,y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits


In [37]:
grid_search.best_params_,grid_search.best_score_

({'learning_rate': 0.15, 'n_estimators': 150}, 0.8739259838392816)