In [1]:
import pandas as pd,warnings,numpy as np,matplotlib.pyplot as plt,seaborn as sns
from xgboost import XGBClassifier,XGBRegressor
from sklearn.model_selection import train_test_split,RandomizedSearchCV,GridSearchCV,cross_val_score
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix,mean_squared_error,r2_score
from sklearn.datasets import load_boston,load_iris
from sklearn.tree import plot_tree
# Suppress every warning category for the rest of the session. This keeps the
# cell output short, but it also hides deprecation and numerical warnings.
warnings.filterwarnings('ignore')

In [2]:
def Evaluate_Regressor(model,X_test,y_test):
    """Print and return a MAPE-based "accuracy" for a fitted regressor.

    The score is ``100 - MAPE`` where MAPE is the mean absolute percentage
    error, ``100 * mean(|y_pred - y_true| / |y_true|)``.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.
    X_test : array-like
        Features passed unchanged to ``model.predict``.
    y_test : array-like
        True target values. Any shape is accepted; it is flattened to 1-D.

    Returns
    -------
    float
        ``100 - MAPE``. It can be negative when the average relative error
        exceeds 100 %, and is infinite/NaN when a target equals zero because
        the percentage error is undefined there.
    """
    # Flatten both sides to plain 1-D float arrays so that the subtraction is
    # strictly element-wise: no pandas index alignment, and no accidental
    # (n, 1) - (n,) broadcasting into an (n, n) matrix.
    y_true = np.asarray(y_test, dtype=float).ravel()
    y_pred = np.asarray(model.predict(X_test), dtype=float).ravel()

    errors = np.abs(y_pred - y_true)
    # Divide by the magnitude of the target: with a signed denominator the
    # percentage errors of negative targets cancel those of positive ones.
    mape = 100 * np.mean(errors / np.abs(y_true))
    accuracy = 100 - mape

    print('Model Performance')
    # NOTE(review): the "degrees" unit is hard-coded in the message; confirm it
    # matches the target being predicted.
    print('Average Error: {:0.4f} degrees.'.format(np.mean(errors)))
    print('Accuracy = {:0.2f}%.'.format(accuracy))
    return accuracy

In [3]:
def R2_score(model,X,y,name_r_score):
    """Print the R2 score of ``model`` on ``(X, y)`` as a labelled percentage.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X)``.
    X : array-like
        Features to predict on.
    y : array-like
        True target values for ``X``.
    name_r_score : str
        Label printed in front of the score.

    Returns
    -------
    None
        The score is only printed, formatted with two decimals and a ``%``.
    """
    score = r2_score(y, model.predict(X))
    print(f'{name_r_score}: {score:.2%}')

# DATA

In [4]:
# Load the Boston housing data and wrap it in pandas containers.
# NOTE(review): load_boston was removed in scikit-learn 1.2, so this cell needs
# an older scikit-learn release to run.
boston = load_boston()
X = pd.DataFrame(data=boston.data, columns=boston.feature_names)
y = pd.Series(data=boston.target)

# Hold out 30 % of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=10,
)

# PARAMETERS

In [5]:
# Candidate values for each XGBoost hyper-parameter explored by the searches.
n_estimators = [100, 150, 200, 250, 500]              # number of boosted trees
learning_rate = [0.05, 0.10, 0.15, 0.20, 0.25, 0.30]  # step-size shrinkage
max_depth = [3, 4, 5, 6, 8, 10, 12, 15]               # maximum tree depth
min_child_weight = [1, 3, 5, 7]                       # min. summed instance weight in a child
gamma = [0.0, 0.1, 0.2, 0.3, 0.4]                     # min. loss reduction required to split
colsample_bytree = [0.3, 0.4, 0.5, 0.7]               # fraction of columns sampled per tree

# Search space keyed by estimator argument name: 5*6*8*4*5*4 = 19,200 combinations.
xg_params = dict(
    n_estimators=n_estimators,
    learning_rate=learning_rate,
    max_depth=max_depth,
    min_child_weight=min_child_weight,
    gamma=gamma,
    colsample_bytree=colsample_bytree,
)

# BASE MODEL

In [6]:
# Baseline for comparison: an XGBRegressor with all-default hyper-parameters,
# fitted on the training split only.
base_model = XGBRegressor()
base_model.fit(X=X_train, y=y_train)

# RandomizedSearchCV

In [7]:
def Hypertuning_rscv(model,params,n_iter,cv,X_train,y_train):
    """Tune ``model`` with a randomized cross-validated search.

    Parameters
    ----------
    model : estimator
        Unfitted estimator to tune.
    params : dict
        Maps parameter names to the candidate values to sample from.
    n_iter : int
        Number of parameter settings sampled.
    cv : int or cross-validation splitter
        Cross-validation strategy forwarded to ``RandomizedSearchCV``.
    X_train, y_train : array-like
        Training data the search is fitted on.

    Returns
    -------
    tuple
        ``(best_params, best_score, best_estimator)`` taken from the fitted
        search object.
    """
    # Sampling is seeded (random_state=42) so repeated runs draw the same
    # candidates; n_jobs=-1 uses every available core.
    search = RandomizedSearchCV(
        estimator=model,
        param_distributions=params,
        n_iter=n_iter,
        cv=cv,
        n_jobs=-1,
        verbose=2,
        random_state=42,
    )
    search.fit(X_train, y_train)
    return search.best_params_, search.best_score_, search.best_estimator_

In [8]:
# Randomized search: 100 sampled candidates x 10 folds = 1000 fits.
best_params, best_score, best_estimator_random = Hypertuning_rscv(
    model=XGBRegressor(),
    params=xg_params,
    n_iter=100,
    cv=10,
    X_train=X_train,
    y_train=y_train,
)

Fitting 10 folds for each of 100 candidates, totalling 1000 fits


# GridSearchCV

In [9]:
def Hypertuning_gscv(model,params,cv,X_train,y_train):
    """Tune ``model`` with an exhaustive cross-validated grid search.

    Parameters
    ----------
    model : estimator
        Unfitted estimator to tune.
    params : dict
        Maps parameter names to the values to try; every combination is fitted.
    cv : int or cross-validation splitter
        Cross-validation strategy forwarded to ``GridSearchCV``.
    X_train, y_train : array-like
        Training data the search is fitted on.

    Returns
    -------
    tuple
        ``(best_params, best_score, best_estimator)`` taken from the fitted
        search object.
    """
    # n_jobs=-1 uses every available core.
    search = GridSearchCV(
        estimator=model,
        param_grid=params,
        cv=cv,
        n_jobs=-1,
        verbose=2,
    )
    search.fit(X_train, y_train)

    top_score = search.best_score_
    top_params = search.best_params_
    top_estimator = search.best_estimator_
    return top_params, top_score, top_estimator

In [10]:
# Exhaustive grid search, left disabled. xg_params spans 5*6*8*4*5*4 = 19,200
# combinations, so with cv=3 this call would run 57,600 fits. If re-enabled it
# also overwrites the best_params / best_score returned by the randomized search.
# best_params, best_score, best_estimator_grid = Hypertuning_gscv(XGBRegressor(),xg_params,3,X_train,y_train)

# Evaluation

R2 SCORE

In [11]:
# R2 on the train and test splits for the untuned baseline...
R2_score(base_model,X_train,y_train,'Train R2 score Base Model')
R2_score(base_model,X_test,y_test,'Test R2 score Base Model')
print('')
# ...and for the best estimator found by the randomized search.
R2_score(best_estimator_random,X_train,y_train,'Train R2 score Randomized')
R2_score(best_estimator_random,X_test,y_test,'Test R2 score Randomized')
# Grid-search comparison; needs best_estimator_grid from the disabled grid-search cell.
# print('')
# R2_score(best_estimator_grid,X_train,y_train,'Train R2 score Grid')
# R2_score(best_estimator_grid,X_test,y_test,'Test R2 score Grid')

Train R2 score Base Model: 100.00%
Test R2 score Base Model: 87.94%

Train R2 score Randomimzed: 98.93%
Test R2 score Randomized: 88.70%


MEAN ERROR & MAPE

In [12]:
# Mean absolute error and MAPE-based accuracy of the untuned baseline on the test split.
base_accuracy = Evaluate_Regressor(
    model=base_model,
    X_test=X_test,
    y_test=y_test,
)

Model Performance
Average Error: 2.5361 degrees.
Accuracy = 88.54%.


In [13]:
# Same test-split evaluation for the estimator selected by the randomized search.
random_accuracy_regressor = Evaluate_Regressor(
    model=best_estimator_random,
    X_test=X_test,
    y_test=y_test,
)

Model Performance
Average Error: 2.3005 degrees.
Accuracy = 89.62%.


In [14]:
# Relative change of the tuned model's accuracy with respect to the baseline, in percent.
relative_gain = 100 * (random_accuracy_regressor - base_accuracy) / base_accuracy
print(f'Improvement of {relative_gain:0.2f}%.')

Improvement of 1.21%.


In [15]:
# Disabled: best_estimator_grid only exists when the commented-out grid-search call is run.
# grid_accuracy_regressor = Evaluate_Regressor(best_estimator_grid,X_test,y_test)

In [16]:
# Disabled: grid_accuracy_regressor is only defined when the grid-search evaluation is run.
# print('Improvement of {:0.2f}%.'.format(100*(grid_accuracy_regressor-base_accuracy)/base_accuracy))