In [1]:
import pandas as pd,warnings,numpy as np,matplotlib.pyplot as plt,seaborn as sns
from lightgbm import LGBMRegressor
from sklearn.model_selection import train_test_split,RandomizedSearchCV,GridSearchCV,cross_val_score
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix,mean_squared_error,r2_score
from sklearn.datasets import load_boston,load_iris
from sklearn.tree import plot_tree
warnings.filterwarnings('ignore')

In [2]:
def Evaluate_Regressor(model,X_test,y_test):
    y_pred = model.predict(X_test)
    errors = abs(y_pred - y_test)
    mape = 100 * np.mean(errors/y_test)
    accuracy = 100 - mape
    print('Model Performance')
    print('Average Error: {:0.4f} degrees.'.format(np.mean(errors)))
    print('Accuracy = {:0.2f}%.'.format(accuracy))
    return accuracy

In [3]:
def R2_score(model,X,y,name_r_score):
    y_pred = model.predict(X)
    print('{}: {:.2%}'.format(name_r_score,r2_score(y,y_pred)))

# DATA

In [4]:
boston = load_boston()
X = pd.DataFrame(boston.data,columns=boston.feature_names)
y = pd.Series(boston.target)
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=.3,random_state=10)

# PARAMETERS

In [5]:
boosting_type = ['gbdt','dart','goss','rf']
colsample_bytree = [0.3,0.4,0.5,0.7]
learning_rate = [0.05,0.10,0.15,0.20,0.25,0.30]
max_depth = [3,4,5,6,8,10,12,15]
min_child_weight = [1,3,5,7]
n_estimators = [100,150,200,250,500,1000]
num_leaves = [6,4,10,12,16,20,24,30]
early_stopping_round = [1,2,3,4]

lightgbm_params = {'boosting_type':boosting_type,'learning_rate':learning_rate,
                   'n_estimators':n_estimators,'max_depth':max_depth,
                   'colsample_bytree':colsample_bytree,'subsample':[0.7,0.8,0.9],
                   'min_child_samples':[1,5,10],'min_child_weight':min_child_weight,'num_leaves':num_leaves,
                   'early_stopping_round':early_stopping_round}

# BASE MODEL

In [7]:
base_model = LGBMRegressor()
base_model.fit(X_train,y_train)

# RandomizedSearchCV

In [8]:
def Hypertuning_rscv(model,params,n_iter,cv,X_train,y_train):
    random = RandomizedSearchCV(estimator=model, param_distributions=params, n_jobs=-1, n_iter=n_iter, cv=cv,verbose=2, random_state=42)
    random.fit(X_train,y_train)
    best_params = random.best_params_
    best_score = random.best_score_
    best_estimator = random.best_estimator_
    return best_params,best_score,best_estimator

In [9]:
best_params, best_score, best_estimator_random = Hypertuning_rscv(LGBMRegressor(),lightgbm_params,100,10,X_train,y_train)

Fitting 10 folds for each of 100 candidates, totalling 1000 fits


# GridSearchCV

In [10]:
def Hypertuning_gscv(model,params,cv,X_train,y_train):
    grid = GridSearchCV(estimator = model, param_grid = params, cv=cv, n_jobs=-1, verbose=2)
    grid.fit(X_train,y_train)
    best_score = grid.best_score_
    best_params = grid.best_params_
    best_estimator = grid.best_estimator_
    return best_params,best_score,best_estimator

In [11]:
# best_params, best_score, best_estimator_grid = Hypertuning_gscv(LGBMRegressor(),lightgbm_params,3,X_train,y_train)

# Evaluation

R2 SCORE

In [12]:
R2_score(base_model,X_train,y_train,'Train R2 score Base Model')
R2_score(base_model,X_test,y_test,'Test R2 score Base Model')
print('')
R2_score(best_estimator_random,X_train,y_train,'Train R2 score Randomimzed')
R2_score(best_estimator_random,X_test,y_test,'Test R2 score Randomized')
# print('')
# R2_score(best_estimator_grid,X_train,y_train,'Train R2 score Grid')
# R2_score(best_estimator_grid,X_test,y_test,'Test R2 scorey Grid')

Train R2 score Base Model: 97.20%
Test R2 score Base Model: 87.52%

Train R2 score Randomimzed: 95.39%
Test R2 score Randomized: 81.30%


MEAN ERROR & MAPE

In [13]:
base_accuracy = Evaluate_Regressor(base_model,X_test,y_test)

Model Performance
Average Error: 2.3042 degrees.
Accuracy = 89.76%.


In [14]:
random_accuracy_regressor = Evaluate_Regressor(best_estimator_random,X_test,y_test)

Model Performance
Average Error: 2.8650 degrees.
Accuracy = 88.87%.


In [15]:
print('Improvement of {:0.2f}%.'.format(100*(random_accuracy_regressor-base_accuracy)/base_accuracy))

Improvement of -0.99%.


In [16]:
# grid_accuracy_regressor = Evaluate_Regressor(best_estimator_grid,X_test,y_test)

In [17]:
# print('Improvement of {:0.2f}%.'.format(100*(grid_accuracy_regressor-base_accuracy)/base_accuracy))