In [1]:
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor

from sklearn.model_selection import train_test_split, RandomizedSearchCV

from sklearn.pipeline import Pipeline

from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import classification_report, accuracy_score, r2_score

import random

import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt

In [2]:
min_range = 0
max_range = 500

In [3]:
rand_int_min = -50
rand_int_max = 50

## Gradient Boost Classifier

In [4]:
# 3 columns
x = [[r+random.randint(rand_int_min, rand_int_max), r+random.randint(rand_int_min, rand_int_max), r+random.randint(rand_int_min, rand_int_max)] for r in range(min_range,max_range)]
x

[[-4, -16, -11],
 [-1, 22, -28],
 [35, 10, -7],
 [38, -27, 12],
 [17, 18, -5],
 [-35, -11, -42],
 [16, -34, -35],
 [52, 25, 14],
 [-39, 39, 34],
 [51, 29, -23],
 [0, 12, -24],
 [-12, 22, 18],
 [9, -2, 23],
 [45, 41, 2],
 [-19, 38, 1],
 [61, 60, 40],
 [52, 66, 51],
 [26, 57, 19],
 [-1, -30, 66],
 [-5, 36, 17],
 [68, 4, 70],
 [-22, 42, 31],
 [44, 56, 51],
 [66, 29, 48],
 [10, 24, 13],
 [12, 8, 3],
 [15, 73, 10],
 [15, 5, 61],
 [-11, 71, 40],
 [37, 55, 79],
 [50, 48, 6],
 [73, 64, 74],
 [81, 73, 70],
 [39, 4, 15],
 [-3, 59, 63],
 [4, 73, 35],
 [50, -2, 57],
 [-2, 23, 33],
 [29, 37, -6],
 [21, 32, 13],
 [66, 77, 75],
 [-5, 29, 64],
 [37, 88, 40],
 [40, -1, 4],
 [83, 47, 78],
 [29, 67, 1],
 [48, 5, 68],
 [34, 57, 91],
 [40, 98, 67],
 [92, 76, 88],
 [40, 8, 40],
 [73, 75, 36],
 [62, 40, 59],
 [60, 78, 8],
 [80, 4, 86],
 [30, 19, 100],
 [22, 97, 12],
 [106, 52, 84],
 [22, 14, 15],
 [91, 71, 76],
 [50, 68, 10],
 [74, 62, 63],
 [77, 25, 41],
 [54, 78, 21],
 [88, 26, 47],
 [88, 19, 50],
 [54, 85

In [5]:
y = [random.randint(0, 1) for r in range(min_range,max_range)]
y

[1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,


In [6]:
x_train_clf, x_test_clf, y_train_clf, y_test_clf = train_test_split(x, y, train_size=.7, test_size=.3)

In [7]:
grad_boost_clf = GradientBoostingClassifier()

In [8]:
grad_boost_clf.fit(x_train_clf, y_train_clf)

In [9]:
grad_boost_pred_class = grad_boost_clf.predict(x_test_clf)

In [10]:
mean_squared_error(y_test_clf, grad_boost_pred_class)

0.6266666666666667

In [11]:
mean_absolute_error(y_test_clf, grad_boost_pred_class)

0.6266666666666667

In [12]:
accuracy_score(y_test_clf, grad_boost_pred_class)

0.37333333333333335

In [13]:
cr = classification_report(y_test_clf, grad_boost_pred_class)
print(cr)

              precision    recall  f1-score   support

           0       0.35      0.39      0.36        70
           1       0.40      0.36      0.38        80

    accuracy                           0.37       150
   macro avg       0.37      0.37      0.37       150
weighted avg       0.38      0.37      0.37       150



## Gradient Boost Regressor

In [14]:
# 3 columns
x = [[r+random.randint(rand_int_min, rand_int_max), r+random.randint(rand_int_min, rand_int_max), r+random.randint(rand_int_min, rand_int_max)] for r in range(min_range,max_range)]
x

[[-2, 45, -32],
 [-29, -48, 12],
 [23, 19, 5],
 [35, -45, 48],
 [48, 48, -8],
 [-45, -41, -25],
 [-35, 53, 12],
 [-33, 25, 46],
 [-3, 10, 57],
 [9, -11, -31],
 [0, 26, 60],
 [58, 47, -27],
 [-4, -3, -29],
 [23, -35, 5],
 [0, 1, 11],
 [32, 25, -32],
 [11, 65, -9],
 [44, -8, -13],
 [3, -21, -23],
 [-13, 60, 61],
 [27, 66, 20],
 [0, 25, 70],
 [63, 15, -8],
 [-3, 0, 45],
 [38, 69, 49],
 [55, -24, 23],
 [45, 18, 17],
 [72, -19, 75],
 [25, 14, -21],
 [5, 54, 68],
 [3, 46, 46],
 [32, 42, 64],
 [17, 64, -6],
 [43, 35, 70],
 [75, 70, 56],
 [20, 6, 65],
 [53, 63, 11],
 [28, 71, 81],
 [12, -6, 15],
 [43, -8, 61],
 [22, -5, 37],
 [0, 15, 75],
 [49, 75, 86],
 [3, 3, 29],
 [36, -5, 29],
 [72, 35, 56],
 [41, 7, 51],
 [28, 28, 28],
 [92, 0, 7],
 [87, -1, 99],
 [35, 93, 25],
 [39, 77, 36],
 [89, 86, 12],
 [97, 39, 47],
 [7, 94, 71],
 [105, 101, 87],
 [12, 75, 105],
 [107, 27, 46],
 [74, 85, 74],
 [55, 33, 94],
 [58, 63, 32],
 [89, 80, 59],
 [96, 48, 66],
 [34, 84, 82],
 [104, 22, 85],
 [39, 41, 110],
 

In [15]:
y = [r+random.randint(rand_int_min, rand_int_max) for r in range(min_range,max_range)]
y

[-13,
 47,
 -41,
 35,
 35,
 25,
 26,
 56,
 22,
 -19,
 -11,
 43,
 45,
 20,
 -16,
 -1,
 23,
 -27,
 38,
 -17,
 56,
 5,
 16,
 15,
 -5,
 23,
 42,
 27,
 64,
 35,
 -11,
 -19,
 0,
 28,
 25,
 51,
 50,
 15,
 -4,
 40,
 54,
 43,
 14,
 92,
 53,
 26,
 18,
 39,
 24,
 69,
 11,
 81,
 70,
 34,
 42,
 104,
 20,
 62,
 65,
 45,
 61,
 34,
 37,
 68,
 69,
 58,
 52,
 17,
 103,
 25,
 102,
 86,
 85,
 43,
 103,
 122,
 46,
 126,
 125,
 66,
 67,
 84,
 70,
 79,
 73,
 93,
 106,
 45,
 52,
 70,
 127,
 46,
 87,
 77,
 87,
 52,
 71,
 72,
 92,
 116,
 51,
 70,
 128,
 139,
 54,
 67,
 129,
 110,
 100,
 138,
 75,
 68,
 157,
 152,
 87,
 133,
 143,
 125,
 106,
 76,
 131,
 148,
 103,
 119,
 110,
 150,
 176,
 141,
 130,
 154,
 82,
 87,
 180,
 154,
 136,
 120,
 181,
 186,
 174,
 93,
 119,
 129,
 149,
 138,
 128,
 172,
 188,
 175,
 128,
 118,
 108,
 134,
 159,
 121,
 177,
 141,
 150,
 108,
 136,
 121,
 160,
 182,
 191,
 154,
 175,
 144,
 165,
 210,
 150,
 173,
 152,
 127,
 222,
 152,
 146,
 129,
 189,
 172,
 172,
 140,
 152,
 134,
 1

In [16]:
x_train_reg, x_test_reg, y_train_reg, y_test_reg = train_test_split(x, y, train_size=.7, test_size=.3)

In [17]:
grad_boost_reg = GradientBoostingRegressor()

In [18]:
grad_boost_reg.fit(x_train_reg, y_train_reg)

In [19]:
grad_boost_pred_reg = grad_boost_reg.predict(x_test_reg)

In [20]:
mean_squared_error(y_test_reg, grad_boost_pred_reg)

1309.2411851741228

In [21]:
mean_absolute_error(y_test_reg, grad_boost_pred_reg)

29.817110284221442

In [22]:
r2_score(y_test_reg, grad_boost_pred_reg)

0.9379186331686109

## Gradient Boost Pipeline

### Gradient Boost Classification Pipeline

In [23]:
grad_boost_clf_pipeline = Pipeline([
    ('grad_boost', GradientBoostingClassifier())
])

In [24]:
grad_boost_clf_pipeline.get_params().keys()

dict_keys(['memory', 'steps', 'verbose', 'grad_boost', 'grad_boost__ccp_alpha', 'grad_boost__criterion', 'grad_boost__init', 'grad_boost__learning_rate', 'grad_boost__loss', 'grad_boost__max_depth', 'grad_boost__max_features', 'grad_boost__max_leaf_nodes', 'grad_boost__min_impurity_decrease', 'grad_boost__min_samples_leaf', 'grad_boost__min_samples_split', 'grad_boost__min_weight_fraction_leaf', 'grad_boost__n_estimators', 'grad_boost__n_iter_no_change', 'grad_boost__random_state', 'grad_boost__subsample', 'grad_boost__tol', 'grad_boost__validation_fraction', 'grad_boost__verbose', 'grad_boost__warm_start'])

In [25]:
grad_boost_clf_param_grid = {
    'grad_boost__loss': ['log_loss', 'exponential'],
    'grad_boost__learning_rate': [.3, .5, .7],
    'grad_boost__criterion': ['friedman_mse', 'squared_error'],
    'grad_boost__n_estimators': [50,100,150],
    'grad_boost__random_state': [0,10,42]
}

In [26]:
grad_boost_clf_random_search = RandomizedSearchCV(grad_boost_clf_pipeline, grad_boost_clf_param_grid)

In [27]:
grad_boost_clf_random_search.fit(x_train_clf, y_train_clf)

In [28]:
grad_boost_clf_rand_search_pred = grad_boost_clf_random_search.predict(x_test_clf)
grad_boost_clf_rand_search_pred

array([0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1,
       0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0,
       0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1])

In [29]:
grad_boost_clf_random_search.best_params_

{'grad_boost__random_state': 42,
 'grad_boost__n_estimators': 50,
 'grad_boost__loss': 'exponential',
 'grad_boost__learning_rate': 0.3,
 'grad_boost__criterion': 'squared_error'}

In [30]:
mean_squared_error(y_test_clf, grad_boost_clf_rand_search_pred)

0.5866666666666667

In [31]:
mean_absolute_error(y_test_reg, grad_boost_clf_rand_search_pred)

29.817110284221442

In [35]:
accuracy_score(y_test_clf, grad_boost_clf_rand_search_pred)

0.41333333333333333

In [36]:
cr = classification_report(y_test_clf, grad_boost_clf_rand_search_pred)
print(cr)

              precision    recall  f1-score   support

           0       0.39      0.44      0.41        70
           1       0.44      0.39      0.41        80

    accuracy                           0.41       150
   macro avg       0.42      0.42      0.41       150
weighted avg       0.42      0.41      0.41       150



### Gradient Boost Regression Pipeline

In [37]:

grad_boost_reg_pipeline = Pipeline([
    ('grad_boost', GradientBoostingRegressor())
])

In [38]:
grad_boost_reg_pipeline.get_params().keys()

dict_keys(['memory', 'steps', 'verbose', 'grad_boost', 'grad_boost__alpha', 'grad_boost__ccp_alpha', 'grad_boost__criterion', 'grad_boost__init', 'grad_boost__learning_rate', 'grad_boost__loss', 'grad_boost__max_depth', 'grad_boost__max_features', 'grad_boost__max_leaf_nodes', 'grad_boost__min_impurity_decrease', 'grad_boost__min_samples_leaf', 'grad_boost__min_samples_split', 'grad_boost__min_weight_fraction_leaf', 'grad_boost__n_estimators', 'grad_boost__n_iter_no_change', 'grad_boost__random_state', 'grad_boost__subsample', 'grad_boost__tol', 'grad_boost__validation_fraction', 'grad_boost__verbose', 'grad_boost__warm_start'])

In [39]:
grad_boost_reg_param_grid = {
    'grad_boost__loss': ['squared_error', 'absolute_error', 'huber', 'quantile'],
    'grad_boost__learning_rate': [.3, .5, .7],
    'grad_boost__criterion': ['friedman_mse', 'squared_error'],
    'grad_boost__n_estimators': [50,100,150],
    'grad_boost__random_state': [0,10,42]
}

In [40]:
grad_boost_reg_random_search = RandomizedSearchCV(grad_boost_reg_pipeline, grad_boost_reg_param_grid)

In [41]:
grad_boost_reg_random_search.fit(x_train_reg, y_train_reg)

In [42]:
grad_boost_reg_rand_search_pred = grad_boost_reg_random_search.predict(x_test_clf)
grad_boost_reg_rand_search_pred

array([282.56777067,  84.40936701, 106.97662821, 282.56777067,
        51.96259183, 332.09114433, 396.6019329 , 236.96777067,
       185.85453844, 452.1998212 , 358.09583433, 123.35178349,
       343.5742329 , 148.61298622,  57.30037338, 489.28586814,
       329.4321818 , 106.97662821, 481.69293343, 125.75433821,
       140.09057916, 236.96777067, 209.54777067, 282.56777067,
       353.4021818 , 282.56777067, 400.55735083,  54.68599951,
       429.80875806, 429.80875806, 245.09777067,  84.52750667,
       443.80714303, 221.26176622, 322.85659375, 438.99145806,
       148.61298622, 312.32737375,   8.10693986, 180.02298622,
        52.98224526,  94.51108508, 310.49777067, 451.38033461,
       117.56541612, 407.6453229 , 235.45146622,   1.815565  ,
       303.50659375,  94.51108508, 429.80875806, 294.76877067,
       226.34777067, 488.58803142, 401.69798423,  74.62439414,
       282.56777067, 236.96777067, 479.12806269,  -8.6551605 ,
       314.36777067,  54.6666103 , 419.78245806,  86.00

In [43]:
grad_boost_reg_random_search.best_params_

{'grad_boost__random_state': 42,
 'grad_boost__n_estimators': 100,
 'grad_boost__loss': 'absolute_error',
 'grad_boost__learning_rate': 0.3,
 'grad_boost__criterion': 'friedman_mse'}

In [44]:
mean_squared_error(y_test_reg, grad_boost_reg_rand_search_pred)

40384.59325491258

In [45]:
mean_absolute_error(y_test_reg, grad_boost_reg_rand_search_pred)

165.40511787787833

In [46]:
r2_score(y_test_reg, grad_boost_reg_rand_search_pred)

-0.9149494963842226