# Bagged Tree Regression

In [1]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split, GridSearchCV,cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale 
from sklearn import model_selection
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import BaggingRegressor

from warnings import filterwarnings
filterwarnings('ignore')

In [2]:
hit = pd.read_csv("Hitters.csv")
df = hit.copy()
df = df.dropna()
dms = pd.get_dummies(df[['League', 'Division', 'NewLeague']])
y = df["Salary"]
X_ = df.drop(['Salary', 'League', 'Division', 'NewLeague'], axis=1).astype('float64')
X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.25, 
                                                    random_state=42)

In [3]:
bag_model=BaggingRegressor(bootstrap_features=True)

In [4]:
bag_model.fit(X_train,y_train)

BaggingRegressor(bootstrap_features=True)

In [5]:
bag_model.estimators_

[DecisionTreeRegressor(random_state=645807840),
 DecisionTreeRegressor(random_state=1965859853),
 DecisionTreeRegressor(random_state=7635885),
 DecisionTreeRegressor(random_state=1349109735),
 DecisionTreeRegressor(random_state=1107287120),
 DecisionTreeRegressor(random_state=315637402),
 DecisionTreeRegressor(random_state=1573485830),
 DecisionTreeRegressor(random_state=65754825),
 DecisionTreeRegressor(random_state=531016858),
 DecisionTreeRegressor(random_state=1723100518)]

In [6]:
bag_model.n_estimators

10

In [7]:
bag_model.estimators_samples_

[array([ 85,  33,  90, 102,  37, 179,  73, 152,   3, 173,  97,  68, 189,
        191,  98,  12, 187, 162, 128,   7, 157, 186,  32, 115, 136,  68,
        176,  70,  76,  47, 122,  94, 185,  12,  85, 188,  27, 152, 182,
         46, 168, 123, 120,  84,  58, 124, 158,  96, 125, 182, 175,  96,
          0, 115, 165, 182, 162, 185,  93,  54,  68,  88, 110, 191,  12,
        193, 156, 169,  31,  23, 186,  85,  76, 145,  36, 194,  62, 106,
         64,  66,  94,  92,  80,  46,  80,  20, 107, 131, 109,  50,  23,
        166, 173, 176, 194,  10, 153,  24,  46,  61, 195, 166, 192,  13,
         45,  32, 154,   9,  34, 123,  52,  36,  72, 157, 146, 151, 111,
         56,   3,   1, 192, 109,  30, 189, 183, 187,  19,  96,  82, 192,
         53,  69,  91, 195,  95, 127,  91,  79, 136, 167,  67,  19, 181,
        172,   9,   3,  68,  22, 131,  73,  88,  85, 171,  34,  10, 111,
         23,  51, 164,   2,  47, 169,  29,  53,  73,  53,  18,  57,  52,
        176,  58, 104, 100,  60, 102,  29,  40,  76

In [8]:
bag_model.estimators_features_

[array([ 0, 11, 14,  1, 16, 15,  2, 11, 13,  1,  6,  8, 12, 10,  1,  3,  8,
         7, 11]),
 array([13, 10, 12, 12, 16, 14,  9,  1, 12,  4, 13, 13, 13, 15,  5, 17,  9,
        14,  2]),
 array([13, 11, 18, 11,  9,  9,  9,  0, 12,  1,  0,  7, 15, 17, 15,  2,  3,
        16, 11]),
 array([ 7, 13,  0, 14, 18,  9,  9, 15,  6, 10, 18,  9, 13, 12, 13, 16, 18,
        18, 15]),
 array([16,  1, 17,  5, 11, 17,  2, 18, 18,  6, 16,  0,  9,  1, 15,  7,  6,
        13,  0]),
 array([ 5,  4,  6, 12, 17,  4, 15, 12,  8, 11,  6,  5,  5, 15, 15,  4,  2,
         6,  3]),
 array([ 6,  6,  7,  2,  3,  5, 12,  0, 14, 13,  1,  1, 16, 17,  6,  2,  3,
        12, 13]),
 array([ 9,  5,  0,  9,  8, 14, 14, 15, 11,  6, 17,  9,  7, 18, 16, 17, 12,
        17,  7]),
 array([ 4,  8,  7, 11,  6, 17,  7,  3, 17,  0, 11, 17, 18, 11,  9, 12,  9,
         3,  0]),
 array([ 6,  4, 10, 12,  0, 10,  1,  0,  5,  9,  0, 16,  7,  5, 12, 12, 18,
        11,  7])]

In [9]:
bag_model.estimators_[1]

DecisionTreeRegressor(random_state=1965859853)

## Tahmin

In [10]:
y_pred=bag_model.predict(X_test)

In [11]:
np.sqrt(mean_squared_error(y_test,y_pred))

352.3263994458733

In [12]:
iki_y_pred=bag_model.estimators_[1].fit(X_train,y_train).predict(X_test)

In [13]:
np.sqrt(mean_squared_error(iki_y_pred,y_test))

464.11406691512815

# Model Tunning

In [14]:
bag_model=BaggingRegressor(bootstrap_features=True)

In [15]:
bag_model.fit(X_train,y_train)

BaggingRegressor(bootstrap_features=True)

In [16]:
bag_params={"n_estimators":range(2,20)}

In [17]:
bag_cv_model=GridSearchCV(bag_model,bag_params,cv=10)

In [18]:
bag_cv_model.fit(X_train,y_train)

GridSearchCV(cv=10, estimator=BaggingRegressor(bootstrap_features=True),
             param_grid={'n_estimators': range(2, 20)})

In [19]:
bag_cv_model.best_params_

{'n_estimators': 18}

In [21]:
bag_tuned=BaggingRegressor(n_estimators=bag_cv_model.best_params_["n_estimators"],random_state=45)

In [22]:
bag_tuned.fit(X_train,y_train)

BaggingRegressor(n_estimators=18, random_state=45)

In [23]:
y_pred=bag_tuned.predict(X_test)

In [24]:
np.sqrt(mean_squared_error(y_test,y_pred))

338.6015890705578