# Bagged Trees Regresyon

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingRegressor

from warnings import filterwarnings
filterwarnings('ignore')

In [2]:
hit = pd.read_csv("Hitters.csv")
df = hit.copy()
df = df.dropna()
dms = pd.get_dummies(df[['League', 'Division', 'NewLeague']])
y = df["Salary"]
X_ = df.drop(['Salary', 'League', 'Division', 'NewLeague'], axis=1).astype('float64')
X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.25,
                                                    random_state=42)


In [3]:
bag_model = BaggingRegressor(bootstrap_features = True)
bag_model.fit(X_train, y_train)

In [4]:
bag_model.n_estimators

10

In [5]:
bag_model.estimators_

[DecisionTreeRegressor(random_state=1423900827),
 DecisionTreeRegressor(random_state=1541023650),
 DecisionTreeRegressor(random_state=416591490),
 DecisionTreeRegressor(random_state=675788198),
 DecisionTreeRegressor(random_state=2104356656),
 DecisionTreeRegressor(random_state=1434083539),
 DecisionTreeRegressor(random_state=6966602),
 DecisionTreeRegressor(random_state=1389032533),
 DecisionTreeRegressor(random_state=1269174051),
 DecisionTreeRegressor(random_state=1466224086)]

In [6]:
bag_model.estimators_samples_

[array([136,  55,  52,  81, 171,  63,  56,  86,  30, 157, 160,  59, 196,
        181,  88, 163, 186, 184,  97,   6,  18,  78, 118,  67, 181, 124,
         81,  61,  74,  45,   7,  57,  15, 194,  16,   7,  94, 168,  59,
        149,  99,  99, 155,  80,  33,  42,  69, 160, 189, 145,  25,  11,
        188,  17, 164,  46, 147,  42,  33, 118,   2, 115, 157, 170, 173,
        174,   8, 185,  94,  79,  69, 192,   5,  49,  99,   8, 132, 139,
        102, 169, 137, 138,   9, 187,  58, 152, 151, 117, 157,  23, 157,
        156, 174, 150, 102, 154,  97,  19,  33, 135, 165, 126,  28,  32,
        181, 184, 151,  74,  15,  45,  93,  74, 152, 107, 186,  39,  66,
          7,  13, 149, 181, 135,  75,  23,  33,   5, 128, 116,  65,  29,
         60,  85, 184, 121,   8, 126, 110, 104,  39, 144,   2,  58,  55,
        161,  57,  15,  40, 147, 168,  69,  54,  80,  86,  67,  47,  26,
          5, 131, 135,   6,  68, 115,  38,  53, 104, 151,  25,   4,   6,
         40, 187,  70, 117,  14,   6, 112, 143,  26

In [7]:
bag_model.estimators_features_

[array([ 5, 15, 16,  0,  7, 14,  2, 18,  3,  6, 13, 10,  2,  9, 10, 15, 15,
         1,  5]),
 array([ 2,  2, 18,  4, 14,  3, 17, 17, 16,  0,  0,  6,  7,  4, 14, 15, 10,
        14, 11]),
 array([ 2, 16,  9, 12, 11, 18, 17,  9,  9,  9,  1,  0, 12,  4,  4,  1,  5,
        17, 13]),
 array([ 6, 12,  1, 17, 11,  7, 17,  4, 13, 18,  7, 17,  1,  9,  3, 13, 16,
        16,  1]),
 array([16, 15,  7,  8,  2,  4, 17, 16, 16, 18,  2,  7, 15,  2,  4,  7, 16,
         4, 16]),
 array([ 1, 11, 14,  9,  7, 11, 12, 14,  1,  2,  4,  9, 18, 16,  0, 17,  2,
         4,  9]),
 array([10,  9, 16,  4,  6,  3,  8,  0, 17,  8,  1,  3, 14,  4, 10,  4,  4,
         8, 10]),
 array([ 9, 10, 10,  4,  7,  7,  0,  8, 14,  4,  2, 16, 11, 13,  7, 17, 15,
        14, 17]),
 array([ 3, 13,  5, 10, 10,  0,  3, 12, 14,  7,  8,  0,  4,  8,  6,  4, 17,
         4, 18]),
 array([13, 15, 16,  0, 11, 17,  0, 13,  3,  0,  1, 13,  2,  7, 15,  9,  4,
        11,  6])]

In [8]:
bag_model.estimators_[1]

## Tahmin

In [9]:
y_pred = bag_model.predict(X_test)

In [10]:
np.sqrt(mean_squared_error(y_test, y_pred))

347.0706680002063

In [11]:
iki_y_pred = bag_model.estimators_[1].fit(X_train, y_train).predict(X_test)

In [12]:
np.sqrt(mean_squared_error(y_test, iki_y_pred))

530.3097597317463

In [13]:
yedi_y_pred = bag_model.estimators_[4].fit(X_train, y_train).predict(X_test)

In [14]:
np.sqrt(mean_squared_error(y_test, yedi_y_pred))

533.6905744009862

# Model Tuning

In [15]:
bag_model = BaggingRegressor(bootstrap_features = True)
bag_model.fit(X_train, y_train)

In [16]:
bag_params = {"n_estimators": range(2,20)}

In [17]:
bag_cv_model = GridSearchCV(bag_model, bag_params, cv = 10)

In [18]:
bag_cv_model.fit(X_train, y_train)

In [19]:
bag_cv_model.best_params_

{'n_estimators': 14}

In [20]:
bag_tuned = BaggingRegressor( n_estimators = 14, random_state = 45)

In [21]:
bag_tuned.fit(X_train, y_train)

In [22]:
y_pred = bag_tuned.predict(X_test)

In [23]:
np.sqrt(mean_squared_error(y_test, y_pred))

347.50463424981314