# Bagged Trees Regresyon

In [1]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split, GridSearchCV,cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale 
from sklearn import model_selection
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import BaggingRegressor

from warnings import filterwarnings
filterwarnings('ignore')

In [2]:
hit = pd.read_csv("Hitters.csv")
df = hit.copy()
df = df.dropna()
dms = pd.get_dummies(df[['League', 'Division', 'NewLeague']])
y = df["Salary"]
X_ = df.drop(['Salary', 'League', 'Division', 'NewLeague'], axis=1).astype('float64')
X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.25, 
                                                    random_state=42)


In [3]:
bag_model = BaggingRegressor(bootstrap_features = True)
bag_model.fit(X_train, y_train)

In [4]:
bag_model.n_estimators

10

In [5]:
bag_model.estimators_

[DecisionTreeRegressor(random_state=231569092),
 DecisionTreeRegressor(random_state=380072756),
 DecisionTreeRegressor(random_state=2014191094),
 DecisionTreeRegressor(random_state=975066905),
 DecisionTreeRegressor(random_state=2071903035),
 DecisionTreeRegressor(random_state=1336734109),
 DecisionTreeRegressor(random_state=97904009),
 DecisionTreeRegressor(random_state=89039621),
 DecisionTreeRegressor(random_state=2063521463),
 DecisionTreeRegressor(random_state=1637156966)]

In [6]:
bag_model.estimators_samples_

[array([ 36, 178,   3, 113, 162,  61, 160, 155, 105, 156, 183, 117,  81,
        173, 105,  13,  49, 144,  48,  44,  89, 118, 100,  33,  22, 174,
          4, 162, 169, 150,  70, 124,  65,  49, 132, 102,  43,  65, 151,
         56, 111, 171, 159, 141,  43,  17, 191,  33, 178, 136, 106,   5,
        106, 196, 175,  78,  15,  83,  41, 131,  30, 155, 164, 123,  43,
        124, 139, 171,  45, 131, 173,  14,  38, 156, 139, 174, 137, 193,
        129, 174,  30, 149, 135, 144,  90,  97, 192,  38, 158,  57,  46,
         69,  42, 145,  95,  74, 195, 169, 192, 166, 126,  39, 135,  22,
          0, 105,  50,  53, 181, 162,  57,  92, 130,  48, 133, 195,  37,
         41, 178,  40, 183, 188,  84, 193, 160,  69, 158, 140, 100, 151,
        181,  89, 163,  93, 167, 168, 155,  60, 134, 174, 169,  23,  72,
         16, 168, 111,  86, 167,  43, 109, 130,  90, 196,  35,  60, 114,
        153, 135, 178,   7, 139, 130,  77, 163,  59,  73, 167,  86,  19,
        161,  92, 123,  79,  46,  64, 161, 175,  79

In [7]:
bag_model.estimators_features_

[array([ 4, 17,  7,  0,  6, 18,  6, 10,  7,  4, 17, 14, 11, 14, 15,  2,  4,
         7,  2]),
 array([16, 18, 10,  0, 14, 14, 16,  6,  3, 16,  6, 12, 11,  0, 15,  3,  2,
        14,  2]),
 array([13, 10, 18,  0, 16, 17,  1,  5, 11,  4,  0,  7,  0, 18,  6,  8, 16,
        11,  4]),
 array([12,  4, 18,  0,  3,  2, 12,  5, 12, 17, 15, 16, 12, 13, 17, 13, 11,
        15, 12]),
 array([18, 14,  2, 10,  6, 18, 10,  1,  0,  3,  4, 15, 13, 13, 11, 12, 10,
         2,  2]),
 array([ 7,  8, 11,  2,  2,  6,  6,  4,  8, 14, 12, 14, 16,  7, 10, 11,  6,
        10, 16]),
 array([ 9,  2, 17, 13, 10,  9, 12,  9,  8,  4, 18, 12, 16,  0, 13,  9, 16,
         6, 10]),
 array([ 5,  4, 14,  5,  8,  8, 16,  2, 13,  8,  0,  8,  3,  5, 11,  1,  4,
        12, 14]),
 array([12,  9, 11,  3,  6, 12,  6, 17, 18,  0, 10, 15,  3,  8,  7,  8,  1,
         3,  5]),
 array([ 6,  8,  3,  0, 11, 18, 14,  4,  7,  9,  7, 10, 16,  9,  9,  4,  9,
         8, 14])]

In [8]:
bag_model.estimators_[1]

## Tahmin

In [9]:
y_pred = bag_model.predict(X_test)

In [10]:
np.sqrt(mean_squared_error(y_test, y_pred))

374.1182137243335

In [11]:
iki_y_pred = bag_model.estimators_[1].fit(X_train, y_train).predict(X_test)

In [12]:
np.sqrt(mean_squared_error(y_test, iki_y_pred))

489.5750892277234

In [13]:
yedi_y_pred = bag_model.estimators_[6].fit(X_train, y_train).predict(X_test)

In [14]:
np.sqrt(mean_squared_error(y_test, yedi_y_pred))

431.58512981237743

# Model Tuning

In [15]:
bag_model = BaggingRegressor(bootstrap_features = True)
bag_model.fit(X_train, y_train)

In [17]:
bag_params = {"n_estimators": range(2, 20)}

In [19]:
bag_cv_model = GridSearchCV(bag_model, bag_params, cv=10)

In [20]:
bag_cv_model.fit(X_train, y_train)

In [21]:
bag_cv_model.best_params_

{'n_estimators': 18}

In [23]:
bag_tuned = BaggingRegressor(n_estimators=14, random_state=45)

In [24]:
bag_tuned.fit(X_train, y_train)

In [25]:
y_pred = bag_tuned.predict(X_test)

In [26]:
np.sqrt(mean_squared_error(y_test, y_pred))

346.457987188104