In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, ShuffleSplit, GridSearchCV,cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import scale, StandardScaler
from sklearn.svm import SVR
from sklearn.ensemble import BaggingRegressor
from sklearn.datasets import make_regression

In [5]:
hit = pd.read_csv("Hitters.csv")
df = hit.copy()
df = df.dropna()
dms = pd.get_dummies(df[['League','Division','NewLeague']])
y = df["Salary"]
X_ = df.drop(['Salary','League','Division','NewLeague'], axis = 1).astype("float64")
X = pd.concat([X_,dms[['League_N','Division_W','NewLeague_N']]], axis = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 42)

In [9]:
bag_model = BaggingRegressor(bootstrap_features = True)
bag_model.fit(X_train, y_train)

BaggingRegressor(bootstrap_features=True)

In [13]:
bag_model.estimators_

[DecisionTreeRegressor(random_state=1822963257),
 DecisionTreeRegressor(random_state=1108109976),
 DecisionTreeRegressor(random_state=812772860),
 DecisionTreeRegressor(random_state=505615748),
 DecisionTreeRegressor(random_state=1656414629),
 DecisionTreeRegressor(random_state=691764992),
 DecisionTreeRegressor(random_state=13414137),
 DecisionTreeRegressor(random_state=1337353161),
 DecisionTreeRegressor(random_state=77916509),
 DecisionTreeRegressor(random_state=555148117)]

In [15]:
bag_model.estimators_features_

[array([ 3,  4, 15, 18,  8, 10,  4, 11,  0, 16,  7,  3, 11,  1, 10,  4,  5,
         4, 12]),
 array([ 1, 17, 18,  2,  3,  0,  0, 15,  0, 18,  3,  0, 17, 12,  8,  3, 11,
         0,  8]),
 array([ 7,  8,  7, 12, 15,  4, 16,  3,  7,  4, 17, 14,  5,  1,  3, 15, 11,
        17, 11]),
 array([ 4, 12,  4, 12, 15,  6,  5,  4,  0, 16,  4,  8,  8, 10, 15,  6, 17,
        17,  1]),
 array([ 5, 18, 14, 18,  8, 11, 13, 18, 16,  8,  0, 18, 15,  6,  2, 13, 17,
         5,  6]),
 array([ 0,  4,  0,  8, 10,  5,  7, 10,  6, 18, 13, 15, 13,  0, 18, 16,  3,
        12,  6]),
 array([10,  2, 12, 15, 17,  9,  7, 17,  7, 10, 18, 10,  6,  9,  6, 10,  2,
         4, 15]),
 array([ 9,  9, 15, 11, 10, 16,  3, 10,  3, 17, 17, 13,  5, 11,  3,  2, 12,
         5, 17]),
 array([ 9, 18,  3,  6, 11, 10,  6, 13, 17,  9, 15,  4, 16, 15, 11,  5, 17,
        15, 14]),
 array([14,  1, 10,  2,  7,  1, 18, 13,  7,  7,  6, 10,  0,  5, 11,  4, 17,
         3, 13])]

In [16]:
# TAHMİN

In [18]:
y_pred = bag_model.predict(X_test)

In [19]:
np.sqrt(mean_squared_error(y_test, y_pred))

374.03466175747997

In [20]:
#TUNING

In [23]:
bag_params = {'n_estimators':range(2,20)}

In [24]:
bag_cv_model = GridSearchCV(bag_model, bag_params, cv = 10)

In [25]:
bag_cv_model.fit(X_train, y_train)

GridSearchCV(cv=10, estimator=BaggingRegressor(bootstrap_features=True),
             param_grid={'n_estimators': range(2, 20)})

In [27]:
bag_cv_model.best_params_

{'n_estimators': 10}

In [31]:
bag_tuned = BaggingRegressor(n_estimators = 10, random_state = 44)
bag_tuned.fit(X_train, y_train)

BaggingRegressor(random_state=44)

In [32]:
y_pred = bag_tuned.predict(X_test)

In [34]:
np.sqrt(mean_squared_error(y_test, y_pred))

352.2981865139768