In [41]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.ensemble import BaggingRegressor

In [7]:
hit = pd.read_csv("Hitters.csv")
df = hit.copy()
df = df.dropna()
dms = pd.get_dummies(df[["League", "Division", "NewLeague"]])

y = df["Salary"]
X_ = df.drop(["Salary", "League", "Division", "NewLeague"], axis=1).astype("float64")
X = pd.concat([X_, dms[["League_N", "Division_W", "NewLeague_N"]]], axis = 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 25, random_state = 42)

In [13]:
# Model Kurulumu
bagging = BaggingRegressor(bootstrap_features = True)

# Model Eğitimi
bagging.fit(X_train, y_train)

In [18]:
# Kaç tane ağaç modeli olduğunu bulma
bagging.n_estimators

10

In [19]:
# Ağaç modellerine erişim
bagging.estimators_

[DecisionTreeRegressor(random_state=1833531455),
 DecisionTreeRegressor(random_state=918373910),
 DecisionTreeRegressor(random_state=950988185),
 DecisionTreeRegressor(random_state=1763694547),
 DecisionTreeRegressor(random_state=675263319),
 DecisionTreeRegressor(random_state=1715948060),
 DecisionTreeRegressor(random_state=88948445),
 DecisionTreeRegressor(random_state=1555197514),
 DecisionTreeRegressor(random_state=2077604417),
 DecisionTreeRegressor(random_state=1554516318)]

In [22]:
# Her bir ağaçtaki örnekler için
bagging.estimators_samples_

[array([177, 178, 166, 213, 228,  49,  81, 187,  59, 126, 172,  23, 211,
          0, 155, 109,   6,   6,  13,   6, 112, 160, 104,  53, 227,  67,
         72, 166, 158, 110,  15, 235, 100, 204, 231, 236, 176,  44,  53,
         55, 217,  15, 194, 157, 192,  60,  45,  42, 157, 200, 173,  74,
         59,  58,  43, 120, 143,  90, 180,  51, 161, 130,  77,  35, 151,
        111, 201, 136, 185, 141, 127,  15, 207, 178, 145, 200, 115,  37,
         41, 196, 227,  31,   2,  19,  96,  63,   0, 223,   7, 206,  83,
        177,  59,  29,  48,  52, 227, 142, 143, 155,  33, 154,  61, 198,
         16, 178, 226,  37,  57, 164, 138, 202,  86,  45, 112, 234,  89,
        198, 112, 114,   2,  23, 188, 233, 102, 229, 231,  75, 224,  46,
        104,  49,  46, 183, 153, 101, 206,   0,  96, 130, 194,  67, 117,
        220, 219, 174, 132, 190, 135, 144, 140, 136, 227,  63, 221,  55,
        112,  43, 213, 133, 118, 232,   8, 102, 138,  64,  96, 134,  75,
        207, 174,  40, 130, 147, 166,  97,  69, 111

In [23]:
# Her bir ağacın bağımsız değişkenine ulaşmak için
bagging.estimators_features_

[array([13,  1, 17,  5,  5, 10, 14,  3,  2, 16,  2,  1, 17,  6,  8, 12,  6,
         3, 13]),
 array([ 3,  9,  7, 14,  8,  7, 14, 17, 10,  6,  9,  3,  5,  0, 13,  7, 16,
         0,  3]),
 array([11,  7, 11, 18, 14, 16,  7, 11,  2,  1, 18, 10,  1,  2, 15,  6, 13,
         1, 17]),
 array([ 9, 17, 12,  7, 13,  4, 14,  6, 14,  9, 13, 16, 15, 14, 17, 14, 18,
        12, 11]),
 array([ 1, 17, 12, 12, 12,  6, 15, 17, 16,  0,  1,  8,  5,  2,  6, 13,  0,
        15,  9]),
 array([ 2, 18, 10,  5,  2,  3,  0,  1,  8,  6, 12, 13,  4, 16,  1,  9, 10,
        11, 16]),
 array([ 7,  9, 14,  3, 17, 17, 11, 13, 10, 18, 11,  6,  3,  9,  2,  4, 13,
         4,  8]),
 array([10,  2, 13,  0, 14, 12, 11, 10,  3, 16, 17, 17,  1, 11,  7, 18,  7,
        17, 18]),
 array([ 1,  9,  8, 17,  2, 13,  3, 18,  1, 10, 18,  0, 10, 16, 16, 11,  1,
        16,  5]),
 array([17, 10,  6,  6,  3, 14, 13, 16,  5,  9,  1, 15,  1,  0,  7,  0, 15,
        14, 14])]

In [25]:
# Her modele özel değerlere erişmeye çalışalım
bagging.estimators_[:3]

[DecisionTreeRegressor(random_state=1833531455),
 DecisionTreeRegressor(random_state=918373910),
 DecisionTreeRegressor(random_state=950988185)]

## Tahmin

In [29]:
y_pred = bagging.predict(X_test)

In [36]:
print("Test Hatası: ", np.sqrt(mean_squared_error(y_pred, y_test)))

Test Hatası:  278.99050677744646


In [28]:
# Her bir ağaç için hata değeri
for i, estimator in enumerate(bagging.estimators_):
    y_pred = estimator.predict(X_test)
    mse = np.sqrt(mean_squared_error(y_test, y_pred))
    print(f"Ağaç {i+1} için ortalama kareler Hata (MSE): {mse}")

Ağaç 1 için ortalama kareler Hata (MSE): 633.5211256568482
Ağaç 2 için ortalama kareler Hata (MSE): 1373.0374592317282
Ağaç 3 için ortalama kareler Hata (MSE): 468.2867075006507
Ağaç 4 için ortalama kareler Hata (MSE): 545.6674342981446
Ağaç 5 için ortalama kareler Hata (MSE): 492.44122334906126
Ağaç 6 için ortalama kareler Hata (MSE): 358.052458056358
Ağaç 7 için ortalama kareler Hata (MSE): 581.8801897757992
Ağaç 8 için ortalama kareler Hata (MSE): 738.3071707992007
Ağaç 9 için ortalama kareler Hata (MSE): 876.1733554498219
Ağaç 10 için ortalama kareler Hata (MSE): 545.4970064569741




## Model Tuning

In [68]:
# Model Kurulumu
bagging = BaggingRegressor(bootstrap_features = True)

# Model Eğitimi
bagging.fit(X_train, y_train)

In [69]:
# Parametre aralığı oluşturma
bag_params = {"n_estimators": range(2,20)}

In [70]:
# En iyi Parametreyi seçme işlemi 
bag_cv_model = GridSearchCV(bagging, bag_params, cv=10)

In [71]:
# Modeli eğitme
bag_cv_model.fit(X_train, y_train)

In [72]:
# En iyi parametreyi öğrenme
bag_cv_model.best_params_

{'n_estimators': 7}

In [73]:
# En iyi parametre ile model oluşumu
bag_tuned = BaggingRegressor(n_estimators = 7, random_state = 45)

In [74]:
# Model eğitimi
bag_tuned.fit(X_train, y_train)

In [75]:
y_pred = bag_tuned.predict(X_test)

In [76]:
np.sqrt(mean_squared_error(y_pred, y_test))

273.2577952009809