# XGBoost

In [1]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split, GridSearchCV,cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale 
from sklearn import model_selection
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import BaggingRegressor

from warnings import filterwarnings
# Silence every warning for the rest of the session (no category or module filter is given).
# NOTE(review): this also hides deprecation notices from the libraries used below.
filterwarnings('ignore')

In [2]:
# Read the raw table; `hit` stays untouched while `df` is a copy with incomplete rows removed.
hit = pd.read_csv("Hitters.csv")
df = hit.copy().dropna()

# Regression target.
y = df["Salary"]

# One-hot encode the three categorical columns; only one indicator per column is kept below.
dms = pd.get_dummies(df[['League', 'Division', 'NewLeague']])

# Remaining columns, with the target and the raw categoricals dropped, cast to float64.
X_ = df.drop(columns=['Salary', 'League', 'Division', 'NewLeague']).astype('float64')

# Final feature matrix: float columns followed by the three retained indicator columns.
X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)

# 75/25 train/test split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)


In [3]:
import xgboost as xgb

In [4]:
# Wrap each split in an xgboost DMatrix, attaching the matching target as its label.
DM_train = xgb.DMatrix(X_train, label=y_train)
DM_test = xgb.DMatrix(X_test, label=y_test)

In [5]:
from xgboost import XGBRegressor

In [6]:
# Baseline model: XGBRegressor with no hyperparameters overridden, trained on the training split.
xgb_model = XGBRegressor()
xgb_model = xgb_model.fit(X_train, y_train)

## Prediction

In [7]:
# Predict on the held-out split; the cell's displayed value is the test RMSE
# (square root of the mean squared error).
y_pred = xgb_model.predict(X_test)
np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))

366.3863437634965

## Model Tuning

In [8]:
# Display the fitted baseline estimator so its current settings can be inspected before tuning.
xgb_model

In [9]:
# Search space handed to the grid search: 5 * 4 * 5 * 3 = 300 parameter combinations.
xgb_grid = dict(
    colsample_bytree=[0.4, 0.5, 0.6, 0.9, 1],
    n_estimators=[100, 200, 500, 1000],
    max_depth=[2, 3, 4, 5, 6],
    learning_rate=[0.1, 0.01, 0.5],
)

In [10]:
# Use a name that does not collide with the `xgb` module alias created by
# `import xgboost as xgb`: rebinding `xgb` to an estimator instance would make any
# later or re-run cell that calls `xgb.DMatrix(...)` fail with AttributeError.
xgb_estimator = XGBRegressor()

# Exhaustive search over every combination in `xgb_grid`, each evaluated with
# 10-fold cross-validation, using all available cores (n_jobs=-1).
# NOTE(review): no `scoring` is passed, so candidates are ranked by the estimator's
# default `score` (presumably R^2 for a regressor — confirm), while the notebook
# reports RMSE on the test split.
xgb_cv = GridSearchCV(xgb_estimator,
                      param_grid = xgb_grid,
                      cv = 10,
                      n_jobs = -1,
                      verbose = 2)

# Runs the search on the training split only; the test split is not touched here.
xgb_cv.fit(X_train, y_train)

Fitting 10 folds for each of 300 candidates, totalling 3000 fits


In [11]:
# Show the parameter combination that the grid search selected as best.
xgb_cv.best_params_

{'colsample_bytree': 0.4,
 'learning_rate': 0.5,
 'max_depth': 3,
 'n_estimators': 100}

In [18]:
# Final model: the combination reported by `xgb_cv.best_params_`, written out explicitly
# and trained on the training split.
xgb_tuned = XGBRegressor(
    n_estimators=100,
    max_depth=3,
    learning_rate=0.5,
    colsample_bytree=0.4,
).fit(X_train, y_train)

In [19]:
# Predict with the tuned model on the held-out split; the cell's displayed value is the
# test RMSE. `y_pred` is overwritten with the tuned model's predictions.
y_pred = xgb_tuned.predict(X_test)
np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))

374.29091564195204