In [1]:
import numpy as np
import pandas as pd 
from sklearn.model_selection import train_test_split, GridSearchCV,cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale 
from sklearn import model_selection
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import BaggingRegressor

from warnings import filterwarnings
filterwarnings('ignore')

In [2]:
hit = pd.read_csv("Hitters.csv")
df = hit.copy()
df = df.dropna()
dms = pd.get_dummies(df[['League', 'Division', 'NewLeague']])
y = df["Salary"]
X_ = df.drop(['Salary', 'League', 'Division', 'NewLeague'], axis=1).astype('float64')
X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.25, 
                                                    random_state=42)

In [3]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-1.5.2-py3-none-win_amd64.whl (106.6 MB)
Installing collected packages: xgboost
Successfully installed xgboost-1.5.2


In [4]:
import xgboost as xgb

In [6]:
DM_train=xgb.DMatrix(data=X_train,label=y_train)
DM_test=xgb.DMatrix(data=X_test,label=y_test)

In [7]:
from xgboost import XGBRegressor

In [10]:
xgb_model=XGBRegressor().fit(X_train,y_train)

## Tahmin

In [11]:
y_pred=xgb_model.predict(X_test)

In [12]:
np.sqrt(mean_squared_error(y_test,y_pred))

355.46515176059927

## Model Tunning

In [13]:
xgb_model

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
             gamma=0, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.300000012,
             max_delta_step=0, max_depth=6, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=100, n_jobs=8,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
             validate_parameters=1, verbosity=None)

In [42]:
xgb_grid={
            "colsample_bytree":np.arange(0,1,0.1),
            "n_estimators":[100,200,500,1000],
            "max_depth":[2,3,4,5,6],
            "learning_rate":[0.1,0.01,0.5]
          }

In [44]:
xgb=XGBRegressor()
xgb_cv=GridSearchCV(xgb,
                    param_grid=xgb_grid,
                    cv=10,n_jobs=-1,
                    verbose=2)

In [45]:
xgb_cv.fit(X_train,y_train)

Fitting 10 folds for each of 600 candidates, totalling 6000 fits


GridSearchCV(cv=10,
             estimator=XGBRegressor(base_score=None, booster=None,
                                    colsample_bylevel=None,
                                    colsample_bynode=None,
                                    colsample_bytree=None,
                                    enable_categorical=False, gamma=None,
                                    gpu_id=None, importance_type=None,
                                    interaction_constraints=None,
                                    learning_rate=None, max_delta_step=None,
                                    max_depth=None, min_child_weight=None,
                                    missing=nan, monotone_constraints=None,...
                                    num_parallel_tree=None, predictor=None,
                                    random_state=None, reg_alpha=None,
                                    reg_lambda=None, scale_pos_weight=None,
                                    subsample=None, tree_method=None,


In [46]:
xgb_cv.best_params_
col=xgb_cv.best_params_["colsample_bytree"]
learning_rate=xgb_cv.best_params_["learning_rate"]
max_depth=xgb_cv.best_params_["max_depth"]
n_estimators=xgb_cv.best_params_["n_estimators"]

In [47]:
xgb_tuned=XGBRegressor(colsample_bytree=col,
                       learning_rate=learning_rate,
                       max_depth=max_depth,
                       n_estimators=n_estimators
                      )
xgb_tuned.fit(X_train,y_train)
                       

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=0.30000000000000004,
             enable_categorical=False, gamma=0, gpu_id=-1, importance_type=None,
             interaction_constraints='', learning_rate=0.1, max_delta_step=0,
             max_depth=6, min_child_weight=1, missing=nan,
             monotone_constraints='()', n_estimators=100, n_jobs=8,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, subsample=1, tree_method='exact',
             validate_parameters=1, verbosity=None)

In [48]:
y_pred=xgb_tuned.predict(X_test)

In [49]:
np.sqrt(mean_squared_error(y_test,y_pred))

368.09599784053574