### CatBoost

In [6]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split, GridSearchCV,cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
import numpy as np

In [7]:
hit = pd.read_csv("Hitters.csv")
df = hit.copy()
df = df.dropna()
dms = pd.get_dummies(df[['League', 'Division', 'NewLeague']])
y = df["Salary"]
X_ = df.drop(['Salary', 'League', 'Division', 'NewLeague'], axis=1).astype('float64')
X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.25, 
                                                    random_state=42)

In [10]:
!pip install catboost



In [15]:
from catboost import CatBoostRegressor

In [16]:
catb = CatBoostRegressor()
catb_model = catb.fit(X_train,y_train)

Learning rate set to 0.031674
0:	learn: 437.6430699	total: 148ms	remaining: 2m 27s
1:	learn: 431.3923642	total: 149ms	remaining: 1m 14s
2:	learn: 424.8820360	total: 149ms	remaining: 49.7s
3:	learn: 418.2514904	total: 150ms	remaining: 37.4s
4:	learn: 412.6394021	total: 151ms	remaining: 30s
5:	learn: 406.6247020	total: 152ms	remaining: 25.2s
6:	learn: 400.5321206	total: 153ms	remaining: 21.7s
7:	learn: 394.6683437	total: 154ms	remaining: 19s
8:	learn: 388.2496484	total: 154ms	remaining: 17s
9:	learn: 382.9448842	total: 155ms	remaining: 15.4s
10:	learn: 377.2600080	total: 156ms	remaining: 14s
11:	learn: 372.4829606	total: 157ms	remaining: 12.9s
12:	learn: 366.6823437	total: 157ms	remaining: 11.9s
13:	learn: 362.6076230	total: 158ms	remaining: 11.1s
14:	learn: 358.0107745	total: 159ms	remaining: 10.4s
15:	learn: 353.2802665	total: 160ms	remaining: 9.81s
16:	learn: 348.5646265	total: 160ms	remaining: 9.27s
17:	learn: 343.6407912	total: 161ms	remaining: 8.79s
18:	learn: 339.2363847	total: 16

# Predict

In [35]:
y_pred = catb_model.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred))

351.194631344607

# Model Tuning

In [40]:
catb_grid = {
    "iterations" : [200,500,1000,2000],
    "learning_rate" : [0.01,0.03,0.05,0.1],
    "depth" : [3,4,5,6,7,8] }
catb_cv = GridSearchCV(catb,catb_grid, cv = 5 ,n_jobs = -1 , verbose = 2)

In [42]:
catb_cv.fit(X_train,y_train)

Fitting 5 folds for each of 96 candidates, totalling 480 fits
0:	learn: 422.4143448	total: 1.2ms	remaining: 1.2s
1:	learn: 404.1864276	total: 2.56ms	remaining: 1.28s
2:	learn: 386.3231718	total: 3.39ms	remaining: 1.13s
3:	learn: 370.5548032	total: 4.22ms	remaining: 1.05s
4:	learn: 354.9242038	total: 5.16ms	remaining: 1.03s
5:	learn: 342.3403984	total: 6.01ms	remaining: 996ms
6:	learn: 328.2370070	total: 6.88ms	remaining: 975ms
7:	learn: 317.5056526	total: 7.78ms	remaining: 965ms
8:	learn: 306.6243511	total: 8.78ms	remaining: 967ms
9:	learn: 297.3147023	total: 9.64ms	remaining: 954ms
10:	learn: 288.3685892	total: 10.5ms	remaining: 943ms
11:	learn: 281.0996220	total: 11.3ms	remaining: 934ms
12:	learn: 273.2254898	total: 12.2ms	remaining: 929ms
13:	learn: 266.9003385	total: 13.1ms	remaining: 924ms
14:	learn: 261.9092500	total: 13.9ms	remaining: 912ms
15:	learn: 256.2637350	total: 14.6ms	remaining: 901ms
16:	learn: 250.3667935	total: 15.5ms	remaining: 894ms
17:	learn: 244.8631098	total: 16

In [46]:
catb_cv.best_params_

{'depth': 5, 'iterations': 1000, 'learning_rate': 0.1}

# Model Tuning

In [53]:
catb_tuned = CatBoostRegressor(depth = 5 , iterations = 1000 , learning_rate = 0.1)
catb_tuned = catb_tuned.fit(X_train, y_train)

0:	learn: 422.4143448	total: 1.3ms	remaining: 1.3s
1:	learn: 404.1864276	total: 2.56ms	remaining: 1.27s
2:	learn: 386.3231718	total: 3.33ms	remaining: 1.11s
3:	learn: 370.5548032	total: 3.98ms	remaining: 991ms
4:	learn: 354.9242038	total: 4.82ms	remaining: 959ms
5:	learn: 342.3403984	total: 5.75ms	remaining: 952ms
6:	learn: 328.2370070	total: 6.59ms	remaining: 935ms
7:	learn: 317.5056526	total: 7.53ms	remaining: 934ms
8:	learn: 306.6243511	total: 8.26ms	remaining: 909ms
9:	learn: 297.3147023	total: 9.14ms	remaining: 905ms
10:	learn: 288.3685892	total: 10.1ms	remaining: 907ms
11:	learn: 281.0996220	total: 10.9ms	remaining: 896ms
12:	learn: 273.2254898	total: 11.9ms	remaining: 901ms
13:	learn: 266.9003385	total: 12.8ms	remaining: 901ms
14:	learn: 261.9092500	total: 13.6ms	remaining: 893ms
15:	learn: 256.2637350	total: 14.4ms	remaining: 885ms
16:	learn: 250.3667935	total: 15.3ms	remaining: 884ms
17:	learn: 244.8631098	total: 16.2ms	remaining: 884ms
18:	learn: 240.1540669	total: 17ms	remai

In [55]:
y_pred = catb_tuned.predict(X_test)
np.sqrt(mean_squared_error(y_test,y_pred))

356.665762904938