In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, scale
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from catboost import CatBoostRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

from warnings import filterwarnings
filterwarnings("ignore")

In [2]:
# Load the Hitters data and discard every row that contains a missing value.
df = pd.read_csv("Hitters.csv").dropna()

# One-hot encode the three categorical columns into indicator columns.
dms = pd.get_dummies(df.loc[:, ["League", "Division", "NewLeague"]])

In [3]:
def compML(dataset, y, alg, test_size=0.25, random_state=42):
    """Fit ``alg`` with its default settings on ``dataset`` and report the test RMSE.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame holding the target column together with every feature column.
        Object/category columns are one-hot encoded here (first level dropped);
        the remaining columns must be castable to ``float64``.
    y : str
        Name of the target column inside ``dataset``.
    alg : callable
        Estimator class (or zero-argument factory) that exposes ``__name__``
        and whose instances provide ``fit(X, y)`` returning the fitted model,
        and ``predict(X)``.
    test_size : float, default 0.25
        Forwarded to ``train_test_split``.
    random_state : int, default 42
        Forwarded to ``train_test_split`` so every model sees the same split.
        It is NOT passed to ``alg``; stochastic estimators stay unseeded.

    Returns
    -------
    float
        Root mean squared error on the held-out split (also printed).
    """
    # Use the frame that was passed in, not a notebook global, and remove
    # whichever column is the target so it can never leak into the features.
    target = dataset[y]
    raw_features = dataset.drop(columns=[y])

    # drop_first=True keeps one indicator per binary category; for the Hitters
    # columns this yields League_N, Division_W and NewLeague_N, appended after
    # the numeric columns in their original order.
    features = pd.get_dummies(raw_features, drop_first=True).astype("float64")

    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )

    # The estimator is built with no arguments, i.e. library defaults.
    model = alg().fit(X_train, y_train)
    y_pred = model.predict(X_test)

    rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))
    print(alg.__name__, "Modelin Test Hatası: ", rmse)
    return rmse

In [4]:
# Evaluate LightGBM; compML instantiates the class with no arguments, so library defaults apply.
compML(df, "Salary", LGBMRegressor)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000077 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 831
[LightGBM] [Info] Number of data points in the train set: 197, number of used features: 19
[LightGBM] [Info] Start training from score 543.483442
LGBMRegressor Modelin Test Hatası:  363.8712087611089


In [5]:
# Same evaluation for a support vector regressor with default settings.
# NOTE(review): compML applies no feature scaling before fitting — confirm that is intended for SVR.
compML(df, "Salary", SVR)

SVR Modelin Test Hatası:  460.0032657244849


In [6]:
# Same evaluation for k-nearest neighbours with default settings.
# NOTE(review): compML applies no feature scaling before fitting — confirm that is intended for KNN.
compML(df, "Salary", KNeighborsRegressor)

KNeighborsRegressor Modelin Test Hatası:  426.6570764525201


In [7]:
# Candidate regressors supplied as a list of classes (not instances);
# each one is instantiated later by the function that evaluates it.
models = [
    LGBMRegressor,
    XGBRegressor,
    CatBoostRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
    MLPRegressor,
    DecisionTreeRegressor,
    SVR,
    KNeighborsRegressor,
]

In [8]:
# Run every candidate in `models` through compML on the Salary target;
# compML fixes the split's random_state, and prints one RMSE line per model.
for i in models:
    compML(df, "Salary", i)

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000367 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 831
[LightGBM] [Info] Number of data points in the train set: 197, number of used features: 19
[LightGBM] [Info] Start training from score 543.483442
LGBMRegressor Modelin Test Hatası:  363.8712087611089
XGBRegressor Modelin Test Hatası:  366.3863437634965
Learning rate set to 0.031674
0:	learn: 437.6430699	total: 49.5ms	remaining: 49.5s
1:	learn: 431.3923642	total: 50ms	remaining: 25s
2:	learn: 424.8820360	total: 50.6ms	remaining: 16.8s
3:	learn: 418.2514904	total: 51.2ms	remaining: 12.8s
4:	learn: 412.6394021	total: 52ms	remaining: 10.4s
5:	learn: 406.6247020	total: 52.8ms	remaining: 8.75s
6:	learn: 400.5321206	total: 53.5ms	remaining: 7.59s
7:	learn: 394.6683437	total: 54.2ms	remaining: 6.72s
8:	learn: 388.2496484	total: 55ms	remaining: 6.05s
9:	learn: 382.9448842	total: 55.7ms	remaining: 5.52s


210:	learn: 124.1962887	total: 180ms	remaining: 672ms
211:	learn: 123.9686937	total: 180ms	remaining: 670ms
212:	learn: 123.0280048	total: 181ms	remaining: 668ms
213:	learn: 122.5447141	total: 181ms	remaining: 666ms
214:	learn: 122.1785305	total: 182ms	remaining: 665ms
215:	learn: 121.4403560	total: 183ms	remaining: 663ms
216:	learn: 120.8269584	total: 183ms	remaining: 661ms
217:	learn: 120.2399329	total: 184ms	remaining: 660ms
218:	learn: 119.4852557	total: 184ms	remaining: 658ms
219:	learn: 119.2094151	total: 185ms	remaining: 656ms
220:	learn: 119.0417229	total: 186ms	remaining: 654ms
221:	learn: 118.9557915	total: 186ms	remaining: 653ms
222:	learn: 118.3082421	total: 187ms	remaining: 651ms
223:	learn: 117.7364211	total: 187ms	remaining: 649ms
224:	learn: 117.6324235	total: 188ms	remaining: 647ms
225:	learn: 117.2879743	total: 188ms	remaining: 645ms
226:	learn: 117.0422612	total: 189ms	remaining: 643ms
227:	learn: 116.8001379	total: 190ms	remaining: 642ms
228:	learn: 116.4854978	tota

519:	learn: 46.3630405	total: 366ms	remaining: 338ms
520:	learn: 46.1730188	total: 367ms	remaining: 337ms
521:	learn: 46.1360908	total: 367ms	remaining: 336ms
522:	learn: 45.9652487	total: 368ms	remaining: 336ms
523:	learn: 45.8122703	total: 369ms	remaining: 335ms
524:	learn: 45.4984451	total: 369ms	remaining: 334ms
525:	learn: 45.3161076	total: 370ms	remaining: 333ms
526:	learn: 45.2802854	total: 371ms	remaining: 333ms
527:	learn: 45.2448275	total: 371ms	remaining: 332ms
528:	learn: 45.0816321	total: 372ms	remaining: 331ms
529:	learn: 44.9794319	total: 372ms	remaining: 330ms
530:	learn: 44.6832140	total: 373ms	remaining: 329ms
531:	learn: 44.4460046	total: 373ms	remaining: 329ms
532:	learn: 44.2118287	total: 374ms	remaining: 328ms
533:	learn: 44.0693524	total: 375ms	remaining: 327ms
534:	learn: 43.7781614	total: 375ms	remaining: 326ms
535:	learn: 43.5541731	total: 376ms	remaining: 325ms
536:	learn: 43.5134866	total: 377ms	remaining: 325ms
537:	learn: 43.3609810	total: 377ms	remaining:

821:	learn: 18.8989735	total: 553ms	remaining: 120ms
822:	learn: 18.8275176	total: 553ms	remaining: 119ms
823:	learn: 18.8092278	total: 554ms	remaining: 118ms
824:	learn: 18.7938447	total: 555ms	remaining: 118ms
825:	learn: 18.7871267	total: 556ms	remaining: 117ms
826:	learn: 18.7299674	total: 556ms	remaining: 116ms
827:	learn: 18.6808264	total: 557ms	remaining: 116ms
828:	learn: 18.5984875	total: 557ms	remaining: 115ms
829:	learn: 18.5170088	total: 558ms	remaining: 114ms
830:	learn: 18.4707382	total: 558ms	remaining: 114ms
831:	learn: 18.4368619	total: 559ms	remaining: 113ms
832:	learn: 18.3683191	total: 560ms	remaining: 112ms
833:	learn: 18.3295058	total: 560ms	remaining: 112ms
834:	learn: 18.2587263	total: 561ms	remaining: 111ms
835:	learn: 18.2103679	total: 562ms	remaining: 110ms
836:	learn: 18.1781032	total: 562ms	remaining: 109ms
837:	learn: 18.0746921	total: 563ms	remaining: 109ms
838:	learn: 18.0131984	total: 563ms	remaining: 108ms
839:	learn: 17.9510877	total: 564ms	remaining:

GradientBoostingRegressor Modelin Test Hatası:  346.43488879086965
RandomForestRegressor Modelin Test Hatası:  349.41673528543924
MLPRegressor Modelin Test Hatası:  362.5888740452249
DecisionTreeRegressor Modelin Test Hatası:  491.84556276695787
SVR Modelin Test Hatası:  460.0032657244849
KNeighborsRegressor Modelin Test Hatası:  426.6570764525201
