In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, ShuffleSplit, GridSearchCV,cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from lightgbm import LGBMRegressor

In [5]:
# Load the Hitters dataset; `hit` keeps the frame as read from disk and
# `df` holds only the rows that have no missing values.
hit = pd.read_csv("Hitters.csv")
df = hit.copy().dropna()

# One-hot encode the three categorical columns.
categorical_cols = ['League', 'Division', 'NewLeague']
dms = pd.get_dummies(df[categorical_cols])

# Target column and numeric predictors (categoricals removed, cast to float64).
y = df["Salary"]
X_ = df.drop(columns=['Salary'] + categorical_cols).astype("float64")

# Append a single indicator column per categorical variable.
X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)

# Hold out 25% of the rows for testing; the seed is fixed so the split repeats.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [6]:
# LightGBM must be installed; if it is missing, run: %pip install lightgbm

In [7]:
# Baseline: a LightGBM regressor constructed with no arguments, fitted on the
# training split.
lgbm_model = LGBMRegressor()
lgbm_model.fit(X=X_train, y=y_train)

LGBMRegressor()

In [8]:
# PREDICTION

In [9]:
# Predict on the test features, forwarding the fitted model's
# best_iteration_ as num_iteration.
baseline_best_iter = lgbm_model.best_iteration_
y_pred = lgbm_model.predict(X_test, num_iteration=baseline_best_iter)

In [10]:
# Test RMSE: square root of the mean squared error between y_test and y_pred.
baseline_mse = mean_squared_error(y_test, y_pred)
np.sqrt(baseline_mse)

363.8712087611089

In [11]:
# HYPERPARAMETER TUNING (grid search with cross-validation)

In [12]:
# Candidate values for the grid search: 4 learning rates x 4 ensemble sizes
# x 5 depth limits = 80 parameter combinations.
lgbm_params = dict(
    learning_rate=[0.001, 0.01, 0.1, 0.2],
    n_estimators=[200, 500, 1000, 2000],
    max_depth=[3, 5, 8, 50, 100],
)

# Fresh, unfitted estimator for the search (rebinds `lgbm_model`).
lgbm_model = LGBMRegressor()

# 10-fold cross-validated exhaustive search; n_jobs=-1 and verbose=2 are
# passed through to GridSearchCV unchanged.
lgbm_cv_model = GridSearchCV(
    lgbm_model,
    lgbm_params,
    cv=10,
    n_jobs=-1,
    verbose=2,
)

In [13]:
# Run the grid search on the training split only; the test split is not used here.
lgbm_cv_model.fit(X=X_train, y=y_train)

Fitting 10 folds for each of 80 candidates, totalling 800 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:    4.3s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:    9.7s
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:   17.3s
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:   28.7s
[Parallel(n_jobs=-1)]: Done 800 out of 800 | elapsed:   35.6s finished


GridSearchCV(cv=10, estimator=LGBMRegressor(), n_jobs=-1,
             param_grid={'learning_rate': [0.001, 0.01, 0.1, 0.2],
                         'max_depth': [3, 5, 8, 50, 100],
                         'n_estimators': [200, 500, 1000, 2000]},
             verbose=2)

In [14]:
# Display the parameter combination selected by the grid search.
lgbm_cv_model.best_params_

{'learning_rate': 0.01, 'max_depth': 8, 'n_estimators': 200}

In [15]:
# Build the final model from whatever the grid search selected instead of
# hand-copying the values, so this cell stays in sync if the grid, the data,
# or the library version changes. For the run recorded in this notebook this
# resolves to learning_rate=0.01, max_depth=8, n_estimators=200.
lgbm_tuned = LGBMRegressor(**lgbm_cv_model.best_params_)

In [16]:
# Fit the tuned regressor on the training split.
lgbm_tuned.fit(X=X_train, y=y_train)

LGBMRegressor(learning_rate=0.01, max_depth=8, n_estimators=200)

In [18]:
# Predict with the tuned model (forwarding its best_iteration_ as
# num_iteration) and report the RMSE against y_test.
tuned_best_iter = lgbm_tuned.best_iteration_
y_pred = lgbm_tuned.predict(X_test, num_iteration=tuned_best_iter)
np.sqrt(mean_squared_error(y_test, y_pred))

371.48946527867196