In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


**When we receive a dataset, we can automate the machine learning processes applied to it in order to test all models and obtain results for each. This automation process allows us to apply different machine learning algorithms to the same dataset, compare their performances, and select the most suitable model. By doing so, all steps from model training to hyperparameter optimization are executed quickly and systematically.**

*Elimize bir veri seti ulaştığında, bu veri seti üzerinde gerçekleştireceğimiz makine öğrenmesi işlemlerini otomatikleştirerek, tüm modelleri denemek ve bunlara ilişkin sonuçlar elde etmek mümkündür. Bu otomatikleştirme süreci, farklı makine öğrenmesi algoritmalarını aynı veri seti üzerinde uygulayarak performanslarını karşılaştırmayı ve en uygun modeli seçmeyi sağlar. Bu sayede, model eğitiminden hiperparametre optimizasyonuna kadar tüm adımlar hızlı ve sistematik bir şekilde yürütülür.*

In [25]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn import model_selection
import warnings
warnings.filterwarnings("ignore")

In [5]:
df = pd.read_csv("/content/gdrive/MyDrive/Colab Notebooks/ml/Hitters.csv")
df.dropna(inplace=True)
dms = pd.get_dummies(df[['League', 'Division', 'NewLeague']])

**Automatic Function**

compML:Comparison of Machine Learning Models

**compML trains multiple machine learning models on the same dataset and compares their performances.**

*compML birden fazla makine öğrenimi modelini aynı veri seti üzerinde eğitip, performanslarını karşılaştırır.*

In [76]:
def compML(df, y, alg):

  # train-test split
  y = df[y]
  X_ = df.drop(['Salary', 'League', 'Division', 'NewLeague'], axis=1).astype('float64')
  X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

  # modeling
  model = alg().fit(X_train, y_train)
  y_pred = model.predict(X_test)
  RMSE = np.sqrt(mean_squared_error(y_test, y_pred))
  model_name = alg.__name__
  print(model_name, "RMSE test error: ", RMSE)
  return RMSE

In [60]:
!pip install lightgbm
from lightgbm import LGBMRegressor



In [61]:
compML(df, 'Salary', LGBMRegressor)

LGBMRegressor RMSE test error:  363.8712087611089


363.8712087611089

In [62]:
compML(df, 'Salary', DecisionTreeRegressor)

DecisionTreeRegressor RMSE test error:  456.4581294089472


456.4581294089472

In [63]:
compML(df, 'Salary', RandomForestRegressor)

RandomForestRegressor RMSE test error:  330.7201416516618


330.7201416516618

In [64]:
compML(df, 'Salary', GradientBoostingRegressor)

GradientBoostingRegressor RMSE test error:  352.307475227401


352.307475227401

In [65]:
compML(df, 'Salary', LGBMRegressor)

LGBMRegressor RMSE test error:  363.8712087611089


363.8712087611089

In [66]:
compML(df, 'Salary', KNeighborsRegressor)

KNeighborsRegressor RMSE test error:  426.6570764525201


426.6570764525201

In [67]:
compML(df, 'Salary', SVR)

SVR RMSE test error:  460.0032657244849


460.0032657244849

In [68]:
compML(df, 'Salary', Ridge)

Ridge RMSE test error:  356.80829057302424


356.80829057302424

In [69]:
compML(df, 'Salary', Lasso)

Lasso RMSE test error:  356.0975884554034


356.0975884554034

**Automation with model list**

In [70]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

In [71]:
models = [
    LinearRegression,
    Ridge,
    Lasso,
    ElasticNet,
    DecisionTreeRegressor,
    RandomForestRegressor,
    GradientBoostingRegressor,
    AdaBoostRegressor,
    SVR,
    XGBRegressor,
    LGBMRegressor
]

In [77]:
for i in models:
  compML(df, 'Salary', i)

LinearRegression RMSE test error:  357.0532909460735
Ridge RMSE test error:  356.80829057302424
Lasso RMSE test error:  356.0975884554034
ElasticNet RMSE test error:  357.1676548181246
DecisionTreeRegressor RMSE test error:  502.67985269708254
RandomForestRegressor RMSE test error:  343.9451170959347
GradientBoostingRegressor RMSE test error:  345.90696536356836
AdaBoostRegressor RMSE test error:  363.6077517962934
SVR RMSE test error:  460.0032657244849
XGBRegressor RMSE test error:  366.3863437634965
LGBMRegressor RMSE test error:  363.8712087611089


In [58]:
'''
__name__ fonksiyonu, Python'da bir nesnenin veya fonksiyonun adını elde etmek için kullanılan
özel bir attribute'dur. Genellikle fonksiyonlar, sınıflar veya modüller için kullanılır ve o
nesnenin ismini döndürür.

Python'da bir fonksiyon hiçbir şey döndürmezse varsayılan olarak None değeri döndürür. Yani, bir
fonksiyonda açıkça return ifadesi kullanmazsan, Python o fonksiyonun sonucunu None olarak kabul eder.
'''