In [21]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.metrics import r2_score, root_mean_squared_error
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from xgboost import XGBRegressor

In [8]:
# Read the housing CSV (path is relative to the notebook's working directory)
# and preview the first two rows as the cell output.
housing = pd.read_csv(filepath_or_buffer='../Datasets/Housing.csv')
housing.head(n=2)

Unnamed: 0,price,lotsize,bedrooms,bathrms,stories,driveway,recroom,fullbase,gashw,airco,garagepl,prefarea
0,42000.0,5850,3,1,2,yes,no,yes,no,no,1,no
1,38500.0,4000,2,1,1,yes,no,no,no,no,0,no


In [18]:
# Separate the regression target (price) from the predictor columns.
y = housing['price']
X = housing.drop(columns='price')

# Hold out 30% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=25
)

# One-hot encode every object-dtype column (dropping the first level of each),
# pass the remaining columns through untouched, and return pandas DataFrames.
ohe_trf = make_column_transformer(
    (
        OneHotEncoder(sparse_output=False, drop='first'),
        make_column_selector(dtype_include=object),
    ),
    remainder='passthrough',
    verbose_feature_names_out=False,
).set_output(transform='pandas')

# The encoder is fitted on the training rows only and then applied to the test rows.
X_train_trf = ohe_trf.fit_transform(X_train)
X_test_trf = ohe_trf.transform(X_test)

# Hyperparameter grid: 20 learning rates x 5 tree counts x 5 depth settings.
# Later cells read `rates`, `n_est` and `depths` as well.
rates = np.linspace(0.01, 0.8, 20)
n_est = np.arange(50, 200, 30)
depths = [None, 2, 3, 5, 7]

# Exhaustive search: fit one XGBoost regressor per combination and record its
# R^2 and RMSE on the held-out split.
# NOTE(review): configurations are ranked by their score on the test split, so
# the best r2 below is an optimistic estimate -- consider cross-validation or a
# separate validation split for model selection.
scores = []
for rate in tqdm(rates):
    for n in n_est:
        for d in depths:
            xgbr = XGBRegressor(
                learning_rate=rate,
                n_estimators=n,
                max_depth=d,
                random_state=23,
            )
            xgbr.fit(X_train_trf, y_train)
            y_pred = xgbr.predict(X_test_trf)
            scores.append(
                (
                    rate,
                    n,
                    d,
                    r2_score(y_test, y_pred),
                    root_mean_squared_error(y_test, y_pred),
                )
            )

# One row per configuration; display them best-first by R^2.
scores = pd.DataFrame(scores, columns=['Rate', 'n_est', 'max depth', 'r2', 'rmse'])
scores.sort_values('r2', ascending=False)



  0%|          | 0/20 [00:00<?, ?it/s]

100%|██████████| 20/20 [00:55<00:00,  2.77s/it]


Unnamed: 0,Rate,n_est,max depth,r2,rmse
81,0.134737,80,2.0,0.628548,16673.116113
251,0.425789,50,2.0,0.625497,16741.449616
96,0.134737,170,2.0,0.625426,16743.051858
86,0.134737,110,2.0,0.623626,16783.226380
46,0.051579,170,2.0,0.622669,16804.545089
...,...,...,...,...,...
3,0.010000,50,5.0,0.340971,22208.429923
0,0.010000,50,,0.331190,22372.618382
4,0.010000,50,7.0,0.328618,22415.599629
2,0.010000,50,3.0,0.303722,22827.417883


LightGBM: the same hyperparameter grid search, using `LGBMRegressor`

In [20]:
from lightgbm import LGBMRegressor

# Grid search with LightGBM. This cell reads `rates`, `n_est`, `depths`,
# `X_train_trf`, `X_test_trf`, `y_train` and `y_test` from the kernel, so the
# cell that defines them must have been run first.
# NOTE(review): configurations are ranked by their score on the test split, so
# the best r2 below is an optimistic estimate.
scores = []  # rebinding `scores` discards any results stored under that name
for rate in tqdm(rates):
    for n in n_est:
        for d in depths:
            lgbmr = LGBMRegressor(
                learning_rate=rate,
                n_estimators=n,
                max_depth=d,
                random_state=23,
            )
            lgbmr.fit(X_train_trf, y_train)
            y_pred = lgbmr.predict(X_test_trf)
            scores.append(
                (
                    rate,
                    n,
                    d,
                    r2_score(y_test, y_pred),
                    root_mean_squared_error(y_test, y_pred),
                )
            )

# One row per configuration; display them best-first by R^2.
scores = pd.DataFrame(scores, columns=['Rate', 'n_est', 'max depth', 'r2', 'rmse'])
scores.sort_values('r2', ascending=False)


100%|██████████| 20/20 [00:27<00:00,  1.36s/it]


Unnamed: 0,Rate,n_est,max depth,r2,rmse
30,0.051579,80,,0.643418,16335.980134
34,0.051579,80,7.0,0.641143,16388.014447
33,0.051579,80,5.0,0.641031,16390.578095
39,0.051579,110,7.0,0.640663,16398.963065
35,0.051579,110,,0.640597,16400.466423
...,...,...,...,...,...
0,0.010000,50,,0.340341,22219.046787
4,0.010000,50,7.0,0.340214,22221.173510
3,0.010000,50,5.0,0.337951,22259.251471
2,0.010000,50,3.0,0.316523,22616.618618


CatBoost: the same hyperparameter grid search, using `CatBoostRegressor`

In [23]:
from catboost import CatBoostRegressor

# Grid search with CatBoost. This cell reads `rates`, `n_est`, `depths`,
# `X_train_trf`, `X_test_trf`, `y_train` and `y_test` from the kernel, so the
# cell that defines them must have been run first.
# NOTE(review): configurations are ranked by their score on the test split, so
# the best r2 below is an optimistic estimate.
scores = []  # rebinding `scores` discards any results stored under that name
for rate in tqdm(rates):
    for n in n_est:
        for d in depths:
            # The estimator is bound to `cbr`, not `lgbmr`: the old name was
            # misleading and replaced the LightGBM model held in the kernel
            # with a CatBoost one.
            cbr = CatBoostRegressor(
                random_state=23,
                n_estimators=n,
                max_depth=d,
                learning_rate=rate,
                verbose=0,  # silence CatBoost's per-iteration training log
            )
            cbr.fit(X_train_trf, y_train)
            y_pred = cbr.predict(X_test_trf)
            scores.append(
                [rate, n, d, r2_score(y_test, y_pred), root_mean_squared_error(y_test, y_pred)]
            )

# One row per configuration; display them best-first by R^2.
scores = pd.DataFrame(scores, columns=['Rate', 'n_est', 'max depth', 'r2', 'rmse'])
scores.sort_values(by='r2', ascending=False)

100%|██████████| 20/20 [01:36<00:00,  4.81s/it]


Unnamed: 0,Rate,n_est,max depth,r2,rmse
253,0.425789,50,5.0,0.658617,15984.045969
281,0.467368,80,2.0,0.652994,16115.134749
175,0.301053,50,,0.651599,16147.498351
228,0.384211,50,5.0,0.647180,16249.575585
159,0.259474,80,7.0,0.646273,16270.446626
...,...,...,...,...,...
0,0.010000,50,,0.286553,23107.152614
3,0.010000,50,5.0,0.279185,23226.160758
4,0.010000,50,7.0,0.273503,23317.530573
2,0.010000,50,3.0,0.265527,23445.183274
