In [1]:
import pandas as pd
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.metrics import r2_score, root_mean_squared_error
from sklearn.model_selection import train_test_split
from tqdm import tqdm


On concrete strength data, using AdaBoostRegressor without scaling

In [4]:
df = pd.read_csv('../Datasets/cases/Concrete_Strength/Concrete_Data.csv')
y = df['Strength']
X = df.drop('Strength', axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=25)

In [6]:
lin_reg = LinearRegression()
e_net1 = ElasticNet()
e_net2 = ElasticNet(alpha=2.0, l1_ratio=0.8)
dtr1 = DecisionTreeRegressor(max_depth=1)
dtr2 = DecisionTreeRegressor(max_depth=3)
dtr3 = DecisionTreeRegressor(max_depth=None)

estimators = [lin_reg, e_net1, e_net2, dtr1, dtr2, dtr3]

In [7]:
n_est = [i for i in range(3, 140)]
scores = []


for est in tqdm(estimators):
    for n in n_est:

        ada = AdaBoostRegressor(estimator=est, n_estimators=n, random_state=25)
        ada.fit(X_train, y_train)
        y_pred = ada.predict(X_test)
        scores.append([est, n,
                      r2_score(y_test, y_pred), root_mean_squared_error(y_test, y_pred)])
        

scores_df = pd.DataFrame(scores, columns=['estimator', 'n_est', 'r2', 'rmse'])
scores_df.sort_values(by='r2', ascending=False)

100%|██████████| 6/6 [01:35<00:00, 15.95s/it]


Unnamed: 0,estimator,n_est,r2,rmse
714,DecisionTreeRegressor(),32,0.874127,5.855377
709,DecisionTreeRegressor(),27,0.873258,5.875567
699,DecisionTreeRegressor(),17,0.873157,5.877915
693,DecisionTreeRegressor(),11,0.872787,5.886462
711,DecisionTreeRegressor(),29,0.872590,5.891023
...,...,...,...,...
414,DecisionTreeRegressor(max_depth=1),6,0.396645,12.819637
415,DecisionTreeRegressor(max_depth=1),7,0.378482,13.011165
413,DecisionTreeRegressor(max_depth=1),5,0.355290,13.251703
412,DecisionTreeRegressor(max_depth=1),4,0.336096,13.447518


With standard scaling

In [8]:
from sklearn.preprocessing import StandardScaler

std = StandardScaler()
X_train_trns = std.fit_transform(X_train)
X_test_trns = std.transform(X_test)

n_est = [i for i in range(3, 60)]
scores = []


for est in tqdm(estimators):
    for n in n_est:

        ada = AdaBoostRegressor(estimator=est, n_estimators=n, random_state=25)
        ada.fit(X_train_trns, y_train)
        y_pred = ada.predict(X_test_trns)
        scores.append([est, n,
                      r2_score(y_test, y_pred), root_mean_squared_error(y_test, y_pred)])
        

scores_df = pd.DataFrame(scores, columns=['estimator', 'n_est', 'r2', 'rmse'])
scores_df.sort_values(by='r2', ascending=False)

100%|██████████| 6/6 [00:21<00:00,  3.55s/it]


Unnamed: 0,estimator,n_est,r2,rmse
316,DecisionTreeRegressor(),34,0.877724,5.771108
309,DecisionTreeRegressor(),27,0.877666,5.772478
308,DecisionTreeRegressor(),26,0.877334,5.780322
313,DecisionTreeRegressor(),31,0.876934,5.789723
310,DecisionTreeRegressor(),28,0.876854,5.791602
...,...,...,...,...
174,DecisionTreeRegressor(max_depth=1),6,0.396645,12.819637
175,DecisionTreeRegressor(max_depth=1),7,0.378482,13.011165
173,DecisionTreeRegressor(max_depth=1),5,0.355290,13.251703
172,DecisionTreeRegressor(max_depth=1),4,0.336096,13.447518
