Perform voting regression on the concrete strength data, using linear regression, ElasticNet, and decision tree regression as the base estimators. Evaluate the models with the R2 score.

In [14]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.ensemble import VotingRegressor            # import for voting ensembling regression
from sklearn.linear_model import ElasticNet, LinearRegression
from sklearn.metrics import r2_score, root_mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler
from sklearn.tree import DecisionTreeRegressor


In [9]:
# Load the concrete dataset. The 'Strength' column is the regression target and
# every other column is kept as a feature.
target_col = 'Strength'
df = pd.read_csv('../Datasets/cases/Concrete_Strength/Concrete_Data.csv')
y = df[target_col]
X = df.drop(columns=[target_col])

# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
splits = train_test_split(X, y, test_size=0.3, random_state=25)
X_train, X_test, y_train, y_test = splits

In [11]:
# Standardise every non-object column; any remaining column is passed through as is.
non_object_cols = make_column_selector(dtype_exclude=object)
trf_scaler = make_column_transformer(
    (StandardScaler(), non_object_cols),
    remainder='passthrough',
    verbose_feature_names_out=False,
)
# Ask the transformer to emit pandas DataFrames rather than NumPy arrays.
trf_scaler.set_output(transform='pandas')

# Fit on the training split only, then reuse the fitted transformer on the test split.
X_train_trf = trf_scaler.fit_transform(X_train)
X_test_trf = trf_scaler.transform(X_test)



In [12]:
# Base learners for the ensemble, keyed by the label VotingRegressor will use for each.
# Insertion order of the dict is the order in which the estimators are registered.
base_models = {
    'LinearRegression': LinearRegression(),
    'ElasticNet1': ElasticNet(alpha=0.5, l1_ratio=0.5),
    'ElasticNet2': ElasticNet(alpha=0.8, l1_ratio=0.8),
    'DecisionTreeRegressor1': DecisionTreeRegressor(random_state=25),
    'DecisionTreeRegressor2': DecisionTreeRegressor(random_state=25, max_depth=3),
}
# Keep the individual estimators reachable under their own names as well.
lin_reg, e_net1, e_net2, dtr1, dtr2 = base_models.values()

# No weights are given, so every base learner contributes equally to the average.
voting = VotingRegressor(estimators=list(base_models.items()))

In [15]:
# Fit the ensemble on the transformed training features (fit returns the estimator
# itself, so predict can be chained) and report R2 on the transformed test features.
y_pred_trf = voting.fit(X_train_trf, y_train).predict(X_test_trf)
print(r2_score(y_test, y_pred_trf))

0.7534927026429945


In [None]:
# Score each fitted base learner of the ensemble on its own, using the transformed test features.
for fitted_model in voting.estimators_:
    print('Estimator: ', fitted_model)
    print('R2 Score = ', r2_score(y_test, fitted_model.predict(X_test_trf)))

Estimator:  LinearRegression()
R2 Score =  0.635183914246411
Estimator:  ElasticNet(alpha=0.5)
R2 Score =  0.5668745544971869
Estimator:  ElasticNet(alpha=0.8, l1_ratio=0.8)
R2 Score =  0.5650932986688736
Estimator:  DecisionTreeRegressor(random_state=25)
R2 Score =  0.8127760533837747
Estimator:  DecisionTreeRegressor(max_depth=3, random_state=25)
R2 Score =  0.5608309573895383


Without scaling (fit on the raw features)

In [22]:
# Refit the same ensemble object on the raw (unscaled) training features and
# report R2 on the raw test features.
y_pred = voting.fit(X_train, y_train).predict(X_test)
print(r2_score(y_test, y_pred))

0.7831678377685559


In [None]:
# Score each fitted base learner of the ensemble on its own, using the raw test features.
for fitted_model in voting.estimators_:
    print('Estimator: ', fitted_model)
    print('R2 Score = ', r2_score(y_test, fitted_model.predict(X_test)))

Estimator:  LinearRegression()
R2 Score =  0.6351839142464111
Estimator:  ElasticNet(alpha=0.5)
R2 Score =  0.6348697759809028
Estimator:  ElasticNet(alpha=0.8, l1_ratio=0.8)
R2 Score =  0.634405448057903
Estimator:  DecisionTreeRegressor(random_state=25)
R2 Score =  0.8127760533837747
Estimator:  DecisionTreeRegressor(max_depth=3, random_state=25)
R2 Score =  0.5608309573895383


With min-max scaling

In [25]:
# Repeat the experiment with min-max scaling in place of standardisation.
# MinMaxScaler is imported in the notebook's import cell with the other
# sklearn.preprocessing names, so this cell has no import of its own.
#
# NOTE: this cell rebinds trf_scaler, X_train_trf, X_test_trf and y_pred_trf,
# replacing the values produced by the StandardScaler cells. Re-run those cells
# before re-running anything that expects the standardised data.
trf_scaler = make_column_transformer(
    (MinMaxScaler(), make_column_selector(dtype_exclude=object)),
    remainder='passthrough',
    verbose_feature_names_out=False,
).set_output(transform='pandas')

# Fit the scaler on the training split only, then apply it to the test split.
X_train_trf = trf_scaler.fit_transform(X_train)
X_test_trf = trf_scaler.transform(X_test)

# R2 of the whole ensemble on the min-max-scaled test features.
voting.fit(X_train_trf, y_train)
y_pred_trf = voting.predict(X_test_trf)
print(r2_score(y_test, y_pred_trf))
print()

# R2 of each fitted base learner on the same test features.
for fitted_model in voting.estimators_:
    print('Estimator: ', fitted_model)
    print('R2 Score = ', r2_score(y_test, fitted_model.predict(X_test_trf)))

0.6525802141157483

Estimator:  LinearRegression()
R2 Score =  0.635183914246411
Estimator:  ElasticNet(alpha=0.5)
R2 Score =  0.13601713200094767
Estimator:  ElasticNet(alpha=0.8, l1_ratio=0.8)
R2 Score =  0.11542599071083992
Estimator:  DecisionTreeRegressor(random_state=25)
R2 Score =  0.8127760533837747
Estimator:  DecisionTreeRegressor(max_depth=3, random_state=25)
R2 Score =  0.5608309573895383


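The ensemble fit on the unscaled features gives the best R2 (about 0.783, versus 0.753 with standard scaling and 0.653 with min-max scaling), so the next step keeps the unscaled data and adds per-estimator weights.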

In [51]:
# Fresh, unfitted base learners for the weighted ensemble.
lin_reg = LinearRegression()
e_net1 = ElasticNet(alpha=0.5, l1_ratio=0.5)
e_net2 = ElasticNet(alpha=0.8, l1_ratio=0.8)
dtr1 = DecisionTreeRegressor(random_state=25)
dtr2 = DecisionTreeRegressor(random_state=25, max_depth=3)

# One row per ensemble member: (label, estimator, weight). Keeping the weight
# next to its estimator makes it harder for the two lists to drift out of step.
# NOTE(review): the weights strongly favour the unrestricted tree, which also has
# the highest test-set R2 in the earlier runs. If they were tuned against the test
# split, the score printed below is optimistic; confirm, or tune on a validation split.
weighted_members = [
    ('LinearRegression', lin_reg, 0.5),
    ('ElasticNet1', e_net1, 0.5),
    ('ElasticNet2', e_net2, 0.5),
    ('DecisionTreeRegressor1', dtr1, 3.4),
    ('DecisionTreeRegressor2', dtr2, 0.1),
]

voting = VotingRegressor(
    estimators=[(label, model) for label, model, _ in weighted_members],
    weights=[weight for *_, weight in weighted_members],
)

# Fit on the raw (unscaled) training features and report R2 on the raw test features.
y_pred = voting.fit(X_train, y_train).predict(X_test)
print(r2_score(y_test, y_pred))
print()

# R2 of each fitted base learner on its own, for comparison with the ensemble.
for fitted_model in voting.estimators_:
    print('Estimator: ', fitted_model)
    print('R2 Score = ', r2_score(y_test, fitted_model.predict(X_test)))

0.855118135537777

Estimator:  LinearRegression()
R2 Score =  0.6351839142464111
Estimator:  ElasticNet(alpha=0.5)
R2 Score =  0.6348697759809028
Estimator:  ElasticNet(alpha=0.8, l1_ratio=0.8)
R2 Score =  0.634405448057903
Estimator:  DecisionTreeRegressor(random_state=25)
R2 Score =  0.8127760533837747
Estimator:  DecisionTreeRegressor(max_depth=3, random_state=25)
R2 Score =  0.5608309573895383
