In [29]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, KFold, RandomizedSearchCV
from sklearn.metrics import accuracy_score, log_loss, r2_score
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.svm import SVC, SVR
from sklearn.pipeline import Pipeline
from sklearn.ensemble import BaseEnsemble, VotingClassifier, VotingRegressor
from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge, Lasso
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import warnings
warnings.filterwarnings('ignore')

In [3]:
con = pd.read_csv(r"..\Cases\Concrete Strength\Concrete_Data.csv")
y = con['Strength']
X = con.drop('Strength', axis=1)

In [4]:
lr = LinearRegression()
rid = Ridge()
las = Lasso()
dtr = DecisionTreeRegressor(random_state=24)

In [5]:
voting = VotingRegressor([('LR', lr), ('RID', rid), ('LAS', las),
                           ('TREE', dtr)])

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,random_state=24)

In [7]:
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)
r2_lr = r2_score(y_test, y_pred)

In [8]:
rid.fit(X_train, y_train)
y_pred = rid.predict(X_test)
r2_rid = r2_score(y_test, y_pred)

In [9]:
las.fit(X_train, y_train)
y_pred = las.predict(X_test)
r2_las = r2_score(y_test, y_pred)

In [10]:
dtr.fit(X_train, y_train)
y_pred = dtr.predict(X_test)
r2_dtr = r2_score(y_test, y_pred)

In [11]:
voting.fit(X_train, y_train)
y_pred = voting.predict(X_test)
r2_voting = r2_score(y_test, y_pred)

In [12]:
print("LR", r2_lr)
print("RID", r2_rid)
print("LAS", r2_las)
print("DTR", r2_dtr)
print("VOTE", r2_voting)

LR 0.5771752777048791
RID 0.5771749099675627
LAS 0.5763335877871639
DTR 0.83089474226832
VOTE 0.726968009759825


In [13]:
voting = VotingRegressor([('LR',lr), ('RID', rid), ('LAS', las),('TREE', dtr) ], weights=[r2_lr,r2_rid, r2_las, r2_dtr])

In [24]:
voting1 = VotingRegressor([('LR',lr), ('RID', rid), ('LAS', las),('TREE', dtr) ])

In [14]:
voting.fit(X_train, y_train)

In [15]:
y_pred = voting.predict(X_test)
r2_score(y_test, y_pred)

0.7604627780013105

In [16]:
kfold = KFold(n_splits=5, shuffle=True, random_state=24)
print(voting.get_params())

{'estimators': [('LR', LinearRegression()), ('RID', Ridge()), ('LAS', Lasso()), ('TREE', DecisionTreeRegressor(random_state=24))], 'n_jobs': None, 'verbose': False, 'weights': [0.5771752777048791, 0.5771749099675627, 0.5763335877871639, 0.83089474226832], 'LR': LinearRegression(), 'RID': Ridge(), 'LAS': Lasso(), 'TREE': DecisionTreeRegressor(random_state=24), 'LR__copy_X': True, 'LR__fit_intercept': True, 'LR__n_jobs': None, 'LR__positive': False, 'RID__alpha': 1.0, 'RID__copy_X': True, 'RID__fit_intercept': True, 'RID__max_iter': None, 'RID__positive': False, 'RID__random_state': None, 'RID__solver': 'auto', 'RID__tol': 0.0001, 'LAS__alpha': 1.0, 'LAS__copy_X': True, 'LAS__fit_intercept': True, 'LAS__max_iter': 1000, 'LAS__positive': False, 'LAS__precompute': False, 'LAS__random_state': None, 'LAS__selection': 'cyclic', 'LAS__tol': 0.0001, 'LAS__warm_start': False, 'TREE__ccp_alpha': 0.0, 'TREE__criterion': 'squared_error', 'TREE__max_depth': None, 'TREE__max_features': None, 'TREE__m

In [21]:
params = {'RID__alpha': np.linspace(0.001, 3, 5),
'LAS__alpha':np.linspace(0.001, 3, 5),
'TREE__max_depth': [None,3,4,5], 
'TREE__min_samples_split': [2,5,10],
'TREE__min_samples_leaf': [1,5,10]
}
gcv = GridSearchCV(voting, param_grid=params, cv=kfold, 
                   scoring='r2', n_jobs=-1)

In [22]:
          

gcv.fit(X, y)


In [23]:
print(gcv.best_score_)
print(gcv.best_params_)

0.7754886685757552
{'LAS__alpha': 0.001, 'RID__alpha': 0.001, 'TREE__max_depth': None, 'TREE__min_samples_leaf': 1, 'TREE__min_samples_split': 2}


In [25]:
# without weights
gcv = GridSearchCV(voting1, param_grid=params, cv=kfold, 
                   scoring='r2', n_jobs=-1)

In [26]:
gcv.fit(X, y)

In [27]:
print(gcv.best_score_)
print(gcv.best_params_)

0.7444419391024694
{'LAS__alpha': 0.001, 'RID__alpha': 0.001, 'TREE__max_depth': None, 'TREE__min_samples_leaf': 1, 'TREE__min_samples_split': 2}


### Randomized Search CV

In [28]:
kfold = KFold(n_splits=5, shuffle=True, random_state=24)
print(voting.get_params())

{'estimators': [('LR', LinearRegression()), ('RID', Ridge()), ('LAS', Lasso()), ('TREE', DecisionTreeRegressor(random_state=24))], 'n_jobs': None, 'verbose': False, 'weights': [0.5771752777048791, 0.5771749099675627, 0.5763335877871639, 0.83089474226832], 'LR': LinearRegression(), 'RID': Ridge(), 'LAS': Lasso(), 'TREE': DecisionTreeRegressor(random_state=24), 'LR__copy_X': True, 'LR__fit_intercept': True, 'LR__n_jobs': None, 'LR__positive': False, 'RID__alpha': 1.0, 'RID__copy_X': True, 'RID__fit_intercept': True, 'RID__max_iter': None, 'RID__positive': False, 'RID__random_state': None, 'RID__solver': 'auto', 'RID__tol': 0.0001, 'LAS__alpha': 1.0, 'LAS__copy_X': True, 'LAS__fit_intercept': True, 'LAS__max_iter': 1000, 'LAS__positive': False, 'LAS__precompute': False, 'LAS__random_state': None, 'LAS__selection': 'cyclic', 'LAS__tol': 0.0001, 'LAS__warm_start': False, 'TREE__ccp_alpha': 0.0, 'TREE__criterion': 'squared_error', 'TREE__max_depth': None, 'TREE__max_features': None, 'TREE__m

In [30]:
params = {'RID__alpha': np.linspace(0.001, 3, 10),
'LAS__alpha':np.linspace(0.001, 3, 10),
'TREE__max_depth': [None,3,4,5], 
'TREE__min_samples_split': [2,4,5,8,10],
'TREE__min_samples_leaf': [1,4,5,8,10]
}

In [31]:
rgcv = RandomizedSearchCV(voting1, param_distributions=params, cv=kfold, random_state=24, scoring='r2', n_jobs=-1, n_iter=20)
rgcv.fit(X, y)

In [32]:
print(rgcv.best_score_)
print(rgcv.best_params_)

0.7442964707173159
{'TREE__min_samples_split': 2, 'TREE__min_samples_leaf': 1, 'TREE__max_depth': None, 'RID__alpha': 1.6671111111111112, 'LAS__alpha': 2.0003333333333333}
