In [23]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, train_test_split, KFold, RandomizedSearchCV
from sklearn.metrics import accuracy_score, r2_score
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import Ridge, Lasso, LinearRegression

In [24]:
df = pd.read_csv(r"C:\Users\Administrator.DAI-PC2\Downloads\Shubham\Practical Machine Learning\ClassWork\Cases\Concrete Strength\Concrete_Data.csv")
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [25]:
X_train, X_test, y_train, y_test = train_test_split(X, y,test_size = 0.3, random_state=24)

In [26]:
## Model 1

In [27]:
ridge = Ridge()
ridge.fit(X_train,y_train)
y_pred = ridge.predict(X_test)
r2_ridge = r2_score(y_test,y_pred)
r2_ridge

0.5771749099675626

In [28]:
## Model 2

In [29]:
lasso = Lasso()
lasso.fit(X_train,y_train)
y_pred = lasso.predict(X_test)
r2_lasso = r2_score(y_test,y_pred)
r2_lasso

0.576333587787164

In [30]:
## Model 3

In [31]:
lr = LinearRegression()
lr.fit(X_train,y_train)
y_pred = lr.predict(X_test)
r2_lr = r2_score(y_test,y_pred)
r2_lr

0.5771752777048791

In [32]:
## Model 4

In [33]:
dtc = DecisionTreeRegressor(random_state=24)
dtc.fit(X_train,y_train)
y_pred = dtc.predict(X_test)
r2_dtc = r2_score(y_test,y_pred)
r2_dtc

0.83089474226832

In [34]:
### Average voting

In [35]:
voting_avg = VotingRegressor([("RIDGE",ridge),("LASSO",lasso),("LR",lr),("TREE",dtc)])
voting_avg.fit(X_train,y_train)
y_pred  = voting_avg.predict(X_test)
print("R2 Score = ", r2_score(y_test,y_pred))

R2 Score =  0.726968009759825


In [36]:
###### weighted voting averaging

In [37]:
voting_w = VotingRegressor([("RIDGE",ridge),("LASSO",lasso),("LR",lr),("TREE",dtc)],weights = [r2_ridge,r2_lasso,r2_lr,r2_dtc])
voting_w.fit(X_train,y_train)
y_pred  = voting_w.predict(X_test)
print("R2 Score = ", r2_score(y_test,y_pred))

R2 Score =  0.7604627780013105


In [38]:
#### GridSearch

In [39]:
print(voting_avg.get_params())

{'estimators': [('RIDGE', Ridge()), ('LASSO', Lasso()), ('LR', LinearRegression()), ('TREE', DecisionTreeRegressor(random_state=24))], 'n_jobs': None, 'verbose': False, 'weights': None, 'RIDGE': Ridge(), 'LASSO': Lasso(), 'LR': LinearRegression(), 'TREE': DecisionTreeRegressor(random_state=24), 'RIDGE__alpha': 1.0, 'RIDGE__copy_X': True, 'RIDGE__fit_intercept': True, 'RIDGE__max_iter': None, 'RIDGE__positive': False, 'RIDGE__random_state': None, 'RIDGE__solver': 'auto', 'RIDGE__tol': 0.0001, 'LASSO__alpha': 1.0, 'LASSO__copy_X': True, 'LASSO__fit_intercept': True, 'LASSO__max_iter': 1000, 'LASSO__positive': False, 'LASSO__precompute': False, 'LASSO__random_state': None, 'LASSO__selection': 'cyclic', 'LASSO__tol': 0.0001, 'LASSO__warm_start': False, 'LR__copy_X': True, 'LR__fit_intercept': True, 'LR__n_jobs': None, 'LR__positive': False, 'TREE__ccp_alpha': 0.0, 'TREE__criterion': 'squared_error', 'TREE__max_depth': None, 'TREE__max_features': None, 'TREE__max_leaf_nodes': None, 'TREE__m

In [40]:
kfold = KFold(n_splits=5,shuffle=True,random_state=24)
params = {"RIDGE__alpha":np.linspace(0.001,3,5),"LASSO__alpha":np.linspace(0.001,3,5),"TREE__min_samples_split":[2,5,10],
          "TREE__min_samples_leaf":[1,5,10],"TREE__max_depth":[None,3,4,5]}
gcv = GridSearchCV(voting_avg,param_grid=params,cv=kfold,scoring='r2',n_jobs=-1)
gcv.fit(X,y)
print(gcv.best_params_)
print(gcv.best_score_)

{'LASSO__alpha': 0.001, 'RIDGE__alpha': 0.001, 'TREE__max_depth': None, 'TREE__min_samples_leaf': 1, 'TREE__min_samples_split': 2}
0.7444419391024694


In [41]:
kfold = KFold(n_splits=5,shuffle=True,random_state=24)
params = {"RIDGE__alpha":np.linspace(0.001,3,5),"LASSO__alpha":np.linspace(0.001,3,5),"TREE__min_samples_split":[2,5,10],
          "TREE__min_samples_leaf":[1,5,10],"TREE__max_depth":[None,3,4,5]}
gcv = GridSearchCV(voting_w,param_grid=params,cv=kfold,scoring='r2',n_jobs=-1)
gcv.fit(X,y)
print(gcv.best_params_)
print(gcv.best_score_)

{'LASSO__alpha': 0.001, 'RIDGE__alpha': 0.001, 'TREE__max_depth': None, 'TREE__min_samples_leaf': 1, 'TREE__min_samples_split': 2}
0.7754886685757552


In [42]:
#### RandomizedSearchCV

In [44]:
kfold = KFold(n_splits=5,shuffle=True,random_state=24)
params = {"RIDGE__alpha":np.linspace(0.001,3,10),"LASSO__alpha":np.linspace(0.001,3,10),"TREE__min_samples_split":[2,4,5,8,10],
          "TREE__min_samples_leaf":[1,4,5,8,10],"TREE__max_depth":[None,3,4,5]}
rgcv = RandomizedSearchCV(voting_avg,param_distributions=params,cv=kfold,scoring='r2',n_jobs=-1,n_iter =20,random_state=24)
rgcv.fit(X,y)
print(rgcv.best_params_)
print(rgcv.best_score_)

{'TREE__min_samples_split': 2, 'TREE__min_samples_leaf': 1, 'TREE__max_depth': None, 'RIDGE__alpha': 1.6671111111111112, 'LASSO__alpha': 2.0003333333333333}
0.7442964707173159


In [48]:
kfold = KFold(n_splits=5,shuffle=True,random_state=24)
params = {"RIDGE__alpha":np.linspace(0.001,3,10),"LASSO__alpha":np.linspace(0.001,3,10),"TREE__min_samples_split":[2,4,5,8,10],
          "TREE__min_samples_leaf":[1,4,5,8,10],"TREE__max_depth":[None,3,4,5]}
rgcv = RandomizedSearchCV(voting_w,param_distributions=params,cv=kfold,scoring='r2',n_jobs=-1,n_iter =20,random_state=24)
rgcv.fit(X,y)
print(rgcv.best_params_)
print(rgcv.best_score_)

{'TREE__min_samples_split': 2, 'TREE__min_samples_leaf': 1, 'TREE__max_depth': None, 'RIDGE__alpha': 1.6671111111111112, 'LASSO__alpha': 2.0003333333333333}
0.7753492120951057


In [49]:
pd.DataFrame(rgcv.cv_results_)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_TREE__min_samples_split,param_TREE__min_samples_leaf,param_TREE__max_depth,param_RIDGE__alpha,param_LASSO__alpha,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.018856,0.001713,0.0,0.0,10,10,5.0,2.666778,0.001,"{'TREE__min_samples_split': 10, 'TREE__min_sam...",0.716608,0.747113,0.764284,0.729454,0.749589,0.74141,0.016618,11
1,0.011686,0.002406,0.012499,0.00625,2,5,3.0,1.667111,2.666778,"{'TREE__min_samples_split': 2, 'TREE__min_samp...",0.694119,0.711956,0.712161,0.676915,0.718718,0.702774,0.015299,20
2,0.012579,0.006292,0.009375,0.007655,8,10,4.0,1.333889,1.000667,"{'TREE__min_samples_split': 8, 'TREE__min_samp...",0.706384,0.738056,0.753074,0.707928,0.736649,0.728418,0.018297,14
3,0.016638,0.001231,0.0,0.0,10,10,3.0,0.667444,1.667111,"{'TREE__min_samples_split': 10, 'TREE__min_sam...",0.694424,0.712123,0.712269,0.677216,0.718906,0.702988,0.015235,18
4,0.021882,0.007653,0.009373,0.007653,2,1,,1.667111,2.000333,"{'TREE__min_samples_split': 2, 'TREE__min_samp...",0.763081,0.77464,0.804304,0.738134,0.796588,0.775349,0.023772,1
5,0.021873,0.007656,0.006249,0.007653,2,1,5.0,1.000667,2.333556,"{'TREE__min_samples_split': 2, 'TREE__min_samp...",0.732357,0.747273,0.770475,0.730542,0.74892,0.745913,0.014382,7
6,0.024911,0.007503,0.006274,0.003705,8,5,4.0,1.667111,2.333556,"{'TREE__min_samples_split': 8, 'TREE__min_samp...",0.706179,0.734914,0.752266,0.707636,0.736395,0.727478,0.017869,17
7,0.020008,0.005775,0.010758,0.00705,8,5,4.0,3.0,1.000667,"{'TREE__min_samples_split': 8, 'TREE__min_samp...",0.706384,0.735042,0.752275,0.707928,0.736649,0.727656,0.017794,16
8,0.020957,0.003987,0.011493,0.002235,8,1,,2.000333,1.667111,"{'TREE__min_samples_split': 8, 'TREE__min_samp...",0.749464,0.772326,0.807491,0.739064,0.791674,0.772004,0.025453,3
9,0.024042,0.008651,0.011785,0.003072,5,4,,3.0,0.334222,"{'TREE__min_samples_split': 5, 'TREE__min_samp...",0.739859,0.766824,0.802354,0.753741,0.779069,0.768369,0.021441,4
