In [14]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split 
from sklearn.metrics import r2_score
from sklearn.preprocessing import PolynomialFeatures # for polynomial features
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import Pipeline

from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings('ignore')

In [15]:

# Load the concrete data set and split it into the regression target
# ('Strength') and the remaining feature columns.
# The relative path uses forward slashes so it resolves on Windows as well as
# on Linux/macOS; a backslash-separated path only works on Windows.
# NOTE(review): the name `bos` looks like a leftover from a different data set;
# it is kept unchanged so any cell that refers to it keeps working.
bos = pd.read_csv("../Cases/Concrete Strength/Concrete_Data.csv")
y = bos['Strength']
X = bos.drop(columns='Strength')

In [16]:
# One shared 5-fold splitter (shuffled, fixed seed) so that every grid search
# that passes cv=kfold is scored on identical train/validation partitions.
kfold = KFold(n_splits=5, shuffle=True, random_state=24)

## Ridge

In [17]:
# Ridge regression on polynomial features, tuned with a cross-validated grid
# search over the polynomial degree and the regularisation strength.
poly, ridge = PolynomialFeatures(), Ridge()
pipe = Pipeline(steps=[('POLY', poly), ('LR', ridge)])

# Dump the tunable parameter names of the pipeline (the '<step>__<param>'
# keys are what the grid below refers to).
print(pipe.get_params())

# Search space: degrees 1..5 crossed with 10 evenly spaced alphas in [0.001, 5].
# NOTE(review): the recorded run selects LR__alpha = 5.0, i.e. the upper edge
# of this grid - consider widening the alpha range before trusting the optimum.
params = dict(
    POLY__degree=list(range(1, 6)),
    LR__alpha=np.linspace(0.001, 5, num=10),
)

# fit() returns the fitted search object, so construction and fitting are chained.
gcv_ridge = GridSearchCV(estimator=pipe, param_grid=params, cv=kfold).fit(X, y)
print(gcv_ridge.best_score_)
print(gcv_ridge.best_params_)

{'memory': None, 'steps': [('POLY', PolynomialFeatures()), ('LR', Ridge())], 'verbose': False, 'POLY': PolynomialFeatures(), 'LR': Ridge(), 'POLY__degree': 2, 'POLY__include_bias': True, 'POLY__interaction_only': False, 'POLY__order': 'C', 'LR__alpha': 1.0, 'LR__copy_X': True, 'LR__fit_intercept': True, 'LR__max_iter': None, 'LR__positive': False, 'LR__random_state': None, 'LR__solver': 'auto', 'LR__tol': 0.0001}
0.861858899008506
{'LR__alpha': 5.0, 'POLY__degree': 3}


In [18]:
# LASSO
# Lasso regression on polynomial features, tuned with the same cross-validated
# grid-search layout as the Ridge cell. `Lasso` is already imported in the
# notebook's import cell, so it is not imported again here.
poly = PolynomialFeatures()
lasso = Lasso()

# Two-step pipeline: polynomial expansion followed by the L1-penalised regressor.
pipe = Pipeline([('POLY', poly), ('LR', lasso)])
print(pipe.get_params())

# Search space: degrees 1..3 crossed with 10 evenly spaced alphas in [0.001, 5].
# NOTE(review): the recorded run selects LR__alpha = 0.001, the lower edge of
# this grid - consider extending the range towards smaller alphas.
# NOTE(review): warnings are silenced globally in the import cell, so a
# ConvergenceWarning from the default max_iter=1000 would not be visible
# here - TODO confirm the fits actually converge.
params = {'POLY__degree': [1, 2, 3],
          'LR__alpha': np.linspace(0.001, 5, 10)}

gcv_lasso = GridSearchCV(pipe, param_grid=params, cv=kfold)
gcv_lasso.fit(X, y)
print(gcv_lasso.best_score_)
print(gcv_lasso.best_params_)

{'memory': None, 'steps': [('POLY', PolynomialFeatures()), ('LR', Lasso())], 'verbose': False, 'POLY': PolynomialFeatures(), 'LR': Lasso(), 'POLY__degree': 2, 'POLY__include_bias': True, 'POLY__interaction_only': False, 'POLY__order': 'C', 'LR__alpha': 1.0, 'LR__copy_X': True, 'LR__fit_intercept': True, 'LR__max_iter': 1000, 'LR__positive': False, 'LR__precompute': False, 'LR__random_state': None, 'LR__selection': 'cyclic', 'LR__tol': 0.0001, 'LR__warm_start': False}
0.8526437598479253
{'LR__alpha': 0.001, 'POLY__degree': 3}


In [19]:
# Elastic Net
# Elastic-net regression on polynomial features, tuned with the same
# cross-validated grid-search layout as the Ridge and Lasso cells.
# `ElasticNet` is already imported in the notebook's import cell, so it is not
# imported again here.
poly = PolynomialFeatures()
elastinet = ElasticNet()

# Two-step pipeline: polynomial expansion followed by the L1/L2-penalised regressor.
pipe = Pipeline([('POLY', poly), ('LR', elastinet)])
print(pipe.get_params())

# Search space: degrees 1..3, 10 evenly spaced alphas in [0.001, 5] and the
# mixing ratios 0, 0.25, 0.5, 0.75 and 1.
# NOTE(review): l1_ratio = 0 is a pure L2 penalty, which the coordinate-descent
# solver handles poorly (Ridge is the better tool for that case) - consider
# dropping it from the grid; TODO confirm against the scikit-learn docs.
# NOTE(review): warnings are silenced globally in the import cell, so a
# ConvergenceWarning from the default max_iter=1000 would not be visible
# here - TODO confirm the fits actually converge.
params = {'POLY__degree': [1, 2, 3],
          'LR__alpha': np.linspace(0.001, 5, 10),
          'LR__l1_ratio': np.linspace(0, 1, 5)}
gcv_elast = GridSearchCV(pipe, param_grid=params, cv=kfold)
gcv_elast.fit(X, y)
print(gcv_elast.best_score_)
print(gcv_elast.best_params_)

{'memory': None, 'steps': [('POLY', PolynomialFeatures()), ('LR', ElasticNet())], 'verbose': False, 'POLY': PolynomialFeatures(), 'LR': ElasticNet(), 'POLY__degree': 2, 'POLY__include_bias': True, 'POLY__interaction_only': False, 'POLY__order': 'C', 'LR__alpha': 1.0, 'LR__copy_X': True, 'LR__fit_intercept': True, 'LR__l1_ratio': 0.5, 'LR__max_iter': 1000, 'LR__positive': False, 'LR__precompute': False, 'LR__random_state': None, 'LR__selection': 'cyclic', 'LR__tol': 0.0001, 'LR__warm_start': False}
0.852994552489777
{'LR__alpha': 0.5564444444444444, 'LR__l1_ratio': 0.25, 'POLY__degree': 3}


## Inference on unlabelled data

In [22]:
# Use the Ridge search result for inference: in the recorded runs it has the
# highest best_score_ of the three searches (about 0.862, versus about 0.853
# for both Lasso and ElasticNet). best_estimator_ is the winning pipeline,
# which GridSearchCV refits on the full X, y because refit is left at its default.
best_model = gcv_ridge.best_estimator_

In [20]:
# Unlabelled data
# Read the observations to be scored and preview the first rows.
# The relative path uses forward slashes so it resolves on Windows as well as
# on Linux/macOS; a backslash-separated path only works on Windows.
test_conc = pd.read_csv('../Cases/Concrete Strength/testConcrete.csv')
test_conc.head()

Unnamed: 0,Cement,Blast,Fly,Water,Superplasticizer,Coarse,Fine,Age
0,495,120,0,155,5,866,884,75
1,262,129,0,271,2,808,787,174
2,201,48,1,215,5,807,839,113
3,329,141,0,286,1,881,823,229
4,354,14,0,129,2,839,847,210


In [23]:
# Score the unlabelled rows with the selected pipeline and print the raw predictions.
# NOTE(review): the recorded output contains large negative values (e.g. about
# -383.8 and -325.4), which cannot be valid strengths. Presumably the
# polynomial model is extrapolating outside the range of the training
# features - TODO compare test_conc against the ranges in X before using
# these predictions.
pred_strength = best_model.predict(test_conc)
print(pred_strength)

[  63.39445467  -37.86820324   52.3075848  -325.36598316  104.60665732
  -42.16309984 -110.17062455  144.5647186   -22.42178978  -16.64449888
   66.03425662  238.50640694 -383.84854085   90.48292699]
