In [None]:
import numpy as np 
import pandas as pd
import seaborn as sns 
import matplotlib.pyplot as plt
import statsmodels.api as sm
import scipy.stats as stats
from statsmodels.stats.outliers_influence import variance_inflation_factor as VIF

In [None]:

pd.set_option('display.max_rows',None)
df1=pd.read_csv('hRUoRL.csv')
df1.head()

# Assumption 1 - y numeric

# Assumption 2 - No Multicollinearity

In [None]:
# Pairwise correlation matrix of all columns: a first look at multicollinearity
# (the VIF itself is computed in the cells further down)
df1.corr()

In [None]:
plt.figure(figsize=(12,6))
sns.heatmap(df1.corr(),annot=True)

In [None]:
X=df1.drop('PRICE',axis=1)
y=df1['PRICE']
Xc=sm.add_constant(X)

In [None]:
VIF(Xc.values,1)

In [None]:
# One VIF per design-matrix column (constant included), shown with the largest first
vif_value = [VIF(Xc.values, col_idx) for col_idx, _ in enumerate(Xc.columns)]
vif_table = pd.DataFrame({'VIF_Value': vif_value}, index=Xc.columns)
vif_table.sort_values(by='VIF_Value', ascending=False)


# ASSUMPTION 3 LINEAR RELATIONSHIP

In [None]:
ols_model=sm.OLS(y,Xc).fit()
ols_model.summary()

In [None]:
y_pred=ols_model.fittedvalues
residuals=ols_model.resid

In [None]:
plt.scatter(y_pred,residuals)
plt.axhline(0)

# Assumption 4 - No autocorrelation of residuals

In [None]:
from statsmodels.stats.api import durbin_watson
durbin_watson(residuals)

In [None]:

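# a Durbin-Watson statistic close to 2 means there is no autocorrelation, i.e. the assumption is valid
# (values towards 0 or 4 would indicate positive / negative autocorrelation).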

# Assumption 5 - Homoscedasticity or   no Heteroscedasticity

# Breusch-Pagan test

In [None]:
from statsmodels.stats.api import het_breuschpagan
het_breuschpagan(residuals,Xc)[2:] # keep only the 3rd and 4th returned values: the F-statistic and its p-value

In [None]:
# since pvalue is low reject h0 i.e. error terms are heteroscedastics

In [None]:
het_breuschpagan(residuals,Xc)# returns (LM statistic, LM p-value, F-statistic, F-test p-value); the earlier cell keeps only the last two

# Assumption 6- Normality of Residuals

In [None]:
# Histogram of the OLS residuals with a KDE overlay on a density scale.
# histplot replaces the deprecated sns.distplot and draws the same picture.
sns.histplot(residuals, kde=True, stat='density')

In [None]:
sns.displot(residuals,kde=True)

In [None]:
stats.shapiro(residuals)

In [None]:
# since p< 0.05 reject h0 residuals are not normal

In [None]:
from statsmodels.stats.api import jarque_bera
# h0: data is normal
stat, pval,res_skew,res_kurt=jarque_bera(residuals)
pval

In [None]:
# since p < 0.05 reject h0 residual is not normal

In [None]:
# Distribution of the raw target (histogram + KDE, density scale).
# histplot replaces the deprecated sns.distplot.
sns.histplot(y, kde=True, stat='density')

In [None]:
# try log transformation of y variable
logy= np.log(y) 

In [None]:
sns.displot(logy,kde=True)

In [None]:
# Distribution of the log-transformed target (histogram + KDE, density scale).
# histplot replaces the deprecated sns.distplot.
sns.histplot(logy, kde=True, stat='density')

In [None]:
y.skew()

In [None]:
logy.skew()

In [None]:
# build a model of logy
ols_model= sm.OLS(logy,Xc).fit()
ols_model.summary()

In [None]:
# Residuals of the model most recently stored in ols_model, plotted to eyeball normality.
residuals=ols_model.resid
# histplot replaces the deprecated sns.distplot (histogram + KDE on a density scale)
sns.histplot(residuals, kde=True, stat='density')

In [None]:
stats.shapiro(residuals)

In [None]:
# pval < 0.05 h0 is rejected data is not normal.

# Model Evaluation

In [None]:
pd.set_option('display.max_rows',None)
df3=pd.read_csv('hRUoRL.csv')
df3.head()

In [None]:
X=df3.drop('PRICE',axis=1)
y=df3['PRICE']

In [None]:
y.head(3)

In [None]:
X.head(2)

In [None]:
from sklearn.model_selection import train_test_split


In [None]:
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=100)

In [None]:
X_train.shape

In [None]:
X_test.shape

In [None]:
y_train.shape

In [None]:
y_test.shape

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
lr=LinearRegression()
lr.fit(X_train,y_train) # training model

In [None]:
y_pred_test= lr.predict(X_test) # get prediction
y_pred_test

In [None]:
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error,mean_absolute_percentage_error

In [None]:
# Hold-out (test split) metrics for the fitted linear model.
test_r2=r2_score(y_test,y_pred_test)
print('test_r2 ',test_r2)
test_mse=mean_squared_error(y_test,y_pred_test)
print('test_mse ', test_mse)
# RMSE is the square root of the MSE; it is a different quantity from the MAE below
test_rmse=np.sqrt(test_mse)
print('test_rmse ', test_rmse)
test_mae=mean_absolute_error(y_test,y_pred_test)
print('test_mae ', test_mae)
test_mape=mean_absolute_percentage_error(y_test,y_pred_test)
print('test_mape ', test_mape)

In [None]:
y_pred_train=lr.predict(X_train)

In [None]:
# In-sample (train split) metrics for the fitted linear model.
train_r2=r2_score(y_train,y_pred_train)
print('train_r2 ',train_r2)
train_mse=mean_squared_error(y_train,y_pred_train)
print('train_mse ', train_mse)
# RMSE is the square root of the MSE; it is a different quantity from the MAE below
train_rmse=np.sqrt(train_mse)
print('train_rmse ', train_rmse)
train_mae=mean_absolute_error(y_train,y_pred_train)
print('train_mae ', train_mae)
train_mape=mean_absolute_percentage_error(y_train,y_pred_train)
print('train_mape ', train_mape)

# Day3

In [None]:
from warnings import filterwarnings
filterwarnings('ignore')

#pd.options.display.max_columns = None
pd.options.display.max_rows = None
 
#to display the float values upto 6 decimal places     
pd.options.display.float_format = '{:.6f}'.format

#import train-test split 
from sklearn.model_selection import train_test_split

#import various functions from statsmodel to perform linear regression
import statsmodels
import statsmodels.api as sm
import statsmodels.stats.api as sms
from statsmodels.graphics.gofplots import qqplot
from statsmodels.stats.outliers_influence import variance_inflation_factor

#import various functions from scipy
from scipy import stats

# import 'metrics' from sklearn is used for evaluating the model performance
from sklearn.metrics import mean_squared_error

#import StandardScaler for scaling the data
from sklearn.preprocessing import StandardScaler

#functions for forward selection
from mlxtend.feature_selection import SequentialFeatureSelector as sfs
from sklearn.feature_selection import RFE

#functions for linear regression
from sklearn.linear_model import LinearRegression

#functions for  cross validation
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn import preprocessing

In [None]:
pd.set_option('display.max_rows',None)
df3=pd.read_csv('hRUoRL.csv')
df3.head()

In [None]:
lr = LinearRegression()
lr.fit(X_train, y_train)

y_pred_train = lr.predict(X_train)
y_pred_test = lr.predict(X_test)


train_r2 = r2_score(y_train, y_pred_train)
test_r2 = r2_score(y_test, y_pred_test)
print('Train R2 ', train_r2)
print('Test R2  ', test_r2)

train_mse = mean_squared_error(y_train, y_pred_train)
test_mse = mean_squared_error(y_test, y_pred_test)
train_rmse = np.sqrt(train_mse)
test_rmse = np.sqrt(test_mse)
print("train RMSE ", train_rmse)
print("test RMSE ", test_rmse)

result_lr = ['LR Full model', train_r2, test_r2, train_rmse, test_rmse]
result_lr

In [None]:
from mlxtend.feature_selection import SequentialFeatureSelector as sfs

In [None]:
# SFS Forward
lr=LinearRegression()
lr_sfs= sfs(estimator=lr,k_features=5, forward=True)
sfs_forward=lr_sfs.fit(X_train,y_train)
forward_features=list(sfs_forward.k_feature_names_)
forward_features

In [None]:
# Forward in range
lr=LinearRegression()
lr_sfs= sfs(estimator=lr,k_features=(4,8), forward=True)
sfs_forward_range=lr_sfs.fit(X_train,y_train)
forward_features_range=list(sfs_forward_range.k_feature_names_)
forward_features_range

In [None]:
# Forward best
lr=LinearRegression()
lr_sfs= sfs(estimator=lr,k_features='best', forward=True)
sfs_forward_best=lr_sfs.fit(X_train,y_train)
forward_features_best=list(sfs_forward_best.k_feature_names_)
forward_features_best

In [None]:
# Forward best
lr=LinearRegression()
lr_sfs= sfs(estimator=lr,k_features='best', forward=True,verbose=2)   # verbose=0 by default no info ,verbose=1 some info,
sfs_forward_best=lr_sfs.fit(X_train,y_train)                          # verbose=2 more info
forward_features_best=list(sfs_forward_best.k_feature_names_)
forward_features_best

In [None]:
# SFS Backward

In [None]:
lr=LinearRegression()
lr_sfs= sfs(estimator=lr,k_features=5, forward=False)
sfs_backward=lr_sfs.fit(X_train,y_train)
backward_features=list(sfs_backward.k_feature_names_)
backward_features

In [None]:
# backward in range
lr=LinearRegression()
lr_sfs= sfs(estimator=lr,k_features=(4,8), forward=False)
sfs_backward_range=lr_sfs.fit(X_train,y_train)
backward_features_range=list(sfs_backward_range.k_feature_names_)
backward_features_range

In [None]:
# backward best
lr=LinearRegression()
lr_sfs= sfs(estimator=lr,k_features='best', forward=False)
sfs_backward_best=lr_sfs.fit(X_train,y_train)
backward_features_best=list(sfs_backward_best.k_feature_names_)
backward_features_best

In [None]:
# RFE 
from sklearn.feature_selection import RFE

In [None]:
lr=LinearRegression()
lr_rfe= RFE(estimator=lr,n_features_to_select=5)
rfe_model=lr_rfe.fit(X_train,y_train)
rfe_model.ranking_

In [None]:
# Pair every training column with the rank RFE assigned to it.
rfe_rank=pd.DataFrame()
rfe_rank['rank']=rfe_model.ranking_
rfe_rank['features']=X_train.columns
# RFE marks the features it kept with rank 1; keep only those.
# Without this filter the list is every column and the "RFE" model is just the full model.
rfe_list= list(rfe_rank.loc[rfe_rank['rank']==1,'features'])
rfe_list

In [None]:
# Fit and score a linear model on the columns listed in rfe_list only.
lr=LinearRegression()
lr.fit(X_train[rfe_list],y_train)  # only selected column
y_pred_train=lr.predict(X_train[rfe_list])
y_pred_test= lr.predict(X_test[rfe_list])
train_r2=r2_score(y_train,y_pred_train)
test_r2=r2_score(y_test,y_pred_test)
print('train_r2 ',train_r2 )
print('test_r2 ',test_r2 )
train_mse = mean_squared_error(y_train, y_pred_train)
test_mse = mean_squared_error(y_test, y_pred_test)
train_rmse = np.sqrt(train_mse)
test_rmse = np.sqrt(test_mse)
print("train RMSE ", train_rmse)
print("test RMSE ", test_rmse)

# Store under the RFE name/label: writing to result_lr here would overwrite the
# full-model row that the results table reads later.
result_rfe = ['rfe', train_r2, test_r2, train_rmse, test_rmse]
result_rfe

In [None]:
lr = LinearRegression()
lr.fit(X_train[backward_features_best], y_train) # only selected columns


y_pred_train = lr.predict(X_train[backward_features_best]) # only selected columns
y_pred_test = lr.predict(X_test[backward_features_best]) # only selected columns


train_r2 = r2_score(y_train, y_pred_train)
test_r2 = r2_score(y_test, y_pred_test)
print('Train R2 ', train_r2)
print('Test R2  ', test_r2)

train_mse = mean_squared_error(y_train, y_pred_train)
test_mse = mean_squared_error(y_test, y_pred_test)
train_rmse = np.sqrt(train_mse)
test_rmse = np.sqrt(test_mse)
print("train RMSE ", train_rmse)
print("test RMSE ", test_rmse)

# Row for the results table; the label is what appears in its 'Method' column
result_back = ['backward', train_r2, test_r2, train_rmse, test_rmse]
result_back

In [None]:
lr = LinearRegression()
lr.fit(X_train[forward_features_best], y_train) # only selected columns


y_pred_train = lr.predict(X_train[forward_features_best]) # only selected columns
y_pred_test = lr.predict(X_test[forward_features_best]) # only selected columns


train_r2 = r2_score(y_train, y_pred_train)
test_r2 = r2_score(y_test, y_pred_test)
print('Train R2 ', train_r2)
print('Test R2  ', test_r2)

train_mse = mean_squared_error(y_train, y_pred_train)
test_mse = mean_squared_error(y_test, y_pred_test)
train_rmse = np.sqrt(train_mse)
test_rmse = np.sqrt(test_mse)
print("train RMSE ", train_rmse)
print("test RMSE ", test_rmse)

result_forward = ['forward', train_r2, test_r2, train_rmse, test_rmse]
result_forward

In [None]:
lr = LinearRegression()
lr.fit(X_train[rfe_list], y_train) # only selected columns


y_pred_train = lr.predict(X_train[rfe_list]) # only selected columns
y_pred_test = lr.predict(X_test[rfe_list]) # only selected columns


train_r2 = r2_score(y_train, y_pred_train)
test_r2 = r2_score(y_test, y_pred_test)
print('Train R2 ', train_r2)
print('Test R2  ', test_r2)

train_mse = mean_squared_error(y_train, y_pred_train)
test_mse = mean_squared_error(y_test, y_pred_test)
train_rmse = np.sqrt(train_mse)
test_rmse = np.sqrt(test_mse)
print("train RMSE ", train_rmse)
print("test RMSE ", test_rmse)

result_rfe = ['rfe', train_r2, test_r2, train_rmse, test_rmse]
result_rfe

In [None]:
# Gather results: one row per modelling approach, best test R2 first.
# Building the frame in one call (instead of appending rows to an empty frame)
# lets pandas infer numeric dtypes for the score columns.
results_df = pd.DataFrame(
    [result_lr, result_back, result_forward, result_rfe],
    columns=['Method', 'Train R2', 'Test R2', 'Train RMSE', 'Test RMSE'],
)
results_df.sort_values('Test R2',ascending=False)

In [None]:
from sklearn.model_selection import KFold

In [None]:
# for not use only for understanding
temp_data=[30,31,32,33,34,35,36,37,38,39]
kf=KFold(n_splits=5)
list(kf.split(temp_data))

In [None]:
from sklearn.model_selection import cross_val_score

In [None]:
# 5-fold cross-validated R2 of the current `lr` estimator on the training split
score = cross_val_score(estimator=lr, X=X_train, y=y_train, scoring='r2', cv=5)
print('cross validate score ', score)
print('Average score ',np.average(score))

In [None]:

lr.fit(X_train,y_train)
lr.score(X_test,y_test)

In [None]:
temp_pred= lr.predict(X_test)
r2_score(y_test,temp_pred)

In [None]:
# LOOCV
from sklearn.model_selection import LeaveOneOut

In [None]:
loocv=LeaveOneOut()

In [None]:
# Leave-one-out CV on the training split: each row is held out once and a fresh
# model is fitted on all the remaining rows.
rmse_loocv = []

for train_index, test_index in loocv.split(X_train, y_train):
    X_train_k = X_train.iloc[train_index]
    X_test_k  = X_train.iloc[test_index]
    
    y_train_k = y_train.iloc[train_index]
    y_test_k  = y_train.iloc[test_index]
    
    lr_k = LinearRegression()
    lr_k.fit(X_train_k, y_train_k)
    y_pred_k = lr_k.predict(X_test_k)
    # with a single held-out row this is simply the absolute prediction error
    rmse_K = np.sqrt(mean_squared_error(y_test_k, y_pred_k) )
    rmse_loocv.append(rmse_K)

# Summarise instead of dumping one value per row: squaring the per-fold errors,
# averaging them and taking the root gives the overall LOOCV RMSE.
loocv_rmse = np.sqrt(np.mean(np.square(rmse_loocv)))
print('LOOCV folds ', len(rmse_loocv))
print('LOOCV RMSE ', loocv_rmse)

# DAY_4

In [101]:

pd.set_option('display.max_rows',None)
df_housing=pd.read_csv('hRUoRL.csv')
df_housing.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [None]:
# not required 
# X=df1.drop('PRICE',axis=1)
# y=df1['PRICE']
# Xc=sm.add_constant(X)

In [None]:
# ols_model=sm.OLS(y,Xc).fit()
# ols_model.summary()

In [None]:
# Xc_temp=Xc.drop('AGE',axis=1)
# temp_model=sm.OLS(y,Xc_temp).fit()

In [None]:
# print('R2 before drop ', ols_model.rsquared )
# print('adj R2 before drop ', ols_model.rsquared_adj )


In [None]:
# print('R2 after drop ', temp_model.rsquared )
# print('adj R2 after drop ', temp_model.rsquared_adj )


In [100]:
# scaling on Xs ----means X1,X2,-----Xn
df_housing_numeric=df_housing[['CRIM','ZN','INDUS','NOX','RM','AGE','DIS','RAD','TAX','PTRATIO','B','LSTAT']]
df_housing_numeric

Unnamed: 0,CRIM,ZN,INDUS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33
5,0.02985,0.0,2.18,0.458,6.43,58.7,6.0622,3,222,18.7,394.12,5.21
6,0.08829,12.5,7.87,0.524,6.012,66.6,5.5605,5,311,15.2,395.6,12.43
7,0.14455,12.5,7.87,0.524,6.172,96.1,5.9505,5,311,15.2,396.9,19.15
8,0.21124,12.5,7.87,0.524,5.631,100.0,6.0821,5,311,15.2,386.63,29.93
9,0.17004,12.5,7.87,0.524,6.004,85.9,6.5921,5,311,15.2,386.71,17.1


In [103]:
df_housing_numeric1=df_housing.drop(['CHAS','PRICE'],axis=1)
df_housing_numeric1

Unnamed: 0,CRIM,ZN,INDUS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33
5,0.02985,0.0,2.18,0.458,6.43,58.7,6.0622,3,222,18.7,394.12,5.21
6,0.08829,12.5,7.87,0.524,6.012,66.6,5.5605,5,311,15.2,395.6,12.43
7,0.14455,12.5,7.87,0.524,6.172,96.1,5.9505,5,311,15.2,396.9,19.15
8,0.21124,12.5,7.87,0.524,5.631,100.0,6.0821,5,311,15.2,386.63,29.93
9,0.17004,12.5,7.87,0.524,6.004,85.9,6.5921,5,311,15.2,386.71,17.1


In [107]:
x_scaler=StandardScaler()
num_scaled=x_scaler.fit_transform(df_housing_numeric)
df_num_scaled=pd.DataFrame(num_scaled,columns=df_housing_numeric.columns)
df_num_scaled.head()

Unnamed: 0,CRIM,ZN,INDUS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,-0.419782,0.28483,-1.287909,-0.144217,0.413672,-0.120013,0.140214,-0.982843,-0.666608,-1.459,0.441052,-1.075562
1,-0.417339,-0.487722,-0.593381,-0.740262,0.194274,0.367166,0.55716,-0.867883,-0.987329,-0.303094,0.441052,-0.492439
2,-0.417342,-0.487722,-0.593381,-0.740262,1.282714,-0.265812,0.55716,-0.867883,-0.987329,-0.303094,0.396427,-1.208727
3,-0.41675,-0.487722,-1.306878,-0.835284,1.016303,-0.809889,1.077737,-0.752922,-1.106115,0.113032,0.416163,-1.361517
4,-0.412482,-0.487722,-1.306878,-0.835284,1.228577,-0.51118,1.077737,-0.752922,-1.106115,0.113032,0.441052,-1.026501


In [120]:
# Feature matrix = standardised numeric columns + the CHAS column taken unscaled from df_housing
X=pd.concat([df_num_scaled,df_housing['CHAS']],axis=1)
y=df_housing['PRICE']
# Re-split from the scaled X. Without this, X_train/X_test still hold the unscaled
# split from the earlier section and every model below ignores the scaling.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0)

In [121]:
lr=LinearRegression()
lr.fit(X_train,y_train)
y_pred_train=lr.predict(X_train)
y_pred_test=lr.predict(X_test)
train_r2=r2_score(y_train,y_pred_train)
test_r2=r2_score(y_test,y_pred_test)
print('Train R2 =',train_r2)
print('Test R2 =',test_r2)

Train R2 = 0.7645451026942549
Test R2 = 0.6733825506400194


In [122]:
# X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0)

In [123]:
from sklearn.linear_model import Ridge,Lasso,ElasticNet
# GridSearchCV is used by the ElasticNet grid-search cells, which come before the
# cell that originally imported it; import it here so a top-to-bottom run works.
from sklearn.model_selection import GridSearchCV
import warnings

# Ridge model

In [124]:
ridge_model=Ridge(alpha=15)
ridge_model.fit(X_train,y_train)
y_pred_train=ridge_model.predict(X_train)
y_pred_test=ridge_model.predict(X_test)
train_r2=r2_score(y_train,y_pred_train)
test_r2=r2_score(y_test,y_pred_test)
print('Train R2 =',train_r2)
print('Test R2 =',test_r2)

Train R2 = 0.7615932777564829
Test R2 = 0.6633646941226767


In [138]:
ridge_model=Ridge(alpha=20)
ridge_model.fit(X_train,y_train)
y_pred_train=ridge_model.predict(X_train)
y_pred_test=ridge_model.predict(X_test)
train_r2=r2_score(y_train,y_pred_train)
test_r2=r2_score(y_test,y_pred_test)
print('Train R2 =',train_r2)
print('Test R2 =',test_r2)

Train R2 = 0.7601072635296631
Test R2 = 0.6603031973623346


In [125]:
lr.coef_

array([-1.04242529,  1.03604084,  0.0777311 , -1.8789804 ,  2.70876672,
       -0.28079317, -3.1560096 ,  2.10632153, -1.86413286, -2.20120069,
        0.62151065, -3.47239016,  2.51124642])

In [127]:
ridge_model.coef_

array([-0.94710626,  0.82124672, -0.15078425, -1.44977417,  2.81800252,
       -0.30650829, -2.6529765 ,  1.34303025, -1.20693677, -2.12771833,
        0.64648273, -3.2798639 ,  1.62293063])

# Lasso model

In [129]:
lasso_model=Lasso(alpha=1)
lasso_model.fit(X_train,y_train)
y_pred_train=lasso_model.predict(X_train)
y_pred_test=lasso_model.predict(X_test)
train_r2=r2_score(y_train,y_pred_train)
test_r2=r2_score(y_test,y_pred_test)
print('Train R2 =',train_r2)
print('Test R2 =',test_r2)

Train R2 = 0.6927797210709747
Test R2 = 0.5987944381069534


In [139]:
lasso_model=Lasso(alpha=20)
lasso_model.fit(X_train,y_train)
y_pred_train=lasso_model.predict(X_train)
y_pred_test=lasso_model.predict(X_test)
train_r2=r2_score(y_train,y_pred_train)
test_r2=r2_score(y_test,y_pred_test)
print('Train R2 =',train_r2)
print('Test R2 =',test_r2)

Train R2 = 0.0
Test R2 = -0.0060197319476869016


In [130]:
lasso_model.coef_

array([-0.        ,  0.        , -0.        , -0.        ,  2.6760258 ,
       -0.        , -0.        , -0.        , -0.1459351 , -1.76844418,
        0.        , -3.41737803,  0.        ])

# ElasticNet


In [132]:
em_model= ElasticNet(alpha=1,l1_ratio=0.5)
em_model.fit(X_train,y_train)
y_pred_train=em_model.predict(X_train)
y_pred_test=em_model.predict(X_test) 
train_r2= r2_score(y_train,y_pred_train)
test_r2=r2_score(y_test,y_pred_test)
print('Train R2 =',train_r2)
print('Test R2 =',test_r2)

Train R2 = 0.672564954139294
Test R2 = 0.5710793706444866


In [140]:
param={'alpha':[0.01,0.1,0.3,0.5],
      'l1_ratio':[0.3,0.5,0.7,0.9]}
algo_name=ElasticNet()
grid_cv=GridSearchCV(estimator=algo_name,param_grid=param,cv=5,scoring='r2')
grid_cv.fit(X_train,y_train)
grid_cv.best_params_

{'alpha': 0.1, 'l1_ratio': 0.3}

In [146]:
# Second ElasticNet grid. l1_ratio is the L1/L2 mixing weight and must lie in [0, 1]
# (1 = pure Lasso), so the out-of-range candidate 2 has been removed.
param={'alpha':[0.3,0.5,1,0.7,0.9],
      'l1_ratio':[1,0.5,0.7,0.9]}
algo_name=ElasticNet()
grid_cv=GridSearchCV(estimator=algo_name,param_grid=param,cv=5,scoring='r2')
grid_cv.fit(X_train,y_train)
grid_cv.best_params_

{'alpha': 0.3, 'l1_ratio': 0.5}

In [145]:
#just for try
param={'alpha':[1,3,5],
      'l1_ratio':[0.13,0.15,0.7,0.94]}
algo_name=ElasticNet()
grid_cv=GridSearchCV(estimator=algo_name,param_grid=param,cv=5,scoring='r2')
grid_cv.fit(X_train,y_train)
grid_cv.best_params_

{'alpha': 1, 'l1_ratio': 0.94}

# Grid Search Ridge

In [134]:

from sklearn.model_selection import GridSearchCV

In [135]:
param={'alpha':[0.01,0.1,1,10]}
model=Ridge()
grid_cv=GridSearchCV(estimator=model,param_grid=param,cv=5,scoring='r2')
grid_cv.fit(X_train,y_train)
grid_cv.best_params_

{'alpha': 10}

In [136]:
param={'alpha':[0.01,0.1,1,10,20,30]}
model=Ridge()
grid_cv=GridSearchCV(estimator=model,param_grid=param,cv=5,scoring='r2')
grid_cv.fit(X_train,y_train)
grid_cv.best_params_

{'alpha': 20}