In [1]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.linear_model import LinearRegression, Lasso, Ridge 
from sklearn.model_selection import KFold, cross_val_score, train_test_split

In [2]:
x_train=pd.read_csv("E:\Courses\Data science(ETLHive)\dataset\cleaned_train_X.csv")
y_train=pd.read_csv("E:\Courses\Data science(ETLHive)\dataset\cleaned_train_y.csv")

In [3]:
#lets create function for cross validation
def rmsle_cv(model):
    kf=KFold(5,shuffle=True,random_state=0).get_n_splits(x_train)
    rmse=np.sqrt(-cross_val_score(model,x_train,y_train,cv=kf,scoring="neg_mean_squared_error"))
    return rmse

### Linear model

In [4]:
lr=LinearRegression()
lr.fit(x_train,y_train)
lr_score=rmsle_cv(lr)
print("Linear regression=",round(lr_score.mean(),3))

Linear regression= 423853364376.486


### Lasso (using default alpha=1)

In [5]:
lasso = make_pipeline(RobustScaler(), Lasso(max_iter=5000, random_state=0))
lasso.fit(x_train,y_train)
lasso_score=rmsle_cv(lasso)

In [6]:
print("Lasso score=",round(lasso_score.mean(),3))

Lasso score= 0.386


### Lasso using diff values of alpha

In [7]:
alpha_val=[0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10]

for alpha in alpha_val:
    lasso = make_pipeline(RobustScaler(), Lasso(alpha=alpha,max_iter=5000, random_state=0))
    lasso.fit(x_train,y_train)
    lasso_score=rmsle_cv(lasso)
    print("Lasso score=",round(lasso_score.mean(),3),"\talpha=",alpha)
    

Lasso score= 0.151 	alpha= 0.0001
Lasso score= 0.147 	alpha= 0.0005
Lasso score= 0.145 	alpha= 0.001
Lasso score= 0.142 	alpha= 0.005
Lasso score= 0.146 	alpha= 0.01
Lasso score= 0.193 	alpha= 0.05
Lasso score= 0.247 	alpha= 0.1
Lasso score= 0.377 	alpha= 0.5
Lasso score= 0.386 	alpha= 1
Lasso score= 0.399 	alpha= 5
Lasso score= 0.399 	alpha= 10


### Ridge regression(alpha=1)

In [8]:
ridge = make_pipeline(RobustScaler(), Ridge(max_iter=5000, random_state=0))
ridge.fit(x_train,y_train)
ridge_score=rmsle_cv(ridge)
print("Ridge score=",round(ridge_score.mean(),3))

Ridge score= 0.153


### Ridge diff values of alpha

In [9]:
alpha_values = [0.1, 0.5, 1, 5, 10, 15, 20, 50, 100, 200, 500, 1000]

for alpha in alpha_values:
    ridge = make_pipeline(RobustScaler(), Ridge(max_iter=5000, random_state=0,alpha=alpha))
    ridge.fit(x_train,y_train)
    ridge_score=rmsle_cv(ridge)
    print("Ridge score=",round(ridge_score.mean(),3),"\talpha=",alpha)
    

Ridge score= 0.153 	alpha= 0.1
Ridge score= 0.153 	alpha= 0.5
Ridge score= 0.153 	alpha= 1
Ridge score= 0.152 	alpha= 5
Ridge score= 0.15 	alpha= 10
Ridge score= 0.149 	alpha= 15
Ridge score= 0.149 	alpha= 20
Ridge score= 0.146 	alpha= 50
Ridge score= 0.143 	alpha= 100
Ridge score= 0.14 	alpha= 200
Ridge score= 0.14 	alpha= 500
Ridge score= 0.146 	alpha= 1000


In [None]:
ridge.