In [1]:
import numpy as np
import pandas as pd
import pickle
import math
from sklearn import linear_model
from sklearn.metrics import mean_squared_error as mse
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the train, cross-validation and test frames from their pickle files.
# NOTE: unpickling can execute arbitrary code, so only load files you trust.
X_train, X_cv, X_test = (
    pd.read_pickle(pkl_path)
    for pkl_path in ('X_train.pkl', 'X_cv.pkl', 'X_test.pkl')
)

In [3]:
# Separate the target column ('sales') from each split.
# DataFrame.pop returns the column and removes it from the frame in place,
# so after this cell the X_* frames hold only the remaining columns.
# Because the frames are mutated, this cell cannot be re-run without first
# re-loading the data.
y_train = X_train.pop('sales')
y_cv = X_cv.pop('sales')
y_test = X_test.pop('sales')

# Print (features shape, target shape) for each split as a sanity check.
for _X, _y in ((X_train, y_train), (X_cv, y_cv), (X_test, y_test)):
    print(_X.shape, _y.shape)

(13781480, 28) (13781480,)
(853720, 28) (853720,)
(853720, 28) (853720,)


#### Lasso Regression

In [6]:
# Sweep the Lasso regularisation strength: for every candidate alpha, fit on
# the training split and report RMSE on both the training and the CV split.
alphas = [0.001, 0.01, 0.1, 0.5, 1]
separator = "*" * 90
for alpha in alphas:
    # sklearn estimators return themselves from fit(), so the call can be chained.
    model2 = linear_model.Lasso(alpha=alpha).fit(X_train, y_train)

    y_pred_train = model2.predict(X_train)
    print(f"Train error of Lasso regression with alpha {alpha} = {math.sqrt(mse(y_train, y_pred_train))}")

    y_pred_cv = model2.predict(X_cv)
    rmse = math.sqrt(mse(y_cv, y_pred_cv))
    print(f"Validation error of Lasso regression with alpha {alpha} = {rmse}")

    print(separator)

Train error of Lasso regression with alpha 0.001 = 1.8585271745634224
Validation error of Lasso regression with alpha 0.001 = 1.8136752034702524
******************************************************************************************
Train error of Lasso regression with alpha 0.01 = 1.859374166536545
Validation error of Lasso regression with alpha 0.01 = 1.814647454735836
******************************************************************************************
Train error of Lasso regression with alpha 0.1 = 1.8711556760857577
Validation error of Lasso regression with alpha 0.1 = 1.8266335296525686
******************************************************************************************
Train error of Lasso regression with alpha 0.5 = 1.881572647561544
Validation error of Lasso regression with alpha 0.5 = 1.836304453437755
******************************************************************************************
Train error of Lasso regression with alpha 1 = 1.9042779070569762
Vali

In [7]:
# Re-fit Lasso with the alpha chosen from the sweep and report RMSE on the
# training and test splits. The alpha is kept in one variable so the fitted
# model and the printed labels always agree.
lasso_alpha = 0.001
model1 = linear_model.Lasso(alpha=lasso_alpha)
model1.fit(X_train, y_train)

y_pred_train = model1.predict(X_train)
y_pred = model1.predict(X_test)

rmse = math.sqrt(mse(y_train, y_pred_train))
print(f"For alpha value {lasso_alpha}, the train RMSE score is {rmse}")
rmse = math.sqrt(mse(y_test, y_pred))
print(f"For alpha value {lasso_alpha}, the test RMSE score is {rmse}")

For alpha value 0.001, the train RMSE score is 1.8585271745634224
For alpha value 0.001, the test RMSE score is 1.811625828895387


#### Ridge Regression

In [8]:
# Sweep the Ridge regularisation strength: for every candidate alpha, fit on
# the training split and report RMSE on both the training and the CV split.
alphas = [0.001, 0.01, 0.1, 0.5, 1]
separator = "*" * 90
for alpha in alphas:
    # sklearn estimators return themselves from fit(), so the call can be chained.
    model2 = linear_model.Ridge(alpha=alpha).fit(X_train, y_train)

    y_pred_train = model2.predict(X_train)
    print(f"Train error of ridge regression with alpha {alpha} = {math.sqrt(mse(y_train, y_pred_train))}")

    y_pred_cv = model2.predict(X_cv)
    rmse = math.sqrt(mse(y_cv, y_pred_cv))
    print(f"Validation error of ridge regression with alpha {alpha} = {rmse}")

    print(separator)

Train error of ridge regression with alpha 0.001 = 1.8585033136584053
Validation error of ridge regression with alpha 0.001 = 1.8136516726845713
******************************************************************************************
Train error of ridge regression with alpha 0.01 = 1.8585033136584053
Validation error of ridge regression with alpha 0.01 = 1.8136516726845713
******************************************************************************************
Train error of ridge regression with alpha 0.1 = 1.8585033136584053
Validation error of ridge regression with alpha 0.1 = 1.8136516726845713
******************************************************************************************
Train error of ridge regression with alpha 0.5 = 1.8585033778010336
Validation error of ridge regression with alpha 0.5 = 1.8136518041423224
******************************************************************************************
Train error of ridge regression with alpha 1 = 1.8585033136584053


In [9]:
# Re-fit Ridge with the selected alpha and report RMSE on the training and
# test splits.
# The alpha lives in a single variable that feeds both the estimator and the
# printed labels, so the report cannot name a different value than the one
# actually fitted (the labels used to say 0.001 while the model used alpha=1).
# NOTE(review): the value that was actually fitted (alpha=1) is kept here; the
# sweep shows near-identical validation RMSE across the candidates, so confirm
# which alpha was intended.
ridge_alpha = 1
model2 = linear_model.Ridge(alpha=ridge_alpha)
model2.fit(X_train, y_train)
y_pred_train = model2.predict(X_train)
rmse = math.sqrt(mse(y_train,y_pred_train))
print(f"For alpha value {ridge_alpha}, the train RMSE score is {rmse}")
y_pred = model2.predict(X_test)
rmse = math.sqrt(mse(y_test,y_pred))
print(f"For alpha value {ridge_alpha}, the test RMSE score is {rmse}")

For alpha value 0.001, the train RMSE score is 1.8585033136584053
For alpha value 0.001, the test RMSE score is 1.8114981019749543
