## Ridge and Lasso Regression

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [3]:
df=pd.read_csv('./Data_set/50_Startups.csv')
df.head()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94


In [4]:
from sklearn.preprocessing import LabelEncoder
le=LabelEncoder()

In [5]:
df.State=le.fit_transform(df.State)


In [6]:
df.head()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,2,192261.83
1,162597.7,151377.59,443898.53,0,191792.06
2,153441.51,101145.55,407934.54,1,191050.39
3,144372.41,118671.85,383199.62,2,182901.99
4,142107.34,91391.77,366168.42,1,166187.94


In [8]:
X=df.drop(columns=['Profit'],axis=1)
X.head()
y=df['Profit']
y.head()

0    192261.83
1    191792.06
2    191050.39
3    182901.99
4    166187.94
Name: Profit, dtype: float64

In [10]:
from sklearn.preprocessing import MinMaxScaler
scale=MinMaxScaler()

In [11]:
X_scaled=pd.DataFrame(scale.fit_transform(X),columns=X.columns)
X_scaled.head()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State
0,1.0,0.651744,1.0,1.0
1,0.983359,0.761972,0.940893,0.0
2,0.927985,0.379579,0.864664,0.5
3,0.873136,0.512998,0.812235,1.0
4,0.859438,0.305328,0.776136,0.5


In [24]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X_scaled,y,test_size=0.2,random_state=0)

In [25]:
X_train.shape

(86, 4)

In [26]:
X_test.shape


(22, 4)

## Model building

In [27]:
from sklearn.linear_model import Ridge,Lasso

In [28]:
r=Ridge()
l=Lasso()

In [29]:
r.fit(X_train,y_train)
l.fit(X_train,y_train)

In [30]:
pred1=r.predict(X_test)
pred2=l.predict(X_test)
display(pred1)
display(pred2)

array([ 54556.32416702, 130017.92166782,  84687.15947095, 173295.2223158 ,
       108917.94957822, 128735.89224253, 128736.35934265, 155951.19177423,
       117814.48562718,  52712.59507338, 102790.3781561 , 119096.2726001 ,
        54556.32416702, 124206.72612243,  88379.01243395, 126261.35613731,
       126261.35613731,  98802.1865801 ,  74278.88209886, 141546.67661999,
       145564.21281487, 150251.73759042])

array([ 48384.86814735, 134845.52354938,  76486.64641608, 181551.13594979,
       112961.07382208, 134236.64101991, 129218.98004997, 160017.16104325,
       116754.23112994,  46273.04713164, 102272.49339834, 115567.13437352,
        48384.86814735, 119116.48630482,  88593.22703248, 127104.80005829,
       127104.80005829,  90948.41312188,  58678.78647171, 146299.80323437,
       149413.8490298 , 152502.10158276])

In [31]:
pred1_train=r.predict(X_train)
pred2_train=l.predict(X_train)

In [32]:
df_new=pd.DataFrame({'Actual':y_test,'Predicted_ridge':pred1,'Predicted_lasso':pred2})
display(df_new.head())

Unnamed: 0,Actual,Predicted_ridge,Predicted_lasso
84,64926.08,54556.324167,48384.868147
10,146121.95,130017.921668,134845.523549
75,90708.19,84687.159471,76486.646416
2,191050.39,173295.222316,181551.13595
24,108552.04,108917.949578,112961.073822


In [33]:
from sklearn import metrics


In [34]:
# r2_score
display('Ridge',metrics.r2_score(y_test,pred1))
display('Lasso',metrics.r2_score(y_test,pred2))

'Ridge'

0.9095565216441847

'Lasso'

0.9259035724996549

In [35]:
# RMSE
display('Ridge',np.sqrt(metrics.mean_squared_error(y_test,pred1)))
display('Lasso',np.sqrt(metrics.mean_squared_error(y_test,pred2)))

'Ridge'

10825.266082933194

'Lasso'

9798.251585438255

In [36]:
# training accuracy
display('Ridge',metrics.r2_score(y_train,pred1_train))
display('Lasso',metrics.r2_score(y_train,pred2_train))

'Ridge'

0.920686926857262

'Lasso'

0.937696968189804