In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression , Ridge, Lasso
from sklearn.preprocessing import StandardScaler , MinMaxScaler
from sklearn.metrics import r2_score, mean_squared_error

import warnings
warnings.filterwarnings('ignore')

In [None]:
from sklearn.preprocessing import PolynomialFeatures

In [None]:
data = {'Years of Experience':[1,2,3,4,5,6,7,8,9,10],
        'Salary($)':[45000,50000,60000,80000,110000,150000,200000,300000,500000,1000000]}

data = pd.DataFrame(data)

data.head()


Unnamed: 0,Years of Experience,Salary($)
0,1,45000
1,2,50000
2,3,60000
3,4,80000
4,5,110000


In [None]:
px.scatter(data,x='Years of Experience',y='Salary($)',template='ggplot2')

In [None]:
scaler = StandardScaler()
min_max = MinMaxScaler()

In [None]:
data2 = min_max.fit_transform(data)
data2

array([[0.        , 0.        ],
       [0.11111111, 0.0052356 ],
       [0.22222222, 0.01570681],
       [0.33333333, 0.03664921],
       [0.44444444, 0.06806283],
       [0.55555556, 0.10994764],
       [0.66666667, 0.16230366],
       [0.77777778, 0.26701571],
       [0.88888889, 0.47643979],
       [1.        , 1.        ]])

In [None]:
scaled_data = pd.DataFrame(data2,columns=['Years of Experience','Salary($)'])
scaled_data.head()

Unnamed: 0,Years of Experience,Salary($)
0,0.0,0.0
1,0.111111,0.005236
2,0.222222,0.015707
3,0.333333,0.036649
4,0.444444,0.068063


In [None]:
x = scaled_data['Years of Experience'].values.reshape(-1,1)
y = scaled_data['Salary($)']

In [None]:
reg = LinearRegression()
model = reg.fit(x,y)

In [None]:
y_hat = model.predict(x)

In [None]:
### RSS
print('RSS:     ', np.sum(np.square(y-y_hat))*100)

### MSE
print('MSE:     ', mean_squared_error(y,y_hat)*100)


### RMSE
print('RMSE:     ', mean_squared_error(y,y_hat)**0.5)

### R2
print('R2 Score:', r2_score(y,y_hat)*100)


RSS:      29.270994531815226
MSE:      2.9270994531815226
RMSE:      0.17108768083007972
R2 Score: 66.90412331929896


In [None]:
### Defining the Regression Line
import plotly.graph_objects as go
x_range = np.linspace(x.min(),x.max(),100)
y_range = model.predict(x_range.reshape(-1, 1))

In [None]:
fig = px.scatter(scaled_data,x='Years of Experience',y='Salary($)', opacity=0.65,template='ggplot2')
fig.add_traces(go.Scatter(x=x_range, y=y_range, name='Regression Fit'))
fig.show()

###Polynomial Regression

In [None]:
degree = 5
polyreg = PolynomialFeatures(degree)
poly_x = polyreg.fit_transform(x)
lr = LinearRegression()
poly_model = lr.fit(poly_x,y)
y_hat_poly = poly_model.predict(poly_x)

In [None]:
### RSS
print('RSS:     ', np.sum(np.square(y-y_hat_poly))*100)

### MSE
print('MSE:     ', mean_squared_error(y,y_hat_poly)*100)


### RMSE
print('RMSE:     ', mean_squared_error(y,y_hat_poly)**0.5)

### R2
print('R2 Score:', r2_score(y,y_hat_poly)*100)


RSS:      0.017962538726773856
MSE:      0.0017962538726773855
RMSE:      0.0042382235343093756
R2 Score: 99.97969027099755


In [None]:
x_range_2 = np.linspace(x.min(),x.max(),100)
y_range_2 = poly_model.predict(polyreg.fit_transform(x_range.reshape(-1,1)))

In [None]:
fig = px.scatter(scaled_data,x='Years of Experience',y='Salary($)', opacity=0.65,template='ggplot2')
fig.add_traces(go.Scatter(x=x_range, y=y_range, name='Regression Fit'))
fig.add_traces(go.Scatter(x=x_range_2, y=y_range_2, name='Polynomial Fit'))
fig.show()

### Ridge Regression

In [None]:
x_range_ridge = np.linspace(x.min(),x.max(),100)

lambdas = [0 ,0.001, 0.01 ,0.1 , 1 ] ### List of lambda values

### iterating over lambda values
for i in lambdas:
  degree = 5
  poly_trans = PolynomialFeatures(degree) ### Initializing the polynomial transform
  poly_x = poly_trans.fit_transform(x) ###Transforming the independent variables
  ridge = Ridge(alpha = i) ### Initalizing Ridge to iterate over diffrent values of lambdas
  model = ridge.fit(poly_x,y) ### Fitting the model on tge transsformed data

  ### Predicting Y
  y_hat_ridge = model.predict(poly_x)

  ### R2 Score
  print("R2 Score for Lambda = " + str(i),'is: ' ,r2_score(y,y_hat_ridge)*100)
  print("Coefficents : ", model.coef_)


  y_range_ridge = model.predict(poly_trans.fit_transform(x_range.reshape(-1,1)))

  fig = px.scatter(scaled_data,x='Years of Experience',y='Salary($)', opacity=0.65,template='ggplot2')
  fig.add_traces(go.Scatter(x=x_range, y=y_range, name='Regression Fit'))
  fig.add_traces(go.Scatter(x=x_range_ridge, y=y_range_ridge, name='Ridge Fit'))
  fig.show()

R2 Score for Lambda = 0 is:  99.97969027099755
Coefficents :  [  0.           0.29658624  -2.96735071  12.18024933 -18.26036503
   9.7503383 ]


R2 Score for Lambda = 0.001 is:  99.47268201453204
Coefficents :  [ 0.          0.23413966 -0.01421785 -0.57141407 -0.01794265  1.35656348]


R2 Score for Lambda = 0.01 is:  98.92589025776466
Coefficents :  [ 0.          0.12185622 -0.17562938 -0.04731598  0.30586502  0.74121918]


R2 Score for Lambda = 0.1 is:  97.54917840761891
Coefficents :  [ 0.         -0.01621411  0.05876173  0.17290679  0.28711648  0.38960513]


R2 Score for Lambda = 1 is:  91.72712077011919
Coefficents :  [0.         0.08416927 0.13145506 0.16130018 0.18155321 0.19592984]


###Lasso

In [None]:
x_range_lasso = np.linspace(x.min(),x.max(),100)

lambdas = [0 ,0.001, 0.01 ,0.1 , 1 ] ### List of lambda values

### iterating over lambda values
for i in lambdas:
  degree = 5
  poly_trans = PolynomialFeatures(degree) ### Initializing the polynomial transform
  poly_x = poly_trans.fit_transform(x) ###Transforming the independent variables
  lasso = Lasso(alpha = i) ### Initalizing Lasso to iterate over diffrent values of lambdas
  model = lasso.fit(poly_x,y) ### Fitting the model on the transsformed data

  ### Predicting Y
  y_hat_lasso = model.predict(poly_x)

  ### R2 Score
  print("R2 Score for Lambda = " + str(i),'is: ' ,r2_score(y,y_hat_lasso)*100)
  print("Coefficents : ", model.coef_)


  y_range_lasso = model.predict(poly_trans.fit_transform(x_range.reshape(-1,1)))

  fig = px.scatter(scaled_data,x='Years of Experience',y='Salary($)', opacity=0.65,template='ggplot2')
  fig.add_traces(go.Scatter(x=x_range, y=y_range, name='Regression Fit'))
  fig.add_traces(go.Scatter(x=x_range_lasso, y=y_range_lasso, name='lasso Fit'))
  fig.show()

R2 Score for Lambda = 0 is:  99.46734806584026
Coefficents :  [ 0.          0.31332209 -0.05469612 -0.86193501  0.29884445  1.30579905]


R2 Score for Lambda = 0.001 is:  98.87598480746485
Coefficents :  [0.         0.01775508 0.         0.         0.         0.91471411]


R2 Score for Lambda = 0.01 is:  97.72671967392061
Coefficents :  [0.         0.         0.         0.         0.         0.83853093]


R2 Score for Lambda = 0.1 is:  0.0
Coefficents :  [0. 0. 0. 0. 0. 0.]


R2 Score for Lambda = 1 is:  0.0
Coefficents :  [0. 0. 0. 0. 0. 0.]


Comparing all three models

In [None]:
degree = 5
poly_trans = PolynomialFeatures(degree)     ### Initializing the polynomial transform
poly_x = poly_trans.fit_transform(x)        ###Transforming the independent variables

lr = LinearRegression()                     ### Initalizing SLR
ridge = Ridge(alpha=0.01)                   ### Initalizing Ridge to iterate over diffrent values of lambdas
lasso = Lasso(alpha = 0.01)                 ### Initalizing Lasso to iterate over diffrent values of lambdas

lr_model = lr.fit(poly_x,y)
ridge_model = ridge.fit(poly_x,y)           ### Fitting the model on the transsformed data
lasso_model = lasso.fit(poly_x,y)

### Predicting Y
y_hat_lr = lr_model.predict(poly_x)
y_hat_ridge = ridge_model.predict(poly_x)
y_hat_lasso = lasso_model.predict(poly_x)

### Coefficients
print("SLR Coefficents : ", lr_model.coef_)
print("Ridge Coefficents : ", ridge_model.coef_)
print("Lasso Coefficents : ", lasso_model.coef_)


### R2 Score
print("R2 Score for SLR"      ,r2_score(y,y_hat_lr)*100)
print("R2 Score for Ridge"   ,r2_score(y,y_hat_ridge)*100)
print("R2 Score for Lasso "  ,r2_score(y,y_hat_lasso)*100)

SLR Coefficents :  [  0.           0.29658624  -2.96735071  12.18024933 -18.26036503
   9.7503383 ]
Ridge Coefficents :  [ 0.          0.12185622 -0.17562938 -0.04731598  0.30586502  0.74121918]
Lasso Coefficents :  [0.         0.         0.         0.         0.         0.83853093]
R2 Score for SLR 99.97969027099755
R2 Score for Ridge 98.92589025776466
R2 Score for Lasso  97.72671967392061


In [None]:
best_model = pd.DataFrame(index = ['Intercept','B1','B2','B3','B4','B5']
                          ,columns = ['Polynomial','Ridge','Lasso'])

best_model['Polynomial'] = lr_model.coef_
best_model['Ridge'] = ridge_model.coef_
best_model['Lasso'] = lasso_model.coef_

best_model.head()

Unnamed: 0,Polynomial,Ridge,Lasso
Intercept,0.0,0.0,0.0
B1,0.296586,0.121856,0.0
B2,-2.967351,-0.175629,0.0
B3,12.180249,-0.047316,0.0
B4,-18.260365,0.305865,0.0
