<a href="https://colab.research.google.com/github/RajRover/ML_Projects/blob/main/Lasso_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import numpy as np

In [7]:
import numpy as np

class Lasso_Regression():
    """Linear regression with an L1 (Lasso) penalty, trained by full-batch
    subgradient descent.

    The objective being minimised is

        (1 / m) * sum((Y - (X.dot(w) + b)) ** 2) + lambda_parameter * sum(|w|)

    where ``m`` is the number of training rows. The bias ``b`` is not
    penalised.

    Parameters
    ----------
    lambda_parameter : float
        Weight of the L1 penalty on the coefficients.
    no_of_iterations : int
        Number of gradient steps performed by ``fit``.
    learning_rate : float
        Step size applied to every gradient step.

    Attributes (set by ``fit``)
    ---------------------------
    w : numpy.ndarray of shape (n,)
        Learned coefficients, one per feature.
    b : float
        Learned intercept.
    m, n : int
        Number of training rows and features.
    X, Y : numpy.ndarray
        The training data, stored as float arrays.
    """

    def __init__(self, lambda_parameter, no_of_iterations, learning_rate):
        self.lambda_parameter = lambda_parameter
        self.no_of_iterations = no_of_iterations
        self.learning_rate = learning_rate

    def fit(self, X, Y):
        """Fit the model to ``X`` (m rows, n features) and targets ``Y``.

        ``X`` may be any 2-D array-like (nested list, ndarray, DataFrame);
        ``Y`` may be 1-D or a single-column 2-D array-like with one value
        per row of ``X``.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, has no rows, or ``Y`` does not hold exactly
            one value per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so a column-vector target cannot broadcast against the
        # 1-D predictions and produce an (m, m) residual.
        Y = np.asarray(Y, dtype=float).reshape(-1)

        if X.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (rows, features); got %d dimension(s)" % X.ndim
            )
        if X.shape[0] == 0:
            # The gradients divide by the row count.
            raise ValueError("X must contain at least one row")
        if Y.shape[0] != X.shape[0]:
            raise ValueError(
                "Y must hold one value per row of X; got %d values for %d rows"
                % (Y.shape[0], X.shape[0])
            )

        self.m, self.n = X.shape
        self.w = np.zeros(self.n)
        self.b = 0.0
        self.X = X
        self.Y = Y

        for _ in range(self.no_of_iterations):
            self.update_weights()

    def update_weights(self):
        """Perform one subgradient-descent step on ``w`` and ``b``."""
        # The residual is shared by both gradients, so compute it once.
        residual = self.Y - self.predict(self.X)

        # np.sign yields +1 / -1 / 0, i.e. +lambda for positive weights,
        # -lambda for negative weights and no penalty term at exactly zero.
        dw = (-2 * self.X.T.dot(residual)) / self.m \
            + self.lambda_parameter * np.sign(self.w)
        db = (-2 * np.sum(residual)) / self.m

        self.w = self.w - self.learning_rate * dw
        self.b = self.b - self.learning_rate * db

    def predict(self, X):
        """Return ``X.dot(w) + b`` for the given feature rows.

        Objects that already provide ``.dot`` (ndarray, DataFrame) are used
        unchanged; other array-likes such as nested lists are converted to a
        float ndarray first.
        """
        if not hasattr(X, "dot"):
            X = np.asarray(X, dtype=float)
        return X.dot(self.w) + self.b


Implementation

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [9]:
# Read the salary CSV into a DataFrame and preview its first rows.
# NOTE(review): '/salary_data.csv' is an absolute path at the filesystem root
# (presumably where the file was uploaded in Colab) — adjust when running elsewhere.
salary_dataset=pd.read_csv('/salary_data.csv')
salary_dataset.head()

Unnamed: 0,YearsExperience,Salary
0,1.1,39343
1,1.3,46205
2,1.5,37731
3,2.0,43525
4,2.2,39891


In [10]:
# (number of rows, number of columns) of the loaded dataset.
salary_dataset.shape

(30, 2)

In [11]:
# Summary statistics per column (count, mean, std, min, quartiles, max).
salary_dataset.describe()

Unnamed: 0,YearsExperience,Salary
count,30.0,30.0
mean,5.313333,76003.0
std,2.837888,27414.429785
min,1.1,37731.0
25%,3.2,56720.75
50%,4.7,65237.0
75%,7.7,100544.75
max,10.5,122391.0


In [12]:
# Number of missing values in each column.
salary_dataset.isnull().sum()

Unnamed: 0,0
YearsExperience,0
Salary,0


In [13]:
# Features: every column except the last (kept 2-D); target: the last column.
# Both slices are anchored to the last column, so the target can never be one
# of the feature columns regardless of how many columns the frame has.
X=salary_dataset.iloc[:,:-1].values
Y=salary_dataset.iloc[:,-1].values

In [14]:
# Show the full feature matrix (2-D: one row per sample).
print(X)

[[ 1.1]
 [ 1.3]
 [ 1.5]
 [ 2. ]
 [ 2.2]
 [ 2.9]
 [ 3. ]
 [ 3.2]
 [ 3.2]
 [ 3.7]
 [ 3.9]
 [ 4. ]
 [ 4. ]
 [ 4.1]
 [ 4.5]
 [ 4.9]
 [ 5.1]
 [ 5.3]
 [ 5.9]
 [ 6. ]
 [ 6.8]
 [ 7.1]
 [ 7.9]
 [ 8.2]
 [ 8.7]
 [ 9. ]
 [ 9.5]
 [ 9.6]
 [10.3]
 [10.5]]


In [15]:
# Show the full target vector (one value per sample).
print(Y)

[ 39343  46205  37731  43525  39891  56642  60150  54445  64445  57189
  63218  55794  56957  57081  61111  67938  66029  83088  81363  93940
  91738  98273 101302 113812 109431 105582 116969 112635 122391 121872]


In [19]:
# Hold out 33% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size=0.33,random_state=2)

In [32]:
# Instantiate the gradient-descent Lasso class with an L1 weight of 200,
# 1000 gradient steps and a step size of 0.02.
model = Lasso_Regression(lambda_parameter=200, no_of_iterations=1000, learning_rate=0.02)

In [33]:
# Train on the training split only; the test split stays unseen.
model.fit(X_train,Y_train)

In [34]:
# In-sample predictions on the training rows, printed for inspection.
Y_train_pred=model.predict(X_train)
print(Y_train_pred)

[ 51324.88857827 114988.40737036  61777.10808145  44673.47616715
  60826.90630843  72229.32758462 121639.81978147 109287.19673226
  74129.73113066  38022.06375604  54175.49389732 114038.20559734
 106436.59141321  79830.94176876  61777.10808145  98834.97722908
 123540.2233275   62727.30985446  70328.92403859  54175.49389732]


In [35]:
# Predictions on the held-out test rows, used for the metrics below.
test_data_prediction=model.predict(X_test)

In [36]:
from sklearn import metrics
# Evaluate the custom model on the test split.
# NOTE: r2_score is the coefficient of determination (a score where 1.0 is a
# perfect fit), so the "error" wording in the first label is a misnomer.
score_1=metrics.r2_score(Y_test, test_data_prediction)
# Mean absolute error is expressed in the same units as the target.
score_2=metrics.mean_absolute_error(Y_test,test_data_prediction)
print("R squared error : ", score_1)
print('Mean Absolute Error : ', score_2)

R squared error :  0.9042823684154148
Mean Absolute Error :  6666.669300178942


Comparing the error with scikit-learn's Lasso regression (default settings)

In [37]:
from sklearn.linear_model import Lasso

In [38]:
# scikit-learn reference model built with its default settings.
# NOTE(review): no alpha is passed, so its penalty strength is not matched to
# the lambda_parameter=200 given to the custom model — the two scores compare
# differently regularised models.
skmodel=Lasso()

In [39]:
# Fit the reference model on the same training split as the custom model.
skmodel.fit(X_train,Y_train)

In [42]:
# Score the scikit-learn model on the same test split with the same two
# metrics (R^2 and mean absolute error) used for the custom model.
test_data=skmodel.predict(X_test)
score1=metrics.r2_score(Y_test,test_data)
score2=metrics.mean_absolute_error(Y_test,test_data)

In [43]:
# R^2 first, then mean absolute error, for the scikit-learn model.
print(score1)
print(score2)

0.9041817142395875
6673.480440148538
