In [1]:
import numpy as np

In [11]:
class LASSO_regression():
    """Linear regression with an L1 (LASSO) penalty, fitted by batch gradient descent.

    Each update descends the objective

        (1/m) * ( sum((Y - (X.w + b))**2) + lambda_parameter * sum(|w|) )

    where m is the number of training rows. The intercept ``b`` is not
    penalised.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient update.
    no_of_iteration : int
        Number of full-batch updates performed by ``fit``.
    lambda_parameter : float
        Strength of the L1 penalty on the weights.

    Attributes (set by ``fit``)
    ---------------------------
    w : numpy.ndarray, shape (n,)
        Learned feature weights.
    b : float
        Learned intercept.
    """

    def __init__(self, learning_rate, no_of_iteration, lambda_parameter):
        self.learning_rate = learning_rate
        self.no_of_iteration = no_of_iteration
        self.lambda_parameter = lambda_parameter

    def fit(self, X, Y):
        """Fit weights and intercept to the training data.

        Parameters
        ----------
        X : array-like, shape (m, n)
            Training features, one row per sample.
        Y : array-like, shape (m,)
            Training targets.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``Y`` does not hold one value per row of ``X``.
        """
        # Coerce once so lists / DataFrames work and all arithmetic is float.
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float)
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional with shape (m, n)")

        self.m, self.n = X.shape
        if Y.shape != (self.m,):
            raise ValueError("Y must be 1-dimensional with one target per row of X")

        # Every call to fit restarts from zero weights and zero intercept.
        self.w = np.zeros(self.n)
        self.b = 0
        self.X = X
        self.Y = Y

        for _ in range(self.no_of_iteration):
            self.update_weights()

    def update_weights(self):
        """Apply one full-batch (sub)gradient step to ``w`` and ``b``."""
        residual = self.Y - self.predict(self.X)

        # The L1 term contributes lambda * sign(w). np.sign(0) is 0, which is a
        # valid subgradient of |w| at 0, so a weight sitting exactly at zero is
        # moved only by the data term instead of being pushed positive.
        dw = (-2 * self.X.T.dot(residual)
              + self.lambda_parameter * np.sign(self.w)) / self.m
        db = -2 * np.sum(residual) / self.m

        self.w = self.w - self.learning_rate * dw
        self.b = self.b - self.learning_rate * db

    def predict(self, X):
        """Return ``X.dot(w) + b`` for the current weights and intercept.

        ``X`` must provide a ``.dot`` method (e.g. a NumPy array of shape
        (k, n)); ``fit`` must have been called first so ``w`` and ``b`` exist.
        """
        return X.dot(self.w) + self.b

In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [15]:
# Load the dataset into a DataFrame. The path is relative, so the CSV must be
# in the notebook's working directory.
salary_data = pd.read_csv('salary_data.csv')

In [17]:
# Display the loaded frame to check its columns and values.
salary_data

Unnamed: 0,YearsExperience,Salary
0,1.1,39343
1,1.3,46205
2,1.5,37731
3,2.0,43525
4,2.2,39891
5,2.9,56642
6,3.0,60150
7,3.2,54445
8,3.2,64445
9,3.7,57189


In [19]:
# Features are every column except the last; the target is the last column.
# Both selections are made relative to the end of the frame so they always
# partition the columns, rather than pairing ":-1" with a hard-coded index.
# NOTE(review): assumes the target (Salary) is the final column - confirm.
X = salary_data.iloc[:, :-1].values
Y = salary_data.iloc[:, -1].values

In [21]:
# Hold out 33% of the rows for testing. random_state is fixed so the same
# split is produced on every run.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size= 0.33 , random_state=3)

In [23]:
# Instantiate the hand-written LASSO model: step size 0.01, 1000 gradient
# updates, L1 penalty strength 200.
model = LASSO_regression(learning_rate=0.01 , no_of_iteration=1000 , lambda_parameter=200)

In [25]:
# Train the custom model on the training split (runs no_of_iteration updates).
model.fit(X_train, Y_train)

In [27]:
# Predict targets for the held-out test features with the custom model.
test_data_prediction = model.predict(X_test)

In [29]:
# Show the custom model's predictions for the test split.
test_data_prediction

array([ 72742.27319004,  53714.15818369, 101284.44569956, 116506.93770464,
        82256.33069321,  68936.65018877,  65131.0271875 ,  40394.47767925,
        74645.08469068,  38491.66617862])

In [33]:
# Score the custom model on the test split: error_count_1 holds the R^2 score
# and error_count_2 the mean absolute error.
error_count_1 = metrics.r2_score(
    y_true=Y_test,
    y_pred=test_data_prediction,
)
error_count_2 = metrics.mean_absolute_error(
    y_true=Y_test,
    y_pred=test_data_prediction,
)

In [35]:
# Print each metric for the custom model on its own line as "<label> <value>".
for metric_label, metric_value in (
    ("R Squared error", error_count_1),
    ("mean_absolute_error", error_count_2),
):
    print(metric_label, metric_value)

R Squared error 0.9441275925680183
mean_absolute_error 4397.3635862943875


In [37]:
# Compare against scikit-learn's built-in Lasso implementation
from sklearn.linear_model import Lasso

In [39]:
# Reference model built with scikit-learn's default hyperparameters. No penalty
# strength is passed here, whereas the custom model above was given
# lambda_parameter=200, so the two are not tuned to the same regularisation.
imported_model = Lasso()

In [41]:
# Fit the scikit-learn model on the same training split as the custom model.
imported_model.fit(X_train, Y_train)

In [43]:
# Predict on the same held-out test features so both models can be compared.
test_imported_model_prediction = imported_model.predict(X_test)

In [45]:
# Show the scikit-learn model's predictions for the test split.
test_imported_model_prediction

array([ 72857.96567796,  53936.27199089, 101240.50620857, 116377.86115822,
        82318.8125215 ,  69073.62694055,  65289.28820313,  40691.08640994,
        74750.13504667,  38798.91704124])

In [47]:
# Score the scikit-learn model on the test split with the same two metrics
# used for the custom model, then print them as "<label> <value>".
error_count_3 = metrics.r2_score(
    y_true=Y_test,
    y_pred=test_imported_model_prediction,
)
error_count_4 = metrics.mean_absolute_error(
    y_true=Y_test,
    y_pred=test_imported_model_prediction,
)
for metric_label, metric_value in (
    ("R Squared error", error_count_3),
    ("mean_absolute_error", error_count_4),
):
    print(metric_label, metric_value)

R Squared error 0.9434359026643324
mean_absolute_error 4449.235840083489
