In [1]:
import numpy as np

In [46]:
class Lasso_Regression():
    """Linear regression with an L1 (Lasso) penalty, trained by batch gradient descent.

    The cost being minimised is

        (sum((y - y_pred) ** 2) + lambda_parameter * sum(abs(w))) / m

    where ``y_pred = x.dot(w) + b`` and ``m`` is the number of training rows.
    The bias ``b`` is not penalised.

    Parameters
    ----------
    no_of_iteration : int
        Number of gradient-descent steps performed by ``fit``.
    learning_rate : float
        Step size applied to every gradient update.
    lambda_parameter : float
        Strength of the L1 penalty on the weights.

    Attributes set by ``fit``
    -------------------------
    w : ndarray of shape (n,)   -- learned weights
    b : float                   -- learned bias
    m, n : int                  -- number of training rows / features
    x, y : ndarray              -- training data kept for the update steps
    """

    def __init__(self, no_of_iteration, learning_rate, lambda_parameter):
        self.no_of_iteration = no_of_iteration
        self.learning_rate = learning_rate
        self.lambda_parameter = lambda_parameter

    @staticmethod
    def _as_array(data):
        """Return ``data`` as a float NumPy array, unwrapping objects that expose ``.values``."""
        if hasattr(data, 'values'):
            data = data.values
        return np.asarray(data, dtype=float)

    def fit(self, x, y):
        """Learn ``w`` and ``b`` from the training data.

        Parameters
        ----------
        x : array-like or DataFrame of shape (m, n)
            Training features.
        y : array-like or Series of shape (m,) or (m, 1)
            Training targets; a column vector is flattened.

        Raises
        ------
        ValueError
            If ``x`` is not 2-D, has no rows, or ``y`` does not hold one
            value per row of ``x``.
        """
        x = self._as_array(x)
        # Flatten so that a (m, 1) target cannot broadcast against the (m,)
        # predictions into an (m, m) residual matrix.
        y = self._as_array(y).ravel()

        if x.ndim != 2:
            raise ValueError(
                "x must be 2-D with shape (n_samples, n_features); got %d dimension(s)" % x.ndim
            )
        if x.shape[0] == 0:
            raise ValueError("x must contain at least one row")
        if y.shape[0] != x.shape[0]:
            raise ValueError(
                "x and y disagree on the number of samples: %d vs %d" % (x.shape[0], y.shape[0])
            )

        self.m, self.n = x.shape
        self.w = np.zeros(self.n)
        self.b = 0.0

        self.x = x
        self.y = y

        for _ in range(self.no_of_iteration):
            self.update_weights()

    def update_weights(self):
        """Perform one gradient-descent step on ``w`` and ``b`` using the stored training data."""
        residual = self.y - self.predict(self.x)

        # Subgradient of the L1 term. np.sign returns 0 for a weight that is
        # exactly zero, so such a weight only moves when the data gradient is
        # non-zero instead of being pushed away from zero by the penalty itself.
        penalty = self.lambda_parameter * np.sign(self.w)

        # All feature gradients at once: x.T.dot(residual) has one entry per column.
        dw = (-2 * self.x.T.dot(residual) + penalty) / self.m
        db = -2 * np.sum(residual) / self.m

        self.w -= self.learning_rate * dw
        self.b -= self.learning_rate * db

    def predict(self, x):
        """Return ``x.dot(w) + b`` for ``x`` given as an array-like or DataFrame."""
        x = self._as_array(x)
        return x.dot(self.w) + self.b


In [47]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.model_selection import train_test_split

In [48]:
# Location of the salary dataset. Set the SALARY_DATA_CSV environment variable
# to point at another copy; the fallback is the original local path.
DATA_PATH = Path(os.environ.get("SALARY_DATA_CSV", "D:/Dataset for ML/salary_data.csv"))
df = pd.read_csv(DATA_PATH)

In [49]:
# Preview the first five rows of the loaded data.
df.head()

Unnamed: 0,YearsExperience,Salary
0,1.1,39343
1,1.3,46205
2,1.5,37731
3,2.0,43525
4,2.2,39891


In [50]:
# Preview the last five rows of the loaded data.
df.tail()

Unnamed: 0,YearsExperience,Salary
25,9.0,105582
26,9.5,116969
27,9.6,112635
28,10.3,122391
29,10.5,121872


In [51]:
# Count the missing values in each column.
df.isnull().sum()

YearsExperience    0
Salary             0
dtype: int64

In [52]:
# (rows, columns) of the loaded frame.
df.shape

(30, 2)

In [60]:

# Select columns by name rather than by position, so an added or reordered
# column cannot silently end up in the features or replace the target.
features = df[["YearsExperience"]].to_numpy()  # 2-D: one column per feature
target = df["Salary"].to_numpy()               # 1-D: one value per row
     

In [62]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

In [65]:
# Shapes of the full feature matrix followed by its train and test parts.
print(*(part.shape for part in (features, x_train, x_test)))

(30, 1) (24, 1) (6, 1)


In [68]:
# Hyperparameters for the from-scratch Lasso model.
model = Lasso_Regression(
    no_of_iteration=1000,
    learning_rate=0.02,
    lambda_parameter=200,
)

In [69]:
# Train the from-scratch model on the training split.
model.fit(x_train, y_train)

In [71]:
# Predictions for the same rows the model was trained on.
train_pred = model.predict(x_train)

In [72]:
# Show the training-set predictions.
print(train_pred)

[122385.47654131 107307.65246295  63016.54423277  35687.98809075
  46053.99214462  73382.54828665  52650.5401789   63958.90823767
  63016.54423277  99768.74042378  37572.71610054  39457.44411034
 110134.74447765  44169.26413482  92229.8283846  114846.56450213
  80921.46032583 124270.20455111  89402.7363699   55477.63219359
  62074.18022788  67728.36425726  81863.82433072  53592.9041838 ]


In [73]:
# Predictions for the held-out test rows.
test_pred = model.predict(x_test)

In [74]:
# Show the test-set predictions.
print(test_pred)

[115788.92850703  71497.82027685 102595.83243847  75267.27629644
  55477.63219359  60189.45221808]


In [75]:
from sklearn import metrics

In [76]:
# Evaluate the from-scratch model on the held-out test split.
score_1 = metrics.r2_score(y_test, test_pred)             # R^2 (coefficient of determination)
score_2 = metrics.mean_absolute_error(y_test, test_pred)  # mean absolute error

# One value per line: R^2 first, then the mean absolute error.
print(score_1, score_2, sep="\n")

0.902439150084887
6286.410012243672


In [77]:
from sklearn.linear_model import Lasso
# scikit-learn reference model, with its default penalty strength spelled out.
sk_model = Lasso(alpha=1.0)

In [78]:
# Train the scikit-learn model on the same training split.
sk_model.fit(x_train, y_train)

0,1,2
,alpha,1.0
,fit_intercept,True
,precompute,False
,copy_X,True
,max_iter,1000
,tol,0.0001
,warm_start,False
,positive,False
,random_state,
,selection,'cyclic'


In [80]:
# scikit-learn predictions for the held-out test rows.
sk_test_data_prediction = sk_model.predict(x_test)

In [81]:
# Test-set metrics for the scikit-learn model, each printed right after it is computed.
# Note: R^2 is a goodness-of-fit score (higher is better), not an error,
# even though the printed label calls it one.
score_1_ = metrics.r2_score(y_test, sk_test_data_prediction)
print("R squared error : ", score_1_)

# Mean absolute error, in the same units as the target.
score_2_ = metrics.mean_absolute_error(y_test, sk_test_data_prediction)
print('Mean Absolute Error : ', score_2_)

R squared error :  0.9024455162895476
Mean Absolute Error :  6286.423714244487
