<a href="https://colab.research.google.com/github/Sachithananthan-1/Building_Lasso_Regression_from_Scratch/blob/main/Building_Lasso_Regression_from_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn import metrics


In [15]:
class Lasso_Regression():

  # Initiating the hyperparameters
   def __init__(self,learning_rate,no_of_iterations,lambda_parameter):

    self.learning_rate = learning_rate
    self.no_of_iterations = no_of_iterations
    self.lambda_parameter = lambda_parameter

  # fitting the dataset in SVM classifier
   def fit(self,X,Y):
    self.m, self.n = X.shape

    self.w = np.zeros(self.n)
    self.b = 0
    self.X = X
    self.Y = Y

    # Implementing Gradient Descent Algorithm
    for i in range(self.no_of_iterations):
      self.update_weights()

   def update_weights(self):

    # Label Encoding
    Y_prediction = self.predict(self.X)

    # gradients for weights
    dw = np.zeros(self.n)
    for i in range(self.n):
      if self.w[i]>0:
        dw[i] = (-(2*(self.X[:,i].dot(self.Y-Y_prediction) + self.lambda_parameter)))/self.m

    else:
      dw[i] = (-(2*(self.X[:,i].dot(self.Y-Y_prediction) - self.lambda_parameter)))/self.m

    # gradient for bias
    db = -2*np.sum(self.Y-Y_prediction)/self.m

    self.w = self.w - self.learning_rate*dw
    self.b = self.b - self.learning_rate*db

   def predict(self,X):
    return X.dot(self.w) + self.b

In [7]:
salary = pd.read_csv('/content/salary_data.csv')
salary.head()

Unnamed: 0,YearsExperience,Salary
0,1.1,39343
1,1.3,46205
2,1.5,37731
3,2.0,43525
4,2.2,39891


In [8]:
salary.shape

(30, 2)

In [9]:
salary.isnull().sum()

YearsExperience    0
Salary             0
dtype: int64

In [11]:
# Splitting the feature & target

X = salary.iloc[:,:-1].values
Y = salary.iloc[:,1].values
print(X)

[[ 1.1]
 [ 1.3]
 [ 1.5]
 [ 2. ]
 [ 2.2]
 [ 2.9]
 [ 3. ]
 [ 3.2]
 [ 3.2]
 [ 3.7]
 [ 3.9]
 [ 4. ]
 [ 4. ]
 [ 4.1]
 [ 4.5]
 [ 4.9]
 [ 5.1]
 [ 5.3]
 [ 5.9]
 [ 6. ]
 [ 6.8]
 [ 7.1]
 [ 7.9]
 [ 8.2]
 [ 8.7]
 [ 9. ]
 [ 9.5]
 [ 9.6]
 [10.3]
 [10.5]]


In [12]:
print(Y)

[ 39343  46205  37731  43525  39891  56642  60150  54445  64445  57189
  63218  55794  56957  57081  61111  67938  66029  83088  81363  93940
  91738  98273 101302 113812 109431 105582 116969 112635 122391 121872]


In [13]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state = 2)

In [16]:
# Training the Linear Regression model

model = Lasso_Regression(learning_rate = 0.02, no_of_iterations=1000,lambda_parameter=200)
model.fit(X_train, Y_train)

In [17]:
test_data_prediction = model.predict(X_test)
print(test_data_prediction)

[ 36071.68106775  34169.04853406  66513.80160688  58903.2714721
  91248.02454493  80783.5456096  101712.50348025  52244.05760416
  42730.89493569  88394.07574438]


In [18]:
# R squared error
score_1 = metrics.r2_score(Y_test, test_data_prediction)

# Mean Absolute Error
score_2 = metrics.mean_absolute_error(Y_test, test_data_prediction)

print("R squared error : ", score_1)
print('Mean Absolute Error : ', score_2)

R squared error :  0.9041444106355803
Mean Absolute Error :  6674.924155816292
