Importing the dependencies

In [1]:
import numpy as np

Lasso Regression

In [2]:
class Lasso_Regression():
  """Linear regression with an L1 (Lasso) penalty, trained by batch gradient descent.

  The update rule follows the gradient of
      (1/m) * ( sum((Y - (X.w + b))**2) + lambda_parameter * sum(|w|) )
  where m is the number of training rows. The bias b is not penalised.

  Attributes set by fit():
    w : ndarray of shape (n_features,), learned weights
    b : float, learned bias
  """

  def __init__(self,learning_rate,no_of_iterations,lambda_parameter):
    """Store the hyperparameters.

    learning_rate    -- step size applied to every gradient update
    no_of_iterations -- number of full-batch gradient steps run by fit()
    lambda_parameter -- strength of the L1 penalty on the weights
    """
    self.learning_rate=learning_rate
    self.no_of_iterations=no_of_iterations
    self.lambda_parameter=lambda_parameter

  def fit(self,X,Y):
    """Learn w and b from training data.

    X -- 2-D array-like of shape (m, n): one row per sample, one column per feature
    Y -- array-like with m target values (a 1-D sequence or an (m, 1) column)
    """
    # Work on plain float arrays so column arithmetic behaves the same for
    # ndarray, DataFrame and Series inputs. Flattening Y prevents an (m, 1)
    # target from silently broadcasting to an (m, m) residual matrix.
    self.X = np.asarray(X, dtype=float)
    self.Y = np.asarray(Y, dtype=float).reshape(-1)

    self.m, self.n = self.X.shape
    self.w = np.zeros(self.n)
    self.b = 0

    # gradient descent: one full-batch update per iteration
    for _ in range(self.no_of_iterations):
      self.update_weights()

  def update_weights(self):
    """Perform one gradient-descent step on w and b using the stored training data."""
    # residual of the current linear model on the training rows
    residual = self.Y - self.predict(self.X)

    # Subgradient of the L1 term is sign(w). np.sign returns 0 for a weight
    # that is exactly 0, so such a weight is moved only by the data term and
    # is not nudged away from zero by the penalty itself.
    dw = (-2 * self.X.T.dot(residual) + self.lambda_parameter * np.sign(self.w)) / self.m

    # gradient for the bias (no penalty applied)
    db = -2 * np.sum(residual) / self.m

    # step against the gradient
    self.w = self.w - self.learning_rate * dw
    self.b = self.b - self.learning_rate * db

  def predict(self,X):
    """Return the linear prediction X.w + b for each row of X."""
    return X.dot(self.w) + self.b


Importing the dependencies

In [24]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

Data Collection and Preprocessing

In [4]:
# Load the salary dataset (YearsExperience -> Salary) into a DataFrame.
# NOTE(review): the name `diabetes_data` does not match the file being read (salary data) —
# looks like a leftover from another notebook. The absolute /content path presumably
# assumes a Colab runtime; confirm before running elsewhere.
diabetes_data = pd.read_csv('/content/salary_data.csv')

In [5]:
# Preview the first rows to confirm the columns loaded as expected.
diabetes_data.head()

Unnamed: 0,YearsExperience,Salary
0,1.1,39343
1,1.3,46205
2,1.5,37731
3,2.0,43525
4,2.2,39891


In [6]:
# (rows, columns) of the loaded frame.
diabetes_data.shape

(30, 2)

In [7]:
# Summary statistics per numeric column (count, mean, std, quartiles, min/max).
diabetes_data.describe()

Unnamed: 0,YearsExperience,Salary
count,30.0,30.0
mean,5.313333,76003.0
std,2.837888,27414.429785
min,1.1,37731.0
25%,3.2,56720.75
50%,4.7,65237.0
75%,7.7,100544.75
max,10.5,122391.0


In [9]:
# Split the frame into the model input (every column except Salary) and the regression target.
# `columns=` already names the axis being dropped from, so no separate `axis` argument is needed.
features = diabetes_data.drop(columns='Salary')
target = diabetes_data['Salary']

In [10]:
# Dumps every row of the feature frame.
# NOTE(review): features.head() would show the same structure with far less output.
print(features)

    YearsExperience
0               1.1
1               1.3
2               1.5
3               2.0
4               2.2
5               2.9
6               3.0
7               3.2
8               3.2
9               3.7
10              3.9
11              4.0
12              4.0
13              4.1
14              4.5
15              4.9
16              5.1
17              5.3
18              5.9
19              6.0
20              6.8
21              7.1
22              7.9
23              8.2
24              8.7
25              9.0
26              9.5
27              9.6
28             10.3
29             10.5


In [11]:
# Dumps every target value.
# NOTE(review): target.head() would show the same structure with far less output.
print(target)

0      39343
1      46205
2      37731
3      43525
4      39891
5      56642
6      60150
7      54445
8      64445
9      57189
10     63218
11     55794
12     56957
13     57081
14     61111
15     67938
16     66029
17     83088
18     81363
19     93940
20     91738
21     98273
22    101302
23    113812
24    109431
25    105582
26    116969
27    112635
28    122391
29    121872
Name: Salary, dtype: int64


Data Standardization

In [12]:
# Scaler used to standardize the feature column before training.
scaler = StandardScaler()

In [13]:
# Learn the scaling statistics from the feature column.
# NOTE(review): this is fitted on the full dataset, and the train/test split happens only
# afterwards, so test-row statistics leak into preprocessing. Fitting on the training
# split alone would avoid that.
scaler.fit(features)

In [14]:
# Apply the fitted scaling; the result is a NumPy array, not a DataFrame.
standardized_data = scaler.transform(features)

In [15]:
# Dumps the whole standardized array; standardized_data[:5] would be enough to inspect it.
print(standardized_data)

[[-1.51005294]
 [-1.43837321]
 [-1.36669348]
 [-1.18749416]
 [-1.11581443]
 [-0.86493538]
 [-0.82909552]
 [-0.75741579]
 [-0.75741579]
 [-0.57821647]
 [-0.50653674]
 [-0.47069688]
 [-0.47069688]
 [-0.43485702]
 [-0.29149756]
 [-0.1481381 ]
 [-0.07645838]
 [-0.00477865]
 [ 0.21026054]
 [ 0.2461004 ]
 [ 0.53281931]
 [ 0.6403389 ]
 [ 0.92705781]
 [ 1.03457741]
 [ 1.21377673]
 [ 1.32129632]
 [ 1.50049564]
 [ 1.5363355 ]
 [ 1.78721455]
 [ 1.85889428]]


In [17]:
# Rebind `features` to the standardized array. It previously held the DataFrame slice,
# so the name changes type here and re-running earlier cells afterwards behaves differently.
features = standardized_data
# Re-read of the Salary column; same values as the earlier `target` assignment.
target = diabetes_data['Salary']

Train Test Split

In [18]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(features,target, test_size = 0.2, random_state=2)

In [19]:
# Sanity-check the split sizes: all rows, training rows, test rows.
print(features.shape, X_train.shape, X_test.shape)

(30, 1) (24, 1) (6, 1)


Training the Lasso Regression Model

In [20]:
# Build the from-scratch Lasso model defined earlier in this notebook.
# NOTE(review): with learning_rate=0.001 each step closes only a small fraction of the gap
# to the optimum, so 1000 iterations may stop short of convergence (the training score
# reported below is modest). Worth re-checking with more iterations or a larger rate.
model = Lasso_Regression(learning_rate=0.001, no_of_iterations=1000, lambda_parameter=0.01)

In [21]:
# Run gradient descent on the training split; the learned weights and bias are stored on `model`.
model.fit(X_train,Y_train)

Model Evaluation

R² Score (labelled "accuracy" in the printed output)

In [25]:
# Predict on the training rows and score the fit.
# r2_score is the coefficient of determination (R²), a regression metric. It is not a
# classification accuracy, despite the variable name and the printed label.
X_train_prediction = model.predict(X_train)
training_data_accuracy = r2_score(Y_train, X_train_prediction)
print('Accuracy score on training data = ', training_data_accuracy)

Accuracy score on training data =  0.8379750605115395


In [26]:
# Same R² evaluation on the held-out rows.
# NOTE(review): the test split has only 6 rows, so this score is noisy.
X_test_prediction = model.predict(X_test)
test_data_accuracy = r2_score(Y_test, X_test_prediction)
print('Accuracy score on test data = ', test_data_accuracy)

Accuracy score on test data =  0.5854365014000669


In [29]:
# Years of experience to score. The trailing comma makes this a real one-element tuple;
# `(15)` is just the integer 15 in parentheses.
# NOTE(review): 15 is beyond the largest YearsExperience in the dataset (10.5), so the
# value printed below is an extrapolation outside the training range.
input_data = (15,)

input_data_as_numpy_array = np.asarray(input_data)

# Shape (1, n_features): a single sample row, as the scaler and the model expect.
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)

# Apply the same scaling that was used on the training features.
std_data = scaler.transform(input_data_reshaped)
print(std_data)

prediction = model.predict(std_data)
print(prediction)

print("Estimated Salary : ",prediction)

[[3.47168816]]
[154680.58020003]
Estimated Salary :  [154680.58020003]


