## LASSO REGRESSION   

In [84]:
import numpy as np

In [78]:
class Lasso():
    """Lasso (L1-regularised) linear regression trained by batch gradient descent.

    The model is ``Y = X.dot(w) + b``.  Every call to ``update_weights`` takes
    one (sub)gradient step on the objective

        (1 / m) * (sum((Y - Y_predict) ** 2) + lambda_parameter * sum(abs(w)))

    where ``m`` is the number of training rows.  The bias ``b`` is not
    penalised.
    """

    def __init__(self, learning_rate, no_of_iterations, lambda_parameter):
        """Store the hyperparameters.

        Args:
            learning_rate: Step size used for each gradient-descent update.
            no_of_iterations: Number of gradient-descent updates run by ``fit``.
            lambda_parameter: Strength of the L1 penalty on the weights.
        """
        self.learning_rate = learning_rate
        self.no_of_iterations = no_of_iterations
        self.lambda_parameter = lambda_parameter

    def fit(self, X, Y):
        """Fit the weights and bias to the training data.

        Args:
            X: 2-D array-like of shape (m, n): m data points, n input features.
            Y: Array-like holding the m target values.

        Returns:
            None.  The fitted parameters are stored in ``self.w`` (one weight
            per input feature) and ``self.b``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so a column-shaped target cannot broadcast against the 1-D
        # prediction vector and produce an (m, m) residual.
        Y = np.asarray(Y, dtype=float).ravel()

        # m --> number of data points (rows), n --> number of features (columns)
        self.m, self.n = X.shape

        # Start from an all-zero model.
        self.w = np.zeros(self.n)
        self.b = 0.0

        self.X = X
        self.Y = Y

        for _ in range(self.no_of_iterations):
            self.update_weights()

    def update_weights(self):
        """Take one gradient-descent step on the weights and the bias."""
        residual = self.Y - self.predict(self.X)

        # Subgradient of lambda * |w|: +lambda for positive weights, -lambda
        # for negative ones and 0 at exactly zero, so the penalty always pulls
        # weights towards zero and never pushes a zero weight away from it.
        l1_subgradient = self.lambda_parameter * np.sign(self.w)

        # Gradients of the objective with respect to the weights and the bias.
        dw = (-2 * self.X.T.dot(residual) + l1_subgradient) / self.m
        db = -2 * np.sum(residual) / self.m

        self.w = self.w - self.learning_rate * dw
        self.b = self.b - self.learning_rate * db

    def predict(self, X):
        """Return the predicted targets ``X.dot(w) + b`` for the rows of ``X``."""
        return np.asarray(X, dtype=float).dot(self.w) + self.b

In [9]:
# importing the dependencies
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn import metrics

#### Data Pre-Processing

In [38]:
# Read the CSV file 'Salary_Data.csv' into a DataFrame.
# The path is relative, so the file must be in the current working directory.
salary_data = pd.read_csv('Salary_Data.csv')

In [76]:
# Dimensions of the DataFrame as (number of rows, number of columns)
salary_data.shape

(30, 2)

In [44]:
# Count the missing (null) values in each column
salary_data.isnull().sum()

YearsExperience    0
Salary             0
dtype: int64

In [77]:
# Feature matrix: every column except the last, kept 2-D (rows x features)
features = salary_data.iloc[:, :-1].values
# Target vector: the last column as a 1-D array. Selecting it with -1 mirrors
# the "all but the last column" rule used for the features above.
target = salary_data.iloc[:, -1].values

In [46]:
# Display the feature matrix
print(features)

[[ 1.1]
 [ 1.3]
 [ 1.5]
 [ 2. ]
 [ 2.2]
 [ 2.9]
 [ 3. ]
 [ 3.2]
 [ 3.2]
 [ 3.7]
 [ 3.9]
 [ 4. ]
 [ 4. ]
 [ 4.1]
 [ 4.5]
 [ 4.9]
 [ 5.1]
 [ 5.3]
 [ 5.9]
 [ 6. ]
 [ 6.8]
 [ 7.1]
 [ 7.9]
 [ 8.2]
 [ 8.7]
 [ 9. ]
 [ 9.5]
 [ 9.6]
 [10.3]
 [10.5]]


In [47]:
# Display the target values
print(target)

[ 39343.  46205.  37731.  43525.  39891.  56642.  60150.  54445.  64445.
  57189.  63218.  55794.  56957.  57081.  61111.  67938.  66029.  83088.
  81363.  93940.  91738.  98273. 101302. 113812. 109431. 105582. 116969.
 112635. 122391. 121872.]


#### Test & Train data

In [79]:
# Split the dataset into a training set and a testing set.
#   test_size=0.20  -> 20% of the rows are held out for testing, 80% for training
#   random_state=3  -> fixed random seed so the split is reproducible
# Results, in order: training features, testing features,
#                    training targets, testing targets.
X_train, X_test, Y_train, Y_test = train_test_split(
    features,
    target,
    test_size=0.20,
    random_state=3,
)

#### Training the LASSO model

In [69]:
# Build the Lasso regression model with its gradient-descent hyperparameters
model = Lasso(
    learning_rate=0.02,
    no_of_iterations=1000,
    lambda_parameter=200,
)

In [80]:
# Train the Lasso regression model on the training split
# (features X_train, targets Y_train)
model.fit(X_train, Y_train)

#### Prediction

In [81]:
# Generate predictions for the held-out test features using the trained model
test_data_prediction = model.predict(X_test)

In [82]:
# Display the predicted target values for the test set
print(test_data_prediction)

[ 72348.09147891  53509.39563124 100606.13525042 115677.09192856
  81767.43940275  68580.35230938]


In [83]:
from sklearn import metrics

# R-squared (coefficient of determination): how well the predicted values fit
# the actual values. It is a score rather than an error; a value closer to 1
# indicates a better fit.
score_r_square = metrics.r2_score(y_true=Y_test, y_pred=test_data_prediction)

# Mean absolute error between the actual and the predicted values.
# A lower value indicates better model performance.
score_mean_absolute = metrics.mean_absolute_error(
    y_true=Y_test,
    y_pred=test_data_prediction,
)

print("R squared error : ", score_r_square)
print('Mean Absolute Error : ', score_mean_absolute)

R squared error :  0.9695095870223456
Mean Absolute Error :  2900.7100634694325
