In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.metrics import accuracy_score,f1_score

In [5]:
class LassoRegression():
  #Initiating hyperparameter
  def __init__(self,learning_rate,no_of_iterations,lambda_parameter):
    self.learning_rate = learning_rate
    self.no_of_iterations = no_of_iterations
    self.lambda_parameter = lambda_parameter

  def fit(self,X,y):
    self.m,self.n = X.shape
    self.w=np.zeros(self.n)
    self.b=0
    self.X = X
    self.y = y

    for i in range(self.no_of_iterations):
      self.update_weights()

  def update_weights(self):

    #Linear Relationship
    y_pred = self.predict(self.X)

    dw = np.zeros(self.n)
    for i in range(self.n):
      derivative = -2 * np.dot(self.X[:, i], self.y - y_pred) / self.m
          # Apply L1 penalty gradient (subgradient)
      if self.w[i] > 0:
        dw[i] = derivative + self.lambda_parameter
      elif self.w[i] < 0:
        dw[i] = derivative - self.lambda_parameter
      else:
          # Subgradient at 0 can be any value between [-λ, λ]
          dw[i] = derivative


    db = -2*np.sum(self.y-y_pred)/self.m

    self.w = self.w - self.learning_rate*dw
    self.b = self.b - self.learning_rate*db

  def predict(self,X):
    return np.dot(X,self.w)+self.b

In [7]:
data = pd.read_csv(r"/content/winequality-white.csv",delimiter=";")
data

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.00100,3.00,0.45,8.8,6
1,6.3,0.30,0.34,1.6,0.049,14.0,132.0,0.99400,3.30,0.49,9.5,6
2,8.1,0.28,0.40,6.9,0.050,30.0,97.0,0.99510,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
...,...,...,...,...,...,...,...,...,...,...,...,...
4893,6.2,0.21,0.29,1.6,0.039,24.0,92.0,0.99114,3.27,0.50,11.2,6
4894,6.6,0.32,0.36,8.0,0.047,57.0,168.0,0.99490,3.15,0.46,9.6,5
4895,6.5,0.24,0.19,1.2,0.041,30.0,111.0,0.99254,2.99,0.46,9.4,6
4896,5.5,0.29,0.30,1.1,0.022,20.0,110.0,0.98869,3.34,0.38,12.8,7


In [8]:
data.isnull().sum()

Unnamed: 0,0
fixed acidity,0
volatile acidity,0
citric acid,0
residual sugar,0
chlorides,0
free sulfur dioxide,0
total sulfur dioxide,0
density,0
pH,0
sulphates,0


In [9]:
data.dtypes

Unnamed: 0,0
fixed acidity,float64
volatile acidity,float64
citric acid,float64
residual sugar,float64
chlorides,float64
free sulfur dioxide,float64
total sulfur dioxide,float64
density,float64
pH,float64
sulphates,float64


In [10]:
data['quality'].value_counts()

Unnamed: 0_level_0,count
quality,Unnamed: 1_level_1
6,2198
5,1457
7,880
8,175
4,163
3,20
9,5


In [11]:
ip = data.drop("quality",axis=1)
op = data['quality']

In [12]:
ip.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9


In [13]:
op.head()

Unnamed: 0,quality
0,6
1,6
2,6
3,6
4,6


In [104]:
x_train,x_test,y_train,y_test = train_test_split(ip,op,test_size=0.2,random_state=5)

In [105]:
x_train.shape

(3918, 11)

In [106]:
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.fit_transform(x_test)

In [125]:
model = LassoRegression(learning_rate=0.01,no_of_iterations=1000,lambda_parameter=0.01)

In [126]:
model.fit(x_train,y_train)

In [127]:
pred = model.predict(x_test)
pred = pred.round().astype(int)

In [128]:
df = pd.DataFrame({"Actual:":list(y_test),"Prediction:":pred})
df

Unnamed: 0,Actual:,Prediction:
0,6,6
1,6,6
2,5,6
3,5,6
4,7,6
...,...,...
975,5,5
976,5,6
977,7,6
978,6,7


In [129]:
ac = accuracy_score(y_test,pred)
print("Accuracy:",ac)

Accuracy: 0.5428571428571428


**By Sklearn Model**

In [130]:
lr = Lasso(alpha=0.01,max_iter=1000,fit_intercept=True)

In [131]:
lr.fit(x_train,y_train)

In [132]:
prediction = lr.predict(x_test)
prediction = prediction.round().astype(int)

In [133]:
dfs = pd.DataFrame({"Actual:":list(y_test),"Prediction:":pred})
dfs

Unnamed: 0,Actual:,Prediction:
0,6,6
1,6,6
2,5,6
3,5,6
4,7,6
...,...,...
975,5,5
976,5,6
977,7,6
978,6,7


In [134]:
ac = accuracy_score(y_test,prediction)
print("Accuracy:",ac)

Accuracy: 0.5459183673469388
