# Logistic Regression

# Data Preprocessing

## Importing the libraries

In [231]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Importing the dataset

In [232]:
winedata = pd.read_csv('winequality-red.csv',sep=";")
winedata['quality'].value_counts()

5    681
6    638
7    199
4     53
8     18
3     10
Name: quality, dtype: int64

In [233]:
def qualitifier(x):
    if x>6:
        x = 1
    else:
        x = 0
    return x
winedata['quality'] = winedata['quality'].apply(qualitifier)
winedata['quality'].value_counts


<bound method IndexOpsMixin.value_counts of 0       0
1       0
2       0
3       0
4       0
       ..
1594    0
1595    0
1596    0
1597    0
1598    0
Name: quality, Length: 1599, dtype: int64>

In [234]:
winedata['quality'].value_counts()

0    1382
1     217
Name: quality, dtype: int64

In [235]:
X = winedata.iloc[:,:-1].values
y = winedata.iloc[:,-1].values

In [236]:
print(X)
print(X.shape)

[[ 7.4    0.7    0.    ...  3.51   0.56   9.4  ]
 [ 7.8    0.88   0.    ...  3.2    0.68   9.8  ]
 [ 7.8    0.76   0.04  ...  3.26   0.65   9.8  ]
 ...
 [ 6.3    0.51   0.13  ...  3.42   0.75  11.   ]
 [ 5.9    0.645  0.12  ...  3.57   0.71  10.2  ]
 [ 6.     0.31   0.47  ...  3.39   0.66  11.   ]]
(1599, 11)


In [237]:
print(y)
print(y.shape)

[0 0 0 ... 0 0 0]
(1599,)


## Splitting the dataset into the Training set and Test set

In [238]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test = train_test_split(X,y,test_size=0.20,random_state=42)

In [239]:
X_train

array([[ 8.7 ,  0.69,  0.31, ...,  3.48,  0.74, 11.6 ],
       [ 6.1 ,  0.21,  0.4 , ...,  3.25,  0.59, 11.9 ],
       [10.9 ,  0.39,  0.47, ...,  3.3 ,  0.75,  9.8 ],
       ...,
       [ 7.2 ,  0.62,  0.06, ...,  3.51,  0.54,  9.5 ],
       [ 7.9 ,  0.2 ,  0.35, ...,  3.32,  0.8 , 11.9 ],
       [ 5.8 ,  0.29,  0.26, ...,  3.39,  0.54, 13.5 ]])

In [240]:
X_test

array([[ 7.7  ,  0.56 ,  0.08 , ...,  3.24 ,  0.66 ,  9.6  ],
       [ 7.8  ,  0.5  ,  0.17 , ...,  3.39 ,  0.48 ,  9.5  ],
       [10.7  ,  0.67 ,  0.22 , ...,  3.28 ,  0.98 ,  9.9  ],
       ...,
       [ 8.3  ,  0.6  ,  0.25 , ...,  3.15 ,  0.53 ,  9.8  ],
       [ 8.8  ,  0.27 ,  0.39 , ...,  3.15 ,  0.69 , 11.2  ],
       [ 9.1  ,  0.765,  0.04 , ...,  3.29 ,  0.54 ,  9.7  ]])

In [241]:
Y_train


array([0, 0, 0, ..., 0, 1, 0])

In [242]:
Y_train.shape

(1279,)

In [243]:
Y_test

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,

In [244]:
Y_test.shape

(320,)

In [245]:
X_train.shape

(1279, 11)

## Feature Scaling

In [246]:
from sklearn.preprocessing import MinMaxScaler
ms = MinMaxScaler()
X_train = ms.fit_transform(X_train)
X_test = ms.fit_transform(X_test)


In [247]:
X_train

array([[0.36283186, 0.39041096, 0.31      , ..., 0.58267717, 0.22699387,
        0.49230769],
       [0.13274336, 0.06164384, 0.4       , ..., 0.4015748 , 0.13496933,
        0.53846154],
       [0.55752212, 0.18493151, 0.47      , ..., 0.44094488, 0.23312883,
        0.21538462],
       ...,
       [0.2300885 , 0.34246575, 0.06      , ..., 0.60629921, 0.10429448,
        0.16923077],
       [0.2920354 , 0.05479452, 0.35      , ..., 0.45669291, 0.26380368,
        0.53846154],
       [0.10619469, 0.11643836, 0.26      , ..., 0.51181102, 0.10429448,
        0.78461538]])

In [248]:
X_test

array([[0.25714286, 0.44221106, 0.10126582, ..., 0.33043478, 0.32038835,
        0.21428571],
       [0.26666667, 0.38190955, 0.21518987, ..., 0.46086957, 0.14563107,
        0.19642857],
       [0.54285714, 0.55276382, 0.27848101, ..., 0.36521739, 0.63106796,
        0.26785714],
       ...,
       [0.31428571, 0.48241206, 0.3164557 , ..., 0.25217391, 0.19417476,
        0.25      ],
       [0.36190476, 0.15075377, 0.49367089, ..., 0.25217391, 0.34951456,
        0.5       ],
       [0.39047619, 0.64824121, 0.05063291, ..., 0.37391304, 0.2038835 ,
        0.23214286]])

In [249]:
Y_train

array([0, 0, 0, ..., 0, 1, 0])

In [250]:
Y_test

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,

In [251]:
unique_values, counts = np.unique(Y_train, return_counts=True)

# Print the unique values and their corresponding counts
for value, count in zip(unique_values, counts):
    print(f"Value: {value}, Count: {count}")

Value: 0, Count: 1109
Value: 1, Count: 170


In [252]:
unique_values,counts = np.unique(Y_test,return_counts = True)
for value, count in zip(unique_values, counts):
    print(f"Value: {value}, Count: {count}")


Value: 0, Count: 273
Value: 1, Count: 47


## Training the Logistic Regression model on the Training set

In [253]:
class LogisticRegression:
    def __init__(self,learning_rate = 0.01,num_iterations = 1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
    
    def sigmoid(self,z):
        return 1/(1+np.exp(-z))
    
    def fit(self,X,Y):
        num_samples,num_features = X.shape
        print(num_samples,num_features)
        self.weights = np.zeros(num_features)
        self.bias = 0

        for i in range(self.num_iterations):
            Y_pred = self.predicted(X)
            dw = (1/num_samples)*np.dot(X.T,(Y_pred - Y))
            db = (1/num_samples)*np.sum(Y_pred - Y)
            
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate*db
            
    def predicted(self,X):
        linear_model = np.dot(X,self.weights)+self.bias
        pred = self.sigmoid(linear_model)
        return pred
    
    def test(self,X,Y):
        Y_predtest = self.predicted(X)
        print(Y_predtest)
        Y_values = Y_predtest
        correct = 0
        total = len(Y)
        for i in range(len(Y_predtest)):
            if Y_predtest[i]>0.5:
                Y_predtest[i] =1
            else:
                Y_predtest[i]=0
        for i in range(len(Y)):
            if Y[i] == Y_predtest[i]:
                correct+=1
        print("Accuracy: ",correct/total) 
            
                
        
                

In [254]:
lgreg = LogisticRegression()
lgreg.fit(X_train,Y_train)


1279 11


In [255]:
print(lgreg.weights)
print(lgreg.bias)

[-0.23672809 -0.36926156 -0.1073699  -0.08269948 -0.14135322 -0.24184314
 -0.18902882 -0.51266781 -0.43895403 -0.09375629 -0.06698251]
-0.9287229910270933


In [256]:
print(lgreg.test(X_test, Y_test))


[0.15152402 0.14755312 0.11549734 0.13554847 0.16256846 0.13964703
 0.14193737 0.15042676 0.16264579 0.15177477 0.14571564 0.14537536
 0.16483383 0.15161079 0.15312459 0.18181241 0.14805024 0.14037307
 0.16014451 0.14337566 0.15022373 0.15115763 0.1646067  0.1760445
 0.15323353 0.1410587  0.12174725 0.1523742  0.11815617 0.13759562
 0.15134953 0.18320214 0.16382064 0.1455605  0.1429198  0.15688055
 0.16245132 0.15553065 0.14051484 0.18914922 0.13137764 0.1610246
 0.16416377 0.14564452 0.1451582  0.14690718 0.15131453 0.1453524
 0.13617636 0.16389829 0.09824094 0.130561   0.15526465 0.15961591
 0.14116103 0.15010228 0.15817774 0.14117577 0.16439301 0.13697696
 0.1627265  0.16077267 0.15979559 0.15928061 0.15370162 0.13987592
 0.16117145 0.12387776 0.15623657 0.12955439 0.16990111 0.13896469
 0.16205127 0.16762332 0.16564041 0.15264759 0.16925143 0.13387025
 0.15692451 0.16802945 0.1402003  0.15202057 0.144544   0.14152496
 0.14077918 0.14070252 0.12988291 0.15643363 0.15584418 0.1414460