In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time as time

In [3]:
# Seed NumPy's global random number generator so np.random results are reproducible across runs.
np.random.seed(0)

# Logistic Regression

### Model

![image.png](https://drive.google.com/uc?id=1pnidxNrAEx2_belcDMizRj-IBozZMAIM)

In [5]:
# calculate linear combination of features
def linear_combination(X: np.ndarray, w: np.ndarray) -> np.ndarray:
    '''Compute the linear score z = Xw for every example.

    Args:
        X: feature matrix, one example per row, shape (n, m).
        w: weight vector, shape (m,) or (m, 1).

    Returns:
        The matrix product of X and w: shape (n,) for a 1-D w,
        shape (n, 1) for a column-vector w.
    '''
    scores = X @ w
    return scores
    

### Sigmoid

![image.png](https://drive.google.com/uc?id=1LpnG5Llo-bu4AiTE4CMJ4G0aWmU_VXZk)

In [6]:
# Apply sigmoid function
def sigmoid(z: np.ndarray) -> np.ndarray:
    '''Numerically stable logistic sigmoid, 1 / (1 + exp(-z)).

    Args:
        z: scalar or array of real-valued scores.

    Returns:
        Element-wise sigmoid of z as float64 values in [0, 1], with the
        same shape as z.
    '''
    z = np.asarray(z, dtype=float)
    # Only ever exponentiate a non-positive number, so exp() cannot overflow:
    #   z >= 0:  1 / (1 + exp(-z))
    #   z <  0:  exp(z) / (1 + exp(z))   (algebraically the same value)
    decay = np.exp(-np.abs(z))
    return np.where(z >= 0, 1.0, decay) / (1.0 + decay)

Now we predict a label for each example: based on the value of the sigmoid, an example is assigned <strong>label 1</strong> if $P(z) > threshold$, and <strong>label 0</strong> otherwise (i.e. if $P(z) \le threshold$).

In [7]:
def predict(X: np.ndarray, w: np.ndarray, threshold: float) -> np.ndarray:
    '''Assign a 0/1 label to every row of X.

    The sigmoid of the linear score X @ w is compared against
    ``threshold``: strictly greater gives label 1, anything else label 0.
    '''
    probabilities = sigmoid(linear_combination(X, w))
    is_positive = probabilities > threshold
    return np.where(is_positive, 1, 0)

In [8]:
# Sanity check for predict() on a tiny hand-made example.
# Each row is one example; the leading 1 in every row is the constant bias
# feature, so weight_vector[0] acts as the intercept.
feature_matrix = np.array([[1, 20, 2], [1, 2, 2]])
weight_vector = np.array([-1, 0, 1])

print(f'Shape of feature matrix:{feature_matrix.shape}')
print(f'Shape of weight vector:{weight_vector.shape}')

# Both rows give z = -1*1 + 0*x + 1*2 = 1, so sigmoid(z) > 0.5 and both labels are 1.
class_labels = predict(feature_matrix, weight_vector, 0.5)

print(f'Shape of output:{class_labels.shape}')
print(f'Label vectors are :{class_labels}')

Shape of feature matrix:(2, 3)
Shape of weight vector:(3,)
Shape of output:(2,)
Label vectors are :[1 1]


### Loss Function

![image.png](https://drive.google.com/uc?id=1z8IsBO-0lSdAurkeI0ieN0ALAvKFIIKG)

![image.png](https://drive.google.com/uc?id=1QFEehL2UPhByCFGZoIQomrWBD3nJ5m21)

![image.png](https://drive.google.com/uc?id=1OjBMkkg_-2UAYQ7N_IryyPEgPh1yEqhY)

In [9]:
def loss(y, sigmoid_vector, weight_vector, l1_reg_rate, l2_reg_rate):
    '''Binary cross-entropy loss with optional L1 and L2 penalties.

    loss = -sum(y*log(p) + (1-y)*log(1-p))
           + l1_reg_rate * sum(|w|)
           + l2_reg_rate * (w^T w)

    Args:
        y: true labels (0 or 1), one per example.
        sigmoid_vector: predicted probabilities p, same shape as y.
        weight_vector: model weights w.
        l1_reg_rate: multiplier for the L1 penalty sum(|w|); 0 disables it.
        l2_reg_rate: multiplier for the L2 penalty w^T w; 0 disables it.

    Returns:
        The penalized loss summed over all examples.
    '''
    # Keep probabilities strictly inside (0, 1): a saturated sigmoid would
    # otherwise produce log(0) = -inf and 0 * -inf = nan.
    tiny = np.finfo(float).eps
    clipped_prob = np.clip(sigmoid_vector, tiny, 1 - tiny)
    cross_entropy = -1*np.sum(y*np.log(clipped_prob)
                              + (1-y)*np.log(1-clipped_prob))
    l1_penalty = l1_reg_rate*np.sum(np.abs(weight_vector))
    l2_penalty = l2_reg_rate*np.dot(np.transpose(weight_vector), weight_vector)
    return cross_entropy + l1_penalty + l2_penalty

If we don't want regularization then we set $l1\_reg\_rate$ and $l2\_reg\_rate$ to $0$.

### Differentiation of Loss and Sigmoid

![image.png](https://drive.google.com/uc?id=1WM6hUNU8hS-IWposcn-EKGKSolGL0dcO)

![image.png](https://drive.google.com/uc?id=1SvCEou91tSEkoN283I7sLUXvwvbIb85M)

![image.png](https://drive.google.com/uc?id=1oQsGXmcYHcGJ8t_O_nD1quXNCgTvEhzV)

In [10]:
def calculate_gradient(X: np.ndarray, y: np.ndarray, w: np.ndarray,
                       reg_rate: float) -> np.ndarray:
    '''Gradient of the loss with respect to the weights.

    Computed as X^T (sigmoid(Xw) - y) + reg_rate * w, giving one
    component per entry of w.
    '''
    residual = sigmoid(linear_combination(X, w)) - y
    data_term = np.transpose(X) @ residual
    penalty_term = reg_rate * w
    return data_term + penalty_term

### Combine all the functions above into a single class named `Logistic_regression`

In [11]:
class Logistic_regression(object):
    '''Logistic regression model for binary classification.

    The predicted probability is sigmoid(X @ w). Weights are either set
    explicitly with ``set_weights`` or estimated with ``gd``.

    Attributes (created by the methods below; there is no constructor):
        w: current weight vector, set by ``set_weights`` or ``gd``.
        w_all: weight vectors recorded by ``gd`` before each update.
        err_all: loss values recorded by ``gd`` before each update.
    '''
    def set_weights(self, w):
        '''Replace the current weight vector with ``w``.'''
        self.w = w

    def linear_combination(self, X:np.ndarray)->np.ndarray:
        '''
        Calculate the linear combination z = Xw using the stored weights.
        shape(X) = (n, m)
        shape(w) = (m,) or (m, 1)
        shape(z) = (n,) or (n, 1)
        returns numpy array
        '''
        return X@self.w

    def sigmoid(self, z:np.ndarray) ->np.ndarray:
        '''
        Numerically stable sigmoid, 1 / (1 + exp(-z)).
        Returns, element-wise, the probability of belonging
        to the positive class (label 1) as float64 values.
        '''
        z = np.asarray(z, dtype=float)
        # Only exponentiate non-positive values so exp() cannot overflow:
        #   z >= 0:  1 / (1 + exp(-z))
        #   z <  0:  exp(z) / (1 + exp(z))   (same value algebraically)
        decay = np.exp(-np.abs(z))
        return np.where(z >= 0, 1.0, decay) / (1.0 + decay)

    def activation(self, X:np.ndarray)->np.ndarray:
        '''Predicted probabilities sigmoid(X @ w) for every row of X.'''
        return self.sigmoid(self.linear_combination(X))
    
    def predict(self, x:np.ndarray, threshold:float=0.5)->np.ndarray:
        '''
        Predict the output labels for input x using the stored weights:
        1 where the predicted probability is strictly greater than
        threshold, 0 otherwise.
        '''
        return (self.activation(x) > threshold).astype(int)
    
    def loss(self, X:np.ndarray, y:np.ndarray, reg_rate:float)->float:
        '''
        Calculate binary cross entropy loss plus the penalty
        reg_rate * (w^T w), summed over all examples.
        '''
        # Keep probabilities strictly inside (0, 1): a saturated sigmoid
        # would otherwise give log(0) = -inf and 0 * -inf = nan.
        tiny = np.finfo(float).eps
        predicted_prob = np.clip(self.activation(X), tiny, 1 - tiny)
        cross_entropy = -1*np.sum(y*np.log(predicted_prob) +
                                  (1-y)*np.log(1-predicted_prob))
        penalty = reg_rate*np.dot(np.transpose(self.w), self.w)
        return cross_entropy + penalty
        
    def calculate_gradient(self, X:np.ndarray, y:np.ndarray,
                           reg_rate:float)->np.ndarray:
        '''
        Calculate the gradient of the loss w.r.t. each weight:
        X^T (activation(X) - y) + reg_rate * w

        NOTE(review): ``loss`` uses the penalty reg_rate * w^T w, whose
        exact derivative is 2 * reg_rate * w; the factor of 2 is omitted
        here. Left unchanged to preserve existing training behaviour.
        '''
        return np.transpose(X)@(self.activation(X)-y) + reg_rate*self.w
    
    def update_weights(self, grad:np.ndarray, lr:float)->np.ndarray:
        '''
        Return the updated weights based on the formula
        w_new = w_old - lr*(grad_of_loss)
        The stored weights are not modified; the caller assigns the result.
        '''
        return (self.w-lr*grad)
    
    def gd(self, X:np.ndarray, y:np.ndarray,
           num_epoches:int, lr:float, reg_rate:float)->np.ndarray:
        '''
        Estimate the parameters of the logistic regression model
        with gradient descent.

        Weights start at zero. On every epoch the current weights and
        loss are appended to ``w_all`` / ``err_all`` before the update,
        so the final weights are only available as the return value
        (and ``self.w``).
        '''
        self.w = np.zeros(X.shape[1])
        self.w_all = []
        self.err_all = []
        for i in range(num_epoches):
            dJdW = self.calculate_gradient(X, y, reg_rate)
            self.w_all.append(self.w)
            self.err_all.append(self.loss(X, y, reg_rate)) # stored loss at each step
            self.w = self.update_weights(dJdW, lr)
        return self.w