# **Logistic Regression from scratch**


## **Equations:**
<img src="./images/sigmoid.png" alt="Sigmoid function" width="600"/>

$\hat{y}$ - predicted value

$s(x)$ - sigmoid function

--- 
Cost function

<img src="./images/cost_func.png" alt="Cost function" width="600"/>

- In logistic regression we use the cross-entropy function as the cost instead of MSE (mean squared error).

---

We use gradient descent to train logistic regression, so we need to calculate the gradient of the cost function with respect to the weights and the bias.

<img src="./images/gradient.png" alt="Gradient of the cost function" width="600"/>

$y_i$ - actual value

$\hat{y}$ - predicted value

---
#### Gradient Descent
<img src="./images/learning_rate.png" alt="Gradient descent update rule" width="600"/>

*α* - learning rate

In [2]:
# libraries
import numpy as np  
import pandas as pd  
import matplotlib.pyplot as plt  

# for filtering warnings
import warnings  
warnings.filterwarnings('ignore')  # Ignore warnings

from sklearn import datasets  # for loading datasets
from sklearn.model_selection import train_test_split  #for split dataset 2 part: train and test sets
from sklearn.linear_model import LogisticRegression  #LogisticRegression model
from sklearn.metrics import *  


In [3]:
class Log_reg():
    """Binary logistic-regression classifier trained with batch gradient descent.

    Parameters
    ----------
    n_iterations : int, default=100
        Number of gradient-descent steps performed by ``fit``.
    learning_rate : float, default=0.01
        Step size applied to the gradients at every step.

    Attributes
    ----------
    weights : numpy.ndarray or None
        One coefficient per feature; ``None`` until ``fit`` has been called.
    bias : float or None
        Intercept term; ``None`` until ``fit`` has been called.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logs so the
    # cross-entropy stays finite when the sigmoid saturates at exactly 0 or 1.
    _EPS = 1e-15

    def __init__(self,n_iterations = 100, learning_rate = 0.01):
        self.bias = None
        self.weights = None
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations

    def sigmoid_f(self, z):
        """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``, element-wise.

        ``exp`` is only ever evaluated at non-positive arguments, so large
        negative inputs no longer overflow the way ``np.exp(-z)`` does.

        Parameters
        ----------
        z : scalar or array-like of numbers

        Returns
        -------
        numpy.float64 for a scalar input, otherwise a numpy.ndarray shaped like ``z``.
        """
        z = np.asarray(z, dtype=float)
        e = np.exp(-np.abs(z))  # always in (0, 1]
        # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- same function.
        out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
        return out[()]  # unwraps a 0-d result to a numpy scalar; no-op for arrays

    def compute_cost(self, y, h):
        """Return the mean binary cross-entropy between labels ``y`` and probabilities ``h``.

        Parameters
        ----------
        y : array-like of 0/1 labels
        h : array-like of predicted probabilities, same length as ``y``

        Returns
        -------
        numpy.float64
        """
        y = np.asarray(y, dtype=float)
        # Without clipping, h == 0 or h == 1 produces 0 * log(0) = nan.
        h = np.clip(np.asarray(h, dtype=float), self._EPS, 1 - self._EPS)
        m = len(y)
        return -1/m * np.sum(y * np.log(h) + (1 - y) * np.log(1 - h))

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from the training data.

        Both parameters start at 1 and are updated ``n_iterations`` times using
        the gradient of the cross-entropy cost computed over all samples.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like holding n_samples 0/1 labels (a column vector is flattened)

        Raises
        ------
        ValueError
            If ``X`` and ``y`` do not contain the same number of samples.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so a (n, 1) label column cannot broadcast `predictions - y`
        # into an (n, n) matrix and silently corrupt the weight shape.
        y = np.asarray(y, dtype=float).ravel()

        n_samples = X.shape[0]
        n_features = X.shape[1]
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} labels"
            )

        self.weights = np.ones(n_features)
        self.bias = 1

        for _ in range(self.n_iterations):
            z = np.dot(X, self.weights) + self.bias    # linear prediction
            error = self.sigmoid_f(z) - y              # shared by both gradients

            # gradients of the cost with respect to weights and bias
            dw = 1/n_samples * np.dot(X.T, error)
            db = 1/n_samples * np.sum(error)

            self.weights = self.weights - self.learning_rate * dw
            self.bias = self.bias - self.learning_rate * db

    def predict(self, X):
        """Return the predicted class (0 or 1) for every row of ``X``.

        A sample is assigned class 1 only when its predicted probability is
        strictly greater than 0.5.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        list of int
        """
        X = np.asarray(X, dtype=float)
        z = np.dot(X, self.weights) + self.bias
        probabilities = np.atleast_1d(self.sigmoid_f(z))
        return np.where(probabilities <= 0.5, 0, 1).tolist()
    

def accuracy(y_pred, y_test):
    """Return the fraction of predictions that equal the true labels.

    Both inputs are converted to flat NumPy arrays first, so plain Python
    lists are compared element by element (``list == list`` would otherwise
    yield a single bool and a meaningless score).

    Parameters
    ----------
    y_pred : array-like of predicted labels
    y_test : array-like of true labels, same length as ``y_pred``

    Returns
    -------
    numpy.float64 in the range [0, 1]

    Raises
    ------
    ValueError
        If the inputs differ in length or are empty.
    """
    predicted = np.asarray(y_pred).ravel()
    actual = np.asarray(y_test).ravel()
    if predicted.shape != actual.shape:
        raise ValueError(
            f"y_pred has {predicted.shape[0]} elements but y_test has {actual.shape[0]}"
        )
    if actual.size == 0:
        raise ValueError("accuracy is undefined for empty input")
    return np.sum(predicted == actual) / len(actual)


In [19]:
# Load the breast-cancer dataset from scikit-learn's `datasets` module
# and hold out 20% of the rows as a test set.
bc = datasets.load_breast_cancer()
X = bc.data
y = bc.target

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print(f'shape of X_train: {X_train.shape}')
print(f'shape of y_train: {y_train.shape}')

shape of X_train: (455, 30)
shape of y_train: (455,)


In [16]:
# Diabetes dataset (the CSV file on disk is named 'diabets.csv').
# NOTE: this cell rebinds X, y, X_train, X_test, y_train and y_test, so any
# cell that uses those names afterwards works on this dataset instead of
# whatever was loaded before.

# Plain relative file name: the original '.\diabets.csv' only resolves on
# Windows, because '\' is not a path separator on other platforms.
diabets = pd.read_csv('diabets.csv')

# dropna() returns a new frame, so the raw `diabets` frame stays untouched.
df = diabets.dropna()

X = df.drop(columns=['Outcome'])  # every column except the target
y = df['Outcome']                 # target column

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size= 0.2,
                                                    random_state=42)

In [20]:
# Train the hand-written Log_reg classifier on X_train / y_train,
# then score its predictions for X_test against y_test.
model = Log_reg(n_iterations=1000, learning_rate=0.01)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
acc = accuracy(y_pred, y_test)
print(f'accuracy of logistic regression from scratch: {acc}')

accuracy of logistic regression from scratch: 0.9473684210526315


In [21]:
# Reference model: scikit-learn's LogisticRegression fitted on X_train / y_train
# and scored on X_test with accuracy_score.
logmodel = LogisticRegression(max_iter=1000)
logmodel.fit(X_train, y_train)

y_pred = logmodel.predict(X_test)
acc_log_reg = accuracy_score(y_test, y_pred)
print(f'accuracy of logistic regression from Scikit-learn: {acc_log_reg}')

accuracy of logistic regression from Scikit-learn: 0.956140350877193
