# Logistic Regression

Logistic regression is a process of modeling the probability of a discrete outcome given an input variable. The most common logistic regression models a binary outcome; something that can take two values such as true/false, yes/no, and so on.

In this week you will be doing logistic regression on breast cancer dataset using sklearn library. Feel free to create any new functions required.

In [20]:
#importinf libraries
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import datasets
import numpy as np

Prepare Data

In [21]:
breast_cancer = datasets.load_breast_cancer()
X, y = breast_cancer.data, breast_cancer.target

In [22]:
#spliting data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

Binary cross entropy loss

In [23]:
def BCELoss(y,y_pred):
  epsilon = 1e-15  # Small value to avoid log(0)
  y_pred = np.clip(y_pred, epsilon, 1 - epsilon)
  J = -y*np.log(y_pred) - (1-y)*np.log(1-y_pred)
  return np.mean(J)
  pass

Implement Logistic Regression here :)

Print the accuracy and cross entropy loss

In [24]:
def sigmoid(x):
  sig = 1/(1+np.exp(-x))
  return sig
  pass


class LogisticRegression:
    def __init__(self, lr=0.01, iters=1000): #lr (learning rate) & iters (iterations) could be anything of your choice
      self.lr = lr
      self.iters = iters
      pass

    def fit(self, X, y):
      self.m, self.n = X.shape
      self.W = np.zeros(self.n)
      self.b = 0
      self.X = X
      self.Y = y
      for i in range (self.iters):
        self.update_weights()
      return self
      pass

    def update_weights(self):
      # A = 1/(1 + np.exp(-(self.X.dot(self.W) + self.b)))
      A = sigmoid(self.X.dot(self.W) + self.b)

      tmp = A - self.Y.T
      tmp = np.reshape(tmp, self.m)
      dW = np.dot(self.X.T, tmp)/ self.m
      db = np.sum(tmp)/ self.m

      self.W = self.W - self.lr*dW
      self.b = self.b - self.lr*db

      return self

    def predict(self, X):
      # Z = 1 / ( 1 + np.exp( - ( X.dot( self.W ) + self.b ) ) )  
      Z = sigmoid(X.dot( self.W ) + self.b)

      Y = np.where( Z > 0.5, 1, 0 )         
      return Y 
      pass

def main():
  breast_cancer = datasets.load_breast_cancer()
  X, y = breast_cancer.data, breast_cancer.target
  
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)
  sc = StandardScaler()
  X_train = sc.fit_transform(X_train)
  X_test = sc.transform(X_test)

  model = LogisticRegression( lr = 0.01, iters = 1000 ) 
  model.fit( X_train, y_train )

  Y_pred = model.predict( X_test )         

  correctly_classified = 0    
       
  count = 0    
  for count in range( np.size( Y_pred ) ) :   
      
      if y_test[count] == Y_pred[count] :             
          correctly_classified = correctly_classified + 1            
      count = count + 1
        
  print( "Accuracy on test set by our model : ", (  
    correctly_classified / count ) * 100 ) 
  
  loss = BCELoss(y_test ,Y_pred)
  print("Cross Entropy loss : ", loss)

if __name__ == "__main__" :      
    main()   


Accuracy on test set by our model :  93.85964912280701
Cross Entropy loss :  2.120844143411895
