### Implementing Logistic regression using Gradient Descent on Breast Cancer dataset

In [1]:
import numpy as np
import pandas as pd

In [3]:
df = pd.read_csv('breast_cancer.csv')

In [6]:
df.head()

Unnamed: 0.1,Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,diagnosis
0,0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [7]:
df.drop('Unnamed: 0',axis = 1 , inplace = True) #dropping unwanted columns

In [16]:
x = df.loc[:, df.columns != 'diagnosis'].to_numpy()
y = df['diagnosis'].to_numpy()

In [17]:
x = x.T
y = y.reshape(1,x.shape[1])

In [18]:
print(x.shape)
print(y.shape)

(30, 569)
(1, 569)


First step in logistic regression is defining the sigmoid function

In [19]:
def sigmoid(c):
    sig = 1/(1+np.exp(-c))
    return sig

Defining the loss function

In [20]:
def loss_function(y,y_pred):
    logistic_loss = -(1/150)*np.mean(y*(np.log(y_pred)) - (1-y)*np.log(1-y_pred))  
    return logistic_loss

Defining the model train function

In [21]:
def fit(x, y, epochs, lr):
    m = x.shape[1] #150
    n = x.shape[0] #4
    weights = np.zeros((n,1)) #150,1
    bias = 0

    for i in range(epochs):
        
        lin_pred = np.dot(weights.T, x) + bias
        logistic_pred = sigmoid(lin_pred)

        dw = (1/m)*np.dot(logistic_pred-y, x.T)
        db = (1/m) * np.sum(logistic_pred-y)

        weights = weights - lr*dw.T#adjusting the weights after each iteration
        bias = bias - lr*db#adjusting the bias term after every iteration    

        # if i%10==0:
            # print("Cost is : ", loss_function(y, logistic_pred)) #loss function can be used for monitoring the error after each iteration

    return weights, bias    

In [22]:
weights, bias = fit(x, y, 1000, lr = 00.1)

  sig = 1/(1+np.exp(-c))


Prediction

In [23]:
def predict(x,y,w,b):
    lin_pred = np.dot(weights.T,x) + b
    logistic_pred = sigmoid(lin_pred)
    return logistic_pred

In [24]:
y_hat = predict(x, y, weights, bias)

  sig = 1/(1+np.exp(-c))


In [26]:
y_hat = y_hat.tolist()

In [27]:
pred_class = [1 if i > 0.5 else 0 for i in y_hat[0]] #converting prediction probability to class. anything below 0.5 is converted to class 0 and above 0.5 is converted to class 1

In [15]:
acc = np.sum(np.equal(y[0], pred_class))/len(y[0])

In [16]:
print("Accuracy score is :", acc)

Accuracy score is : 0.9173989455184535
