In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd



# load data

In [2]:
df=load_breast_cancer(as_frame=True,return_X_y=True) #returns df
data=df[0]
target=df[1]
data.head()


Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [3]:
#check null values
data.isna().sum()

mean radius                0
mean texture               0
mean perimeter             0
mean area                  0
mean smoothness            0
mean compactness           0
mean concavity             0
mean concave points        0
mean symmetry              0
mean fractal dimension     0
radius error               0
texture error              0
perimeter error            0
area error                 0
smoothness error           0
compactness error          0
concavity error            0
concave points error       0
symmetry error             0
fractal dimension error    0
worst radius               0
worst texture              0
worst perimeter            0
worst area                 0
worst smoothness           0
worst compactness          0
worst concavity            0
worst concave points       0
worst symmetry             0
worst fractal dimension    0
dtype: int64

In [4]:
np.max(data)

mean radius                  28.11000
mean texture                 39.28000
mean perimeter              188.50000
mean area                  2501.00000
mean smoothness               0.16340
mean compactness              0.34540
mean concavity                0.42680
mean concave points           0.20120
mean symmetry                 0.30400
mean fractal dimension        0.09744
radius error                  2.87300
texture error                 4.88500
perimeter error              21.98000
area error                  542.20000
smoothness error              0.03113
compactness error             0.13540
concavity error               0.39600
concave points error          0.05279
symmetry error                0.07895
fractal dimension error       0.02984
worst radius                 36.04000
worst texture                49.54000
worst perimeter             251.20000
worst area                 4254.00000
worst smoothness              0.22260
worst compactness             1.05800
worst concav

# split data

In [5]:
#append bias
data =np.array(data)
m=data.shape[0]
data=np.append(np.ones((m,1)),data,axis=1)
data.shape

(569, 31)

In [6]:
x_train,x_test,y_train,y_test=train_test_split(data,target,test_size=0.3,stratify=target,random_state=42)

#reshape label vectors
y_train = np.array(y_train).reshape(-1,1)
y_test = np.array(y_test).reshape(-1,1)
 
print("x train: ", x_train.shape)
print("x test: ", x_test.shape)
print("y train: ", y_train.shape)
print("y test: ", y_test.shape)

x train:  (398, 31)
x test:  (171, 31)
y train:  (398, 1)
y test:  (171, 1)


# normalize numeric data and map binary target

In [7]:
stand=StandardScaler()
x_train=stand.fit_transform(x_train)
x_test=stand.transform(x_test)
x_train 

array([[ 0.        , -0.70982078, -0.258417  , ...,  0.40093281,
        -0.41407439,  0.44522333],
       [ 0.        , -0.83033136,  2.2311266 , ..., -1.69278836,
        -2.09572345, -1.32341746],
       [ 0.        , -1.01109725, -0.22726989, ..., -1.36423974,
        -0.35412529, -0.88664465],
       ...,
       [ 0.        , -0.12735293, -1.37526319, ..., -0.50161479,
        -0.84447817,  0.15509266],
       [ 0.        ,  0.84820898, -0.05818561, ...,  2.06733377,
         0.27610881,  0.33537313],
       [ 0.        , -1.20219261, -0.28511452, ..., -0.22747926,
        -0.48478358,  1.68038109]])

# perfrom steps of gradient

In [8]:
def hypothesis(x,wvector):
    h=x@wvector
    return h

In [9]:
def sigmoid(x):
    z=1/(1+np.e**(-x))
    return z

In [10]:
def forward(x_train,y_train,ysig): #compute loss 
   
    loss=-y_train*np.log(ysig)-(1-y_train)*np.log(1-ysig)
    cost= (np.sum(loss)) / x_train.shape[0] 
    return cost

In [20]:
def back(x_train,y_train,ysig):#compute gradients,(1/(x_train).shape[0]) *
   
    gradients = x_train.T@(ysig-y_train)
    return gradients

In [12]:
def update(x_train,y_train,lr,itera):#update parameters
    error=[]
    saved_y=np.zeros(len(x_train),) #save y_predit
    
    m=x_train.shape[1] #no of features
    
    thetavect=np.full((m,1),0.01) #initialize weights
    
    for i in range(0,itera):
        z=hypothesis(x_train,thetavect)
        ysig=sigmoid(z)
        #print(ysig)
        cost=forward(x_train,y_train,ysig)
        gradients=back(x_train,y_train,ysig)
        thetavect -= lr*gradients
        
        error.append(cost)
        
        
    return thetavect,error

# create decision function

In [17]:
def predict(thetas, x_test):
    # x_test is a input for forward propagation
    
    f=hypothesis(x_test,thetas)
    z = sigmoid(f)
    Y_prediction = []
    
    # if z is bigger than 0.5, our prediction is sign one (y_pred = 1),
    # if z is smaller than 0.5, our prediction is sign zero (y_pred = 0)
    
    for i in range(z.shape[0]):
        if z[i]<= 0.5:
            Y_prediction.append(0)
        else:
            Y_prediction.append(1)
    return Y_prediction

In [14]:

def log_regression(x_train, y_train, x_test, y_test,lr,itera):
     
    parameters, cost_list = update(x_train, y_train, lr, itera) #train the model
     
    y_prediction_test = predict(parameters, x_test) #test the model on training and testing data
    y_prediction_train = predict(parameters, x_train)
    
    #compute accuracy
    print(f'''trainAcc={accuracy_score(y_train,y_prediction_train)},
              testAcc={accuracy_score(y_test,y_prediction_test)}''')
     


# test the code

In [21]:
log_regression(x_train, y_train, x_test,y_test, lr = 0.01, itera = 150)

trainAcc=0.9899497487437185,
              testAcc=0.9766081871345029


# comparing to logreg

In [16]:
logreg = LogisticRegression(random_state = 42, max_iter = 150)
print("test accuracy: {} ".format(
    logreg.fit(x_train, y_train).score(x_test, y_test)))
print("train accuracy: {} ".format(
    logreg.fit(x_train, y_train).score(x_train, y_train)))

test accuracy: 0.9883040935672515 
train accuracy: 0.9874371859296482 


  return f(*args, **kwargs)
  return f(*args, **kwargs)
