In [26]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

In [27]:
data = load_breast_cancer()
df = pd.DataFrame(data.data, columns = data.feature_names)
df['target'] = data.target
print(df.head())
print(df.shape)

   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  worst texture  worst perimeter  worst area  \
0             

In [28]:
#load data from sklearn
data = load_breast_cancer()
x = data.data
y = data.target.reshape(-1, 1)

In [29]:
# feature standarize
x = (x -np.mean(x, axis=0))/np.std(x, axis =0)

In [30]:
# split train and test data
x_train,x_test, y_train, y_test = train_test_split(x,y, test_size = 0.3, random_state=42)

In [31]:
# initialize weight
def in_weight(n_feature):
    w = np.zeros((n_feature, 1))
    b = 0
    return w, b
                 

In [32]:
# sigmoid function ( z = 1/1+e^-z)
def sigmoid(z):
    return 1/ (1 + np.exp(-z))

In [33]:
z = np.array([[0], [2], [-2]])
print(sigmoid(z))

[[0.5       ]
 [0.88079708]
 [0.11920292]]


In [34]:
#forward propagation
def fw_propagation(x,w,b):
    z = np.dot(x, w) + b
    y_pred = sigmoid(z)
    return y_pred

In [35]:
w,b = in_weight(x_train.shape[1])
y_pred = fw_propagation(x_train,w,b)
print("prediction (first 5)")
print(y_pred[:5])

prediction (first 5)
[[0.5]
 [0.5]
 [0.5]
 [0.5]
 [0.5]]


In [36]:
# Binary cross entropy
def cost_function(y, y_pred):
    m = y.shape[0]
    cost = -(1/m)*np.sum(y* np.log(y_pred + 1e-8) + (1-y)*np.log(1-y_pred + 1e-8))
    return cost

In [37]:
cost = cost_function(y_train, y_pred)
print(cost)

0.6931471605599453


In [38]:
# gradiant calculation 
def backward_propagation(x, y, y_pred):
    m = x.shape[0]
    dz = y_pred - y
    dw = (1/m)*np.dot(x.T,dz)
    db = (1/m)*np.sum(dz)
    return dw,db

In [39]:
dw,db = backward_propagation(x_train,y_train, y_pred)
print (dw[:5])
print(db)

[[0.34430305]
 [0.19631409]
 [0.34992127]
 [0.33771185]
 [0.17269225]]
-0.12562814070351758


In [40]:
# update weights and bias using gradiant descent
def update(w,b,dw,db,alpha):
    alpha = 0.01
    w = w - alpha*dw
    b = b - alpha*db
    return w, b

In [41]:
alpha = 0.01
w ,b = update(w,b,dw,db,alpha)
print(w[:5])
print(b)

[[-0.00344303]
 [-0.00196314]
 [-0.00349921]
 [-0.00337712]
 [-0.00172692]]
0.001256281407035176


In [42]:
# traning the model
def train(x, y, epochs, alpha):
    w,b = in_weight(x.shape[1])
    for i in range(epochs):
        y_pred = fw_propagation(x, w, b)
        cost = cost_function(y, y_pred)
        dw, db = backward_propagation(x, y, y_pred)
        w, b = update(w,b,dw,db,alpha)
        if i == 0:
            print(f"epoch{i} - cost: {cost :.4f}")
            return w, b

In [43]:
w, b = train(x_train, y_train, epochs=1000,alpha=0.01)

epoch0 - cost: 0.6931


In [44]:
# predict function 
def predict(x, w, b):
    y_pred = sigmoid(np.dot(x, w)+b)
    return y_pred >= 0.5


In [45]:
# accuracy score
def accuracy(y_true, y_pred):
    y_pred = y_pred.astype(int)
    return np.mean(y_true == y_pred)

In [47]:
# run test data
y_test_pred=predict(x_test, w, b)
test_acc = accuracy(y_test, y_test_pred)
print(f"test accuracy :{test_acc * 100:.2f}%")

test accuracy :93.57%
