In [110]:
import pandas as pd
import numpy as np
from  sklearn.metrics import accuracy_score

In [111]:
# Read the dataset from the CSV file into a DataFrame, then display it
# as the cell output.
data = pd.read_csv('breast_cancer.csv')
data

Unnamed: 0,Clump Thickness,Uniformity of Cell Size,Uniformity of Cell Shape,Marginal Adhesion,Single Epithelial Cell Size,Bare Nuclei,Bland Chromatin,Normal Nucleoli,Mitoses,Class
0,5,1,1,1,2,1,3,1,1,2
1,5,4,4,5,7,10,3,2,1,2
2,3,1,1,1,2,2,3,1,1,2
3,6,8,8,1,3,4,3,7,1,2
4,4,1,1,3,2,1,3,1,1,2
...,...,...,...,...,...,...,...,...,...,...
678,3,1,1,1,3,2,1,1,1,2
679,2,1,1,1,2,1,1,1,1,2
680,5,10,10,3,7,3,8,10,2,4
681,4,8,6,4,3,4,10,6,1,4


In [112]:
# Count how many rows share each per-column missing-value pattern; a single
# all-False pattern in the result means no column contains NaN.
data.isna().value_counts()

Clump Thickness  Uniformity of Cell Size  Uniformity of Cell Shape  Marginal Adhesion  Single Epithelial Cell Size  Bare Nuclei  Bland Chromatin  Normal Nucleoli  Mitoses  Class
False            False                    False                     False              False                        False        False            False            False    False    683
Name: count, dtype: int64

In [113]:
# Separate the target column 'Class' from the remaining feature columns.
y = data['Class']
X = data.drop(columns=['Class'])

In [114]:
# Recode the class labels 2 -> 0 and 4 -> 1 so they can be used as binary
# targets. The result is rebound to `y` rather than using inplace=True, so the
# recode never writes through to the `data` frame that `y` was taken from.
y = y.replace(to_replace=[2, 4], value=[0, 1])

In [115]:
# Convert features and labels to NumPy arrays. np.asarray returns its input
# unchanged when it is already an ndarray, so this cell can be re-run safely
# (calling .to_numpy() on an ndarray would raise AttributeError).
X = np.asarray(X)
y = np.asarray(y)
X.shape

(683, 9)

In [116]:
from sklearn.model_selection import train_test_split
# Split features and labels into train/test subsets; random_state pins the
# shuffle so the split is reproducible.
# NOTE(review): train_size=0.2 appears to leave only 20% of the rows for
# training and 80% for testing. If an 80/20 train/test split was intended,
# this should be test_size=0.2 — confirm before changing, since every
# downstream result depends on it.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.2, random_state=100)

In [117]:
def compute_sigmoid(z):
    """Evaluate the logistic function 1 / (1 + e^(-z)).

    Parameters
    ----------
    z : scalar or numpy array
        Input value(s); arrays are handled element-wise by ``np.exp``.

    Returns
    -------
    The sigmoid of ``z``, element-wise for array input.
    """
    denominator = 1 + np.exp(-z)
    return 1 / denominator

In [118]:
def compute_cost(x, Y, w, b):
    """Mean binary cross-entropy (log-loss) of a logistic-regression model.

    With ``z = x . w + b`` and ``g = sigmoid(z)``, the per-example loss is
    ``-(Y*log(g) + (1 - Y)*log(1 - g))``. Note the second term pairs
    ``(1 - Y)`` with ``log(1 - g)``; pairing ``(1 - g)`` with ``log(1 - Y)``
    would evaluate ``log(0)`` for every positive label.

    Parameters
    ----------
    x : array-like, one row per example
        Feature matrix.
    Y : array-like, one entry per example
        Binary labels (0 or 1).
    w : array-like, one entry per feature
        Weight vector.
    b : scalar
        Bias term.

    Returns
    -------
    The loss averaged over all examples.

    Raises
    ------
    ValueError
        If ``x`` contains no examples.
    """
    x = np.asarray(x)
    Y = np.asarray(Y)
    m = x.shape[0]
    if m == 0:
        raise ValueError("compute_cost needs at least one example")

    z = np.dot(x, w) + b
    # log(1 + e^z) - Y*z is algebraically identical to the cross-entropy
    # expression above, but never takes the log of a value rounded to 0.
    per_example_loss = np.logaddexp(0, z) - Y * z
    return np.sum(per_example_loss) / m

In [119]:
def compute_gradient(x, Y, w, b):
    """Gradient of the mean logistic loss with respect to ``w`` and ``b``.

    Computes the same quantities as an explicit loop over examples and
    features, but with matrix products so the work is done inside NumPy.

    Parameters
    ----------
    x : numpy array with one row per example and one column per feature
        Feature matrix.
    Y : numpy array, one entry per example
        Binary labels.
    w : numpy array, one entry per feature
        Current weights.
    b : scalar
        Current bias.

    Returns
    -------
    dj_dw : numpy array, one entry per feature
        Average over examples of ``(sigmoid(x[i] . w + b) - Y[i]) * x[i]``.
    dj_db : scalar
        Average over examples of ``sigmoid(x[i] . w + b) - Y[i]``.
    """
    m = x.shape[0]
    # Prediction error for every example at once.
    error = compute_sigmoid(np.dot(x, w) + b) - Y
    # x.T @ error sums error[i] * x[i][j] over the examples for each feature j.
    dj_dw = np.dot(x.T, error) / m
    dj_db = np.sum(error) / m
    return dj_dw, dj_db

In [120]:
def gradient_descent(x, Y, num_of_iter, alpha):
    """Fit logistic-regression parameters with batch gradient descent.

    Starts from all-zero weights and zero bias, then repeats ``num_of_iter``
    times: evaluate the gradient on the full data set and step against it.

    Parameters
    ----------
    x : numpy array with one row per example and one column per feature
        Training features.
    Y : numpy array, one entry per example
        Training labels.
    num_of_iter : int
        Number of update steps to perform.
    alpha : float
        Learning rate that scales each step.

    Returns
    -------
    (weights, bias) after the final update.
    """
    weights = np.zeros(x.shape[1])
    bias = 0
    for _ in range(num_of_iter):
        grad_w, grad_b = compute_gradient(x, Y, weights, bias)
        weights = weights - alpha * grad_w
        bias = bias - alpha * grad_b
    return weights, bias
    

In [121]:
# Fit the model on the training split: 10000 update steps, learning rate 0.01.
w, b = gradient_descent(X_train, y_train, num_of_iter=10000, alpha=0.01)
print(f'w : {w}   b{b}')

w : [ 0.00370189  0.5287045  -0.11993437  0.34686773 -0.76240847  0.47468304
  0.15394801  0.50648642  0.28764833]   b-4.444011986770991


In [122]:
def compute_prediction(x, w_in, b_in, threshold=0.5):
    """Predict binary class labels with a fitted logistic-regression model.

    Parameters
    ----------
    x : numpy array with one row per example
        Feature matrix to classify.
    w_in : numpy array, one entry per feature
        Model weights.
    b_in : scalar
        Model bias.
    threshold : float, optional
        Probability at or above which an example is labelled 1.
        Defaults to 0.5.

    Returns
    -------
    numpy array of ints (0 or 1), one per row of ``x``.
    """
    probabilities = compute_sigmoid(np.dot(x, w_in) + b_in)
    return (probabilities >= threshold).astype(int)


In [123]:
# Manual accuracy: the fraction of test rows whose predicted label equals the
# true label.
prediction = compute_prediction(X_test, w, b)
c = sum(1 for predicted, actual in zip(prediction, y_test) if predicted == actual)
print(c/len(prediction))

0.9341864716636198


In [124]:
# Cross-check the manual accuracy with scikit-learn. Arguments are passed by
# keyword so the true labels and the predictions cannot be swapped.
accuracy_score(y_true=y_test, y_pred=prediction)

0.9341864716636198