In [1]:
import numpy as np
import pandas as pd
from sklearn import datasets

In [2]:
cancerdata = datasets.load_breast_cancer();

In [3]:
def sigmoid(z):
    z = np.clip(z,-500,500);
    return 1/(1 + np.exp(-z));
def findcost(y,ypred):
    n = len(y);
    ypred = np.clip(ypred,1e-15,1 - 1e-15);
    cost = (-1/n)*(np.dot(y,np.log(ypred)) + np.dot(1 - y,np.log(1 - ypred)));
    return cost;
def fit(x,y,iterations,learningrate):
    num = len(x)
    n = len(x[0]);
    m = np.zeros(n);
    c = 0;
    for i in range(iterations):
        z = np.dot(x,m) + c;
        ypred = sigmoid(z);
        error = ypred - y;
        slopem = (1/num)*(np.dot(x.T,error));
        slopec = (1/num)*(np.sum(error));
        cost = findcost(y,ypred);
        print(i,"Cost : ",cost);
        m = m - learningrate*slopem;
        c = c - learningrate*slopec;
    return m,c;
def predict(xtest,m,c):
    z = np.dot(xtest,m) + c;
    h = sigmoid(z);
    return (h >= 0.5).astype(int);
def score(ytest,ypred):
    return (ypred == ytest).mean();

In [4]:
x = cancerdata.data;
y = cancerdata.target;

In [5]:
print(x.shape,y.shape);

(569, 30) (569,)


In [6]:
from sklearn import model_selection as ms
xtrain,xtest,ytrain,ytest = ms.train_test_split(x,y);
print(xtrain.shape,ytrain.shape,xtest.shape,ytest.shape);

(426, 30) (426,) (143, 30) (143,)


In [7]:
m,c = fit(xtrain,ytrain,3000,0.0005);
ypred = predict(xtest,m,c);

0 Cost :  0.693147180559946
1 Cost :  15.288321809534093
2 Cost :  12.323980166281244
3 Cost :  22.1417396613098
4 Cost :  12.323980166281244
5 Cost :  22.215081530998912
6 Cost :  12.323980166285994
7 Cost :  22.215081530998912
8 Cost :  2.8576494495477442
9 Cost :  22.215081530998912
10 Cost :  11.161947504514835
11 Cost :  22.215081530998912
12 Cost :  11.723166518504915
13 Cost :  12.323980166281244
14 Cost :  21.99431247161058
15 Cost :  12.323980166281244
16 Cost :  22.215081530998912
17 Cost :  12.323980166281402
18 Cost :  22.215081530998912
19 Cost :  2.527536821254267
20 Cost :  22.121553376165068
21 Cost :  12.323980166281244
22 Cost :  22.215081530998912
23 Cost :  11.747662179528696
24 Cost :  22.215081530998912
25 Cost :  1.2306916411118558
26 Cost :  12.601034853262439
27 Cost :  12.323980166281244
28 Cost :  21.92105750215475
29 Cost :  12.323980166281244
30 Cost :  22.213477991113688
31 Cost :  11.400566348751696
32 Cost :  22.215081530998912
33 Cost :  3.5430551192351

In [8]:
print(ypred.shape);

(143,)


In [9]:
accuracy = score(ytest,ypred);
print(accuracy);

0.9370629370629371
