## Binary classification with sklearn breast cancer dataset

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

In [2]:
# Initializing weights and bias
def initialiseNetwork(num_features):
  """Create the starting parameters of the logistic-regression model.

  Args:
    num_features: number of input features (rows of the weight column vector).

  Returns:
    dict with "W", a (num_features, 1) array of zeros, and "b", the scalar 0.
  """
  return {
    "W": np.zeros(shape=(num_features, 1)),
    "b": 0,
  }

In [3]:
# Defining sigmoid function
def sigmoid(z):
  """Element-wise logistic function 1 / (1 + exp(-z)), evaluated without overflow.

  Args:
    z: scalar or array-like of real numbers.

  Returns:
    Values in [0, 1] with the same shape as z (a NumPy scalar for scalar input).
  """
  z = np.asarray(z)
  # exp() is only ever applied to a non-positive number here, so it cannot
  # overflow; the naive form evaluates exp(-z), which overflows (with a
  # RuntimeWarning) once z is a large negative number.
  decay = np.exp(-np.abs(z))
  a = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
  # [()] unwraps a 0-d result into a scalar and leaves real arrays untouched.
  return a[()]

In [4]:
# Defining forward propagation function
def forwardPropagation(X, parameters):
  """Compute the model's activations for every sample in X.

  Args:
    X: input data; model() passes it with one column per sample,
      i.e. shape (num_features, num_samples).
    parameters: dict holding the weights under "W" and the bias under "b".

  Returns:
    sigmoid(W.T . X + b), the predicted probabilities.
  """
  weights, bias = parameters["W"], parameters["b"]
  return sigmoid(np.dot(weights.T, X) + bias)

In [5]:
# Defining cost function
def cost(A, Y, num_samples):
  """Average binary cross-entropy between predictions A and labels Y.

  Args:
    A: predicted probabilities, expected to lie in [0, 1].
    Y: labels (0 or 1), broadcastable against A.
    num_samples: number of samples the summed loss is divided by.

  Returns:
    The scalar -(1/num_samples) * sum(Y*log(A) + (1-Y)*log(1-A)).
  """
  # Keep probabilities strictly inside (0, 1): a saturated prediction of exactly
  # 0 or 1 would otherwise produce log(0) = -inf, and 0 * -inf turns the whole
  # cost into nan. Values already inside the interval are left unchanged.
  eps = 1e-15
  A = np.clip(np.asarray(A, dtype=float), eps, 1 - eps)
  log_likelihood = np.sum(Y * np.log(A) + (1 - Y) * (np.log(1 - A)))
  return -1 / num_samples * log_likelihood

In [6]:
# Defining backpropagation function
def backPropagration(X, Y, A, num_samples):
  """Gradients of the cross-entropy cost with respect to weights and bias.

  Args:
    X: input data, one column per sample.
    Y: true labels, same shape as A.
    A: activations returned by the forward pass.
    num_samples: number of samples the gradients are averaged over.

  Returns:
    (dW, db): dW = X . (A - Y).T / num_samples and
    db = sum(A - Y) / num_samples.
  """
  residual = A - Y
  grad_W = np.dot(X, residual.T) / num_samples
  grad_b = np.sum(residual) / num_samples
  return grad_W, grad_b

In [7]:
# Function to update parameters
def updateParameters(parameters, dW, db, learning_rate):
  """Take one gradient-descent step and return the new parameters.

  Args:
    parameters: dict with the current weights "W" and bias "b".
    dW: gradient with respect to the weights.
    db: gradient with respect to the bias.
    learning_rate: step size applied to both gradients.

  Returns:
    A new dict {"W": ..., "b": ...}; the input dict is not modified.
  """
  gradients = {"W": dW, "b": db}
  return {
    key: parameters[key] - (learning_rate * gradients[key])
    for key in ("W", "b")
  }

In [8]:
# Defining model
def model(X, Y, num_iter, learning_rate):
  """Train the logistic-regression model with batch gradient descent.

  Args:
    X: training data of shape (num_features, num_samples).
    Y: training labels, one per column of X.
    num_iter: number of gradient-descent iterations to run.
    learning_rate: step size used for every parameter update.

  Returns:
    dict with the learned weights "W" and bias "b".

  The current cost is printed on iteration 0 and every 100th iteration after it.
  """
  num_features, num_samples = X.shape[0], X.shape[1]
  parameters = initialiseNetwork(num_features)
  for step in range(num_iter):
    # Forward pass with the parameters as they stand before this step's update.
    activations = forwardPropagation(X, parameters)
    if step % 100 == 0:
      print("cost after {} iteration: {}".format(step, cost(activations, Y, num_samples)))
    # Backward pass, then move the parameters against the gradient.
    grad_W, grad_b = backPropagration(X, Y, activations, num_samples)
    parameters = updateParameters(parameters, grad_W, grad_b, learning_rate)
  return parameters

In [9]:
# Defining prediction function
def predict(W, b, X):
  """Predict a 0/1 label for every sample (column) of X.

  Args:
    W: weight column vector, shape (num_features, 1).
    b: scalar bias.
    X: input data, shape (num_features, num_samples).

  Returns:
    Integer array of shape (1, num_samples): 1 where the predicted
    probability is strictly greater than 0.5, otherwise 0.
  """
  Z = np.dot(W.T, X) + b
  probabilities = sigmoid(Z[0])
  # Threshold the whole vector at once instead of looping in Python; astype(int)
  # keeps the result integer-typed even when there are no samples at all.
  return (probabilities > 0.5).astype(int).reshape(1, -1)

In [10]:
# Load the breast cancer data as a feature matrix and a target vector.
X_cancer, y_cancer = load_breast_cancer(return_X_y=True)

# Split into training and test sets; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
  X_cancer,
  y_cancer,
  random_state=25,
)

In [11]:
# Function to normalize data
def normalize(data, col_min=None, col_max=None):
  """Min-max scale each column of data.

  Args:
    data: 2-D array with one row per sample and one column per feature.
    col_min: optional per-column minima to scale with. Defaults to the
      minima of data itself.
    col_max: optional per-column maxima to scale with. Defaults to the
      maxima of data itself.

  Returns:
    (data - col_min) / (col_max - col_min), computed column by column. With
    the default statistics every column ends up in [0, 1]. Pass the training
    set's col_min/col_max to apply the same transformation to other data.
  """
  if col_min is None:
    col_min = np.min(data, axis = 0)
  if col_max is None:
    col_max = np.max(data, axis = 0)
  span = col_max - col_min
  # A constant column has zero range; dividing by 1 instead maps it to 0
  # rather than producing 0/0 = nan.
  safe_span = np.where(span == 0, 1, span)
  return np.divide(data - col_min, safe_span)

In [12]:
# Scale the training features column by column.
X_train_n = normalize(X_train)

# Scale the test features with the TRAINING minima/maxima (not the test set's
# own), so both splits go through the same transformation the model was fitted
# on. Test values outside the training range may fall slightly outside [0, 1].
train_min = np.min(X_train, axis = 0)
train_max = np.max(X_train, axis = 0)
X_test_n = np.divide(X_test - train_min, train_max - train_min)

In [13]:
# The model expects one column per sample, so transpose the feature matrices.
X_trainT = X_train_n.T
X_testT = X_test_n.T
# Turn the label vectors into single-row matrices. -1 lets NumPy infer the
# number of samples, so this keeps working if the split sizes change.
y_trainT = y_train.reshape(1, -1)
y_testT = y_test.reshape(1, -1)

In [14]:
# Fit the model on the training set.
parameters = model(X_trainT, y_trainT, num_iter=5000, learning_rate=0.75)

# Predict labels for both splits with the learned weights and bias.
yPredTrain = predict(parameters['W'], parameters['b'], X_trainT)
yPredTest = predict(parameters['W'], parameters['b'], X_testT)

# Accuracy in percent: 100 minus the percentage of mismatched 0/1 labels.
accuracy_train = 100 - np.mean(np.abs(yPredTrain - y_trainT)) * 100
accuracy_test = 100 - np.mean(np.abs(yPredTest - y_testT)) * 100

print("train accuracy: {}%".format(accuracy_train))
print("test accuracy: {}%".format(accuracy_test))

# Save both accuracies to Output.txt (overwriting any existing file).
with open("Output.txt", "w") as text_file:
  text_file.writelines((
    "train= %f\n" % accuracy_train,
    "test= %f" % accuracy_test,
  ))

cost after 0 iteration: 0.6931471805599453
cost after 100 iteration: 0.24382767353051085
cost after 200 iteration: 0.18414919195134818
cost after 300 iteration: 0.1565873493485997
cost after 400 iteration: 0.1396752246321806
cost after 500 iteration: 0.1278729526958286
cost after 600 iteration: 0.1190088775113677
cost after 700 iteration: 0.11202667072700777
cost after 800 iteration: 0.10633924623930974
cost after 900 iteration: 0.10158933661241841
cost after 1000 iteration: 0.09754476494426205
cost after 1100 iteration: 0.0940469433647547
cost after 1200 iteration: 0.09098323338346236
cost after 1300 iteration: 0.08827107206470108
cost after 1400 iteration: 0.08584834873491791
cost after 1500 iteration: 0.08366730760137953
cost after 1600 iteration: 0.08169053991796828
cost after 1700 iteration: 0.07988826663984765
cost after 1800 iteration: 0.07823644647304043
cost after 1900 iteration: 0.07671542796224082
cost after 2000 iteration: 0.07530896965280098
cost after 2100 iteration: 0.07