In [1]:
# importing necessary libraries

import math
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Load the scikit-learn breast cancer dataset and wrap its feature matrix
# in a DataFrame whose columns carry the dataset's feature names.

from sklearn.datasets import load_breast_cancer

data = load_breast_cancer()
df_data = pd.DataFrame(data.data, columns=data.feature_names)

# Preview the first rows (last expression so the notebook renders it)
df_data.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [3]:
# Separate the feature table from the target labels
# (the actual train/test split is done later by split_data)

X = df_data
y = data.target

In [4]:
def split_data(X, y, test_size, random_state=27):
    """Split features and labels into train and test subsets.

    Thin wrapper around ``sklearn.model_selection.train_test_split``.

    Parameters
    ----------
    X : features, in any form ``train_test_split`` accepts.
    y : labels, in any form ``train_test_split`` accepts.
    test_size : forwarded unchanged to ``train_test_split`` as the size of
        the held-out test subset.
    random_state : seed forwarded to ``train_test_split``; defaults to 27,
        the value this function previously hard-coded.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.
    """
    from sklearn.model_selection import train_test_split

    # Honour the caller's test_size instead of a hard-coded 0.3.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    return X_train, X_test, y_train, y_test

def initialize_weights(X_train):
    """Create the starting parameters for the classifier.

    Reseeds NumPy's global random generator with 31, then draws one small
    random weight per column of ``X_train``.

    Returns
    -------
    tuple
        ``(weights, bias)`` where ``weights`` has shape
        ``(X_train.shape[1], 1)`` and ``bias`` is the constant 0.1.
    """
    n_features = X_train.shape[1]

    np.random.seed(31)
    weights = 0.0001 * np.random.rand(n_features, 1)
    bias = 0.1

    return weights, bias

def sigmoid(X):
    """Apply the logistic function 1 / (1 + e^(-X)) element-wise."""
    decay = np.exp(-X)
    return 1 / (decay + 1)

def train(X_train, y_train, W, b, learning_rate = 0.01, max_iterations = 10000):
    """Fit logistic-regression parameters with batch gradient descent.

    Parameters
    ----------
    X_train : array-like, shape (m, n)
        Training features (DataFrame or ndarray); converted once to a float
        ndarray.
    y_train : array-like with m elements
        Binary 0/1 labels. Flat, row or column layouts are all accepted; the
        values are reshaped to (1, m) so they line up with the activations.
    W : ndarray, shape (n, 1)
        Initial weights.
    b : float
        Initial bias.
    learning_rate : float
        Step size of each gradient update.
    max_iterations : int
        Number of gradient steps to run (there is no early stopping).

    Returns
    -------
    tuple
        ``(W, b, cost_list)``: the updated weights, the updated bias, and the
        cross-entropy cost computed at every iteration.
    """
    # Convert once, outside the loop, so every iteration works on plain arrays.
    features = np.asarray(X_train, dtype=float)
    # Force labels to (1, m): a column-shaped y would otherwise broadcast
    # against the (1, m) activations into an (m, m) matrix and corrupt W.
    labels = np.asarray(y_train, dtype=float).reshape(1, -1)

    m = features.shape[0]
    eps = 1e-15  # keeps log() away from 0 when the sigmoid saturates
    cost_list = []
    for _ in range(max_iterations):

        Z = np.dot(W.T, features.T) + b
        A = sigmoid(Z)

        # Clip only for the cost; gradients below use the raw activations.
        A_safe = np.clip(A, eps, 1 - eps)
        cost = -(1/m)*np.sum(labels*np.log(A_safe) + (1-labels)*np.log(1-A_safe))

        error = A - labels
        dw = (1/m)*np.dot(features.T, error.T)
        db = (1/m)*np.sum(error)

        W = W - learning_rate*dw
        b = b - learning_rate*db

        cost_list.append(cost)

    return W, b, cost_list

def predict(W_final, b_final, X_train, X_test, y_train, y_test, threshold):
    """Score the trained classifier on the train and test sets.

    Parameters
    ----------
    W_final : ndarray, shape (n, 1)
        Trained weights.
    b_final : float
        Trained bias.
    X_train, X_test : array-like, shape (samples, n)
        Feature matrices to score.
    y_train, y_test : array-like
        True 0/1 labels for the corresponding feature matrices.
    threshold : float
        A sample is labelled 1 when its predicted probability is strictly
        greater than this value, otherwise 0.

    Returns
    -------
    tuple
        ``(train_accuracy, test_accuracy, test_confusion_matrix)``.
    """
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import confusion_matrix

    # Use the trained bias passed in as b_final, not a module-level `b`.
    prediction_train = sigmoid(np.dot(W_final.T, np.asarray(X_train).T) + b_final)
    prediction_test = sigmoid(np.dot(W_final.T, np.asarray(X_test).T) + b_final)

    # Row 0 holds one probability per sample; threshold it into 0/1 labels.
    y_pred_train = (prediction_train[0] > threshold).astype(int)
    y_pred_test = (prediction_test[0] > threshold).astype(int)

    # labels=[0, 1] keeps the matrix 2x2 even when only one class is present,
    # so callers can always unpack tn, fp, fn, tp from it.
    return (
        accuracy_score(y_train, y_pred_train),
        accuracy_score(y_test, y_pred_test),
        confusion_matrix(y_test, y_pred_test, labels=[0, 1]),
    )

In [5]:
# Hold out 30% of the samples for testing. The accuracies recorded below
# come from a 398/171 train/test split, i.e. test_size=0.3.
X_train, X_test, y_train, y_test = split_data(X, y, test_size=0.3)

# Starting weights and bias
W, b = initialize_weights(X_train)

# Train the classifier; the small learning rate suits the unscaled features
W_final, b_final, cost_list = train(X_train, y_train, W, b, 
                                    learning_rate = 0.00001, max_iterations = 1000)


# Sweep the decision threshold to see how it trades accuracy against
# false negatives.
for threshold_val in np.arange(0,1,0.1):

    # Score the trained model at this threshold
    train_accuracy, test_accuracy, cm = predict(W_final, b_final, X_train, X_test, 
                                                y_train, y_test, threshold=threshold_val)
    
    # Share of true class-1 samples that were predicted as class 0
    tn, fp, fn, tp = cm.ravel()
    false_negative_rate = fn/(tp+fn)

    print(train_accuracy, test_accuracy, false_negative_rate)

0.635678391959799 0.6081871345029239 0.0
0.8492462311557789 0.8128654970760234 0.0
0.8919597989949749 0.8362573099415205 0.0
0.907035175879397 0.8713450292397661 0.0
0.914572864321608 0.8771929824561403 0.009615384615384616
0.9271356783919598 0.8947368421052632 0.019230769230769232
0.9195979899497487 0.9064327485380117 0.028846153846153848
0.8994974874371859 0.9122807017543859 0.0673076923076923
0.8266331658291457 0.847953216374269 0.21153846153846154
0.542713567839196 0.543859649122807 0.7307692307692307
