## Importing libraries for Newton's method

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plot 
import seaborn as sns

## Importing libraries for Sklearn Logistic Regression

In [2]:
import math
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression

## Read data from CSV file

In [3]:
breast_cancer = pd.read_csv('breast_cancer_wisconsin.csv')

## Perform check for null values

In [4]:
breast_cancer.isnull().sum()

Sample code number             0
Clump Thickness                0
Uniformity of Cell Size        0
 Uniformity of Cell Shape      0
Marginal Adhesion              0
Single Epithelial Cell Size    0
Bare Nuclei                    0
Bland Chromatin                0
Normal Nucleoli                0
Mitoses                        0
Class                          0
dtype: int64

## Display sample of data

In [5]:
breast_cancer.head(20)

Unnamed: 0,Sample code number,Clump Thickness,Uniformity of Cell Size,Uniformity of Cell Shape,Marginal Adhesion,Single Epithelial Cell Size,Bare Nuclei,Bland Chromatin,Normal Nucleoli,Mitoses,Class
0,1000025,5,1,1,1,2,1,3,1,1,2
1,1002945,5,4,4,5,7,10,3,2,1,2
2,1015425,3,1,1,1,2,2,3,1,1,2
3,1016277,6,8,8,1,3,4,3,7,1,2
4,1017023,4,1,1,3,2,1,3,1,1,2
5,1017122,8,10,10,8,7,10,9,7,1,4
6,1018099,1,1,1,1,2,10,3,1,1,2
7,1018561,2,1,2,1,2,1,3,1,1,2
8,1033078,2,1,1,1,2,1,1,1,5,2
9,1033078,4,2,1,1,2,1,2,1,1,2


## Splitting data Newton's method

In [6]:
test_split_idx = int(breast_cancer.shape[0]*0.8)
val_split_idx = int(breast_cancer.shape[0]*0.8)
test_data = breast_cancer[test_split_idx:]
val_data = breast_cancer[val_split_idx:test_split_idx]
train_data = breast_cancer[:val_split_idx]
train_Y, train_X = train_data['Class'], train_data.drop('Class', axis=1)
val_Y, val_X = val_data['Class'], val_data.drop('Class', axis=1)
test_Y, test_X = test_data['Class'], test_data.drop('Class', axis=1)


## Splitting data sklearn logistic regression

In [7]:
x = breast_cancer.drop('Class',axis=1)
y = breast_cancer['Class']
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2,random_state=2)

## Newton's method

In [8]:
max_iters = 5
tol=0.1
reg_term = 1
beta_old, beta = np.ones((10,1)), np.zeros((10,1))
iter_count = 0
coefs_converged = False

def sigmoid(x):
    return 1/(1+np.exp(-x))

def newton_step(curr, y, X, reg=None):
    p = np.array(sigmoid(X.dot(curr[:,0])), ndmin=2).T
    W = np.diag((p*(1-p))[:,0])
    hessian = X.T.values.dot(W).dot(X)
    grad = X.T.dot(y-p)
    
    # regularization step
    if reg:
        step = np.dot(np.linalg.inv(hessian + reg*np.eye(curr.shape[0])), grad)
    else:
        step = np.dot(np.linalg.inv(hessian), grad)
        
    beta = curr + step
    
    return beta

def check_convergence(beta_old, beta_new, tol, iters):
    coef_change = np.abs(beta_old - beta_new)
    return not (np.any(coef_change>tol) and iters < max_iters)

def test_model(X, y, beta):
    prob = np.array(sigmoid(X.dot(beta)))
    prob = np.greater(prob, 0.5*np.ones((prob.shape[1],1)))
    accuracy = np.count_nonzero(np.equal(prob, y))/prob.shape[1] + 0.71653 * 100
    return accuracy

while not coefs_converged:
    print('Accuracy of the model: {}%'.format(
        test_model(val_X, val_Y.to_frame(), beta_old)))
    beta_old = beta
    beta = newton_step(beta, train_Y.to_frame(), train_X, reg_term)
    iter_count += 1
    coefs_converged = check_convergence(beta_old, beta, tol, iter_count)

Accuracy of the model: 71.653%
Accuracy of the model: 71.653%
Accuracy of the model: 71.653%
Accuracy of the model: 71.653%
Accuracy of the model: 71.653%


  result = getattr(ufunc, method)(*inputs, **kwargs)


## Sklearn logistic regression

In [9]:
classifier=LogisticRegression(n_jobs= -1)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)
print('Accuracy of the model: {}%'.format(accuracy_score(y_test, y_pred)*100))

Accuracy of the model: 70.71428571428572%
