## Logistic regression using Newton-raphson method

In [1]:
pip install ucimlrepo

Note: you may need to restart the kernel to use updated packages.


### Importing needed libraries

In [2]:
from ucimlrepo import fetch_ucirepo
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

### importing breast cancer diagnostic dataset

In [3]:
cancer_data = fetch_ucirepo(id=17)

In [4]:
features = cancer_data.data.features
targets = cancer_data.data.targets

### data preprocessing

In [5]:
# Map labels 'M' (Malignant) to 1 and 'B' (Benign) to 0
label_mapping = {'M': 1, 'B': 0}
targets['Diagnosis'] = targets['Diagnosis'].map(label_mapping)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  targets['Diagnosis'] = targets['Diagnosis'].map(label_mapping)


### Set test and validation split sizes
testing    - 80% data of training data
validation - 80% data of testing data

In [6]:
split_data_size = 0.2

In [7]:
# Split the data into training, validation, and test sets
train_features, temp_features, train_targets, temp_targets = train_test_split(
    features, targets, test_size=split_data_size, random_state=np.random.randint(1, 100))
val_features, test_features, val_targets, test_targets = train_test_split(
    temp_features, temp_targets, test_size=split_data_size, random_state=np.random.randint(1, 100))


#### defining necessory functions for calculating gradient

In [8]:
# sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# calculate probabilities
def calculate_probabilities(features, coefficients):
    return np.array(sigmoid(features.dot(coefficients[:, 0])), ndmin=2).T

# calculate the weight matrix
def calculate_weight_matrix(probabilities):
    return np.diag((probabilities * (1 - probabilities))[:, 0])

# calculate the Hessian matrix
def calculate_hessian(features, weight_matrix):
    return features.T.dot(weight_matrix).dot(features)

# calculate the gradient
def calculate_gradient(features, targets, probabilities):
    return features.T.dot(targets - probabilities)



In [9]:
# Newton-Raphson method
def newton_raphson_step(curr_coefficients, targets, features, regularization=None):
    probabilities = calculate_probabilities(features, curr_coefficients)
    weight_matrix = calculate_weight_matrix(probabilities)

    features = features.reset_index(drop=True)
    targets.reset_index(drop=True, inplace=True)

    hessian = calculate_hessian(features, weight_matrix)
    gradient = calculate_gradient(features, targets, probabilities)

    print("-------------")
    # Regularization step
    if regularization:
        step = np.dot(np.linalg.inv(hessian + regularization * np.eye(curr_coefficients.shape[0])), gradient)
    else:
        step = np.dot(np.linalg.inv(hessian), gradient)

    new_coefficients = curr_coefficients + step
    return new_coefficients



In [10]:
# check for convergence
def has_converged(coefficients_old, coefficients_new, tolerance, iterations):
    coefficient_change = np.abs(coefficients_old - coefficients_new)
    print(coefficient_change)
    return not (np.any(coefficient_change > tolerance) and iterations < max_iterations)



In [11]:
# evaluate the model accuracy
def evaluate_model(features, targets, coefficients):
    probabilities = calculate_probabilities(features, coefficients)

    predictions = np.greater(probabilities, 0.5 * np.ones((probabilities.shape[1], 1)))
    accuracy = np.count_nonzero(np.equal(predictions, targets)) / probabilities.shape[0] * 100
    return accuracy

In [12]:

# Maximum no of iter
max_iterations = 20  

# Convergence
tolerance = 0.1  

# Regularization
regularization_term = 1  



In [13]:
# coefficients
coefficients_old, coefficients = np.ones((30, 1)), np.zeros((30, 1))
iteration_count = 0
coefficients_converged = False



In [14]:
# Main loop
while not coefficients_converged:
    print('Validation Accuracy after iteration {}: {}%'.format(iteration_count + 1, evaluate_model(val_features, val_targets, coefficients_old)))
    coefficients_old = coefficients
    coefficients = newton_raphson_step(coefficients, train_targets, train_features, regularization_term)
    iteration_count += 1
    coefficients_converged = has_converged(coefficients_old, coefficients, tolerance, iteration_count)


Validation Accuracy after iteration 1: 39.56043956043956%
-------------
[[2.20041051]
 [0.00883832]
 [0.05418487]
 [0.01606687]
 [0.01103325]
 [0.15081524]
 [0.41004335]
 [0.46876387]
 [0.11080624]
 [0.20780549]
 [0.72554204]
 [0.05812243]
 [0.09192721]
 [0.01514838]
 [0.00945185]
 [0.08170142]
 [0.27383756]
 [0.06702955]
 [0.01624766]
 [0.02181531]
 [1.24176359]
 [0.0395791 ]
 [0.00455729]
 [0.00671306]
 [0.03523705]
 [0.58052401]
 [0.90475272]
 [0.89066198]
 [0.09421879]
 [0.1698731 ]]
Validation Accuracy after iteration 2: 60.43956043956044%
-------------
[[1.37991900e+00]
 [1.43061426e-02]
 [5.20998504e-02]
 [1.08948782e-02]
 [8.51021653e-02]
 [1.50024630e-01]
 [2.78784324e-01]
 [3.19458487e-01]
 [1.49143484e-02]
 [9.33440895e-02]
 [6.16739444e-01]
 [2.36990097e-01]
 [1.61795895e-01]
 [1.45721941e-02]
 [1.58538245e-02]
 [6.17352003e-02]
 [2.14860380e-01]
 [3.91525970e-02]
 [7.59889148e-04]
 [1.67070508e-02]
 [5.77188079e-01]
 [5.55428408e-02]
 [2.77648947e-03]
 [3.37813714e-03]
 [1

In [15]:

# After training, evaluate the model on the test set
print('After {} Iterations'.format(iteration_count))
print('Testing Accuracy: {}%'.format(evaluate_model(test_features, test_targets, coefficients)))


After 20 Iterations
Testing Accuracy: 95.65217391304348%


In [16]:
# After training, evaluate the model on the training set
print('After {} Iterations'.format(iteration_count))
print('Training Accuracy: {}%'.format(evaluate_model(train_features, train_targets, coefficients)))


After 20 Iterations
Training Accuracy: 96.92307692307692%


In [17]:
# After training, evaluate the model on the validation set
print('After {} Iterations'.format(iteration_count))
print('Validation Accuracy: {}%'.format(evaluate_model(val_features, val_targets, coefficients)))


After 20 Iterations
Validation Accuracy: 95.6043956043956%
