# Newton's Method

Newton's Method is a powerful optimization technique for finding the best values of the model parameters in logistic regression. Unlike first-order techniques such as gradient descent, this second-order method uses both the gradient and the Hessian matrix of the cost function, which typically lets it converge in far fewer iterations.

## Recap of Logistic Regression and the Cost Function

Logistic regression models the probability of a binary outcome as a function of the input features. Its cost function is commonly the binary cross-entropy loss:

$$ J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[ y^{(i)}\log(\hat{y}^{(i)}) + (1-y^{(i)})\log(1-\hat{y}^{(i)}) \right] $$

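Here $ m $ is the number of training examples, $ y^{(i)} $ is the actual label, and $ \hat{y}^{(i)} = \sigma(\theta^T \mathbf{x}^{(i)}) $ is the predicted probability, where $ \sigma(z) = 1 / (1 + e^{-z}) $ is the sigmoid function and $ \mathbf{x}^{(i)} $ is the feature vector of the $ i $-th example.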

## Newton's Method Update Rule

Newton's Method uses the inverse of the Hessian matrix ($ H $) to update the parameters ($ \theta $). The following is the parameter update rule:

$$ \theta_{\text{new}} = \theta_{\text{old}} - H^{-1}(\theta_{\text{old}}) \nabla J(\theta_{\text{old}}) $$

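where $ H $ is the Hessian matrix and $ \nabla J(\theta) = \frac{1}{m}\sum_{i=1}^{m} (\hat{y}^{(i)} - y^{(i)})\, \mathbf{x}^{(i)} $ is the cost function's gradient, with $ \mathbf{x}^{(i)} $ the feature vector of the $ i $-th example.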

## Hessian Matrix for Logistic Regression

For the binary cross-entropy cost, the Hessian matrix of logistic regression is:

$$ H = \frac{1}{m}\sum_{i=1}^{m} \hat{y}^{(i)}(1-\hat{y}^{(i)}) \mathbf{x}^{(i)}(\mathbf{x}^{(i)})^T $$

The predicted probability in this case is $ \hat{y}^{(i)} $, and the feature vector for the $ i $-th example is $ \mathbf{x}^{(i)} $.

## Implementation

Let's implement Newton's method for logistic regression in Python:

```Python
import numpy as np

def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    The exponential is only ever taken of a non-positive number, so large
    negative inputs do not overflow. The naive form computes ``exp(-z)``,
    which overflows to ``inf`` and emits a RuntimeWarning when z is very
    negative.

    Args:
        z: Scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray with the same
        shape as ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # always in [0, 1], so it cannot overflow
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- algebraically equal.
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return out[()]  # unwrap a 0-d result back to a scalar

def compute_cost(theta, X, y):
    """Return the mean binary cross-entropy of a logistic regression model.

    With ``z = X @ theta`` and ``h = 1 / (1 + exp(-z))`` the per-example loss
    ``-[y*log(h) + (1 - y)*log(1 - h)]`` equals ``log(1 + exp(z)) - y*z``.
    Evaluating it in that form never takes ``log(0)``, so the cost stays
    finite even when a prediction saturates to exactly 0 or 1.

    Args:
        theta: Parameter vector, one entry per column of ``X``.
        X: Feature matrix, one row per training example.
        y: Labels (0 or 1), one per row of ``X``.

    Returns:
        The scalar cost averaged over the ``len(y)`` examples.
    """
    m = len(y)
    z = np.dot(X, theta)
    # logaddexp(0, z) == log(exp(0) + exp(z)) == log(1 + exp(z)), overflow-free.
    return np.sum(np.logaddexp(0, z) - y * z) / m

def compute_gradient(theta, X, y):
    """Gradient of the mean cross-entropy cost with respect to ``theta``.

    Args:
        theta: Parameter vector, one entry per column of ``X``.
        X: Feature matrix, one row per training example.
        y: Labels, one per row of ``X``.

    Returns:
        ``X.T @ (sigmoid(X @ theta) - y) / len(y)``.
    """
    n_samples = len(y)
    # Residual: predicted probability minus the actual label, per example.
    residual = sigmoid(np.dot(X, theta)) - y
    return X.T.dot(residual) / n_samples

def compute_hessian(theta, X):
    """Hessian of the mean cross-entropy cost with respect to ``theta``.

    Computes ``X.T @ diag(h * (1 - h)) @ X / m`` with ``h = sigmoid(X @ theta)``
    and ``m = X.shape[0]``.

    Args:
        theta: Parameter vector, one entry per column of ``X``.
        X: 2-D feature matrix, one row per training example.

    Returns:
        A square matrix with one row and column per column of ``X``.
    """
    m = X.shape[0]
    h = sigmoid(np.dot(X, theta))
    weights = h * (1 - h)
    # Broadcasting scales column i of X.T by weights[i], which is exactly
    # X.T @ diag(weights) without materialising any m x m diagonal matrix.
    return (X.T * weights) @ X / m

def newtons_method(X, y, theta, num_iterations):
    """Run a fixed number of Newton updates for logistic regression.

    Each iteration applies ``theta <- theta - H^{-1} @ gradient``, where the
    gradient and Hessian come from ``compute_gradient`` and
    ``compute_hessian`` evaluated at the current ``theta``.

    Args:
        X: Feature matrix, one row per training example.
        y: Labels, one per row of ``X``.
        theta: Initial parameter vector; it is not modified in place.
        num_iterations: Number of Newton updates to perform.

    Returns:
        The parameter vector after ``num_iterations`` updates.

    Raises:
        numpy.linalg.LinAlgError: If the Hessian is singular.
    """
    for _ in range(num_iterations):
        gradient = compute_gradient(theta, X, y)
        hessian = compute_hessian(theta, X)
        # Solve H @ step = gradient directly rather than forming H^{-1}:
        # cheaper and numerically more accurate than an explicit inverse.
        theta = theta - np.linalg.solve(hessian, gradient)

    return theta

# Example usage:
# Assumes X (feature matrix, one row per example) and y (0/1 labels) are
# already defined.
# Prepend a column of ones to X so that theta[0] acts as the bias term.
# The row count is taken from X itself; no separate `m` variable is needed.
X = np.column_stack((np.ones(X.shape[0]), X))
theta = np.zeros(X.shape[1])
num_iterations = 10

theta = newtons_method(X, y, theta, num_iterations)

```

## Executable content: Newton's Method

### Install the libraries needed for the interactive plots

1. pip install plotly

2. pip install ipywidgets

In this example, the code loads the breast cancer dataset and fits a logistic regression model using scikit-learn's `newton-cg` solver, a Newton-type optimizer:


In [1]:
import numpy as np
import pandas as pd
from sklearn import datasets
import plotly.graph_objects as go
import plotly.express as px
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the breast cancer dataset into one DataFrame (features + target column).
cancer = load_breast_cancer()
cancer_df = pd.DataFrame(
    data=np.column_stack((cancer['data'], cancer['target'])),
    columns=np.append(cancer['feature_names'], 'target'),
)

# Only two features are used so the results can be drawn in a plane.
features = ['mean radius', 'mean texture']
# Re-encode the target as integers: 0 stays 0, anything else becomes 1.
cancer_df['target'] = (cancer_df['target'] != 0).astype(int)

# Data prep: label vector and feature matrix.
y = cancer_df['target']
X = cancer_df[features]

# NOTE: the scaler is fit on every row, i.e. before the train/test split below.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# Logistic regression fitted with scikit-learn's 'newton-cg' solver.
model = LogisticRegression(solver='newton-cg')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report accuracy on the held-out 20% of the data.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Plot the decision boundary
def plot_decisionboundary(X, y, features):
    """Scatter the samples coloured by predicted class, one trace per threshold.

    A single logistic regression is fitted on ``(X, y)``. For each threshold
    in 0.1, 0.2, ..., 0.9 a sample is labelled 1 when its predicted
    probability for the second class exceeds the threshold. The slider shows
    exactly one threshold's trace at a time.

    Args:
        X: 2-D array; column 0 goes on the x-axis and column 1 on the y-axis.
        y: Binary (0/1) labels, one per row of ``X``.
        features: Axis titles for columns 0 and 1 of ``X``.
    """
    thresholds = np.linspace(0.1, 0.9, 9)

    # With 0/1 labels, `y > threshold` is the same for every threshold in
    # (0, 1), so the fit is loop-invariant: fit once and apply the threshold
    # to the predicted probabilities instead.
    clf = LogisticRegression()
    clf.fit(X, y)
    prob_positive = clf.predict_proba(X)[:, 1]

    fig = go.Figure()
    for i, threshold in enumerate(thresholds):
        y_pred = (prob_positive > threshold).astype(int)
        fig.add_trace(go.Scatter(
            x=X[:, 0], y=X[:, 1], mode='markers',
            # cmin/cmax pin the colour scale so 0 and 1 keep the same colours
            # even when every point falls in a single class.
            marker=dict(color=y_pred, colorscale='Viridis', size=10,
                        cmin=0, cmax=1,
                        line=dict(color='black', width=0.5)),
            name=f'Threshold={threshold:.1f}',
            visible=(i == 0)))  # only the initially active step is shown

    # One slider step per threshold; each step shows only its own trace.
    steps = [dict(method='update',
                  label=f'{threshold:.1f}',
                  args=[{'visible': [j == i for j in range(len(thresholds))]}])
             for i, threshold in enumerate(thresholds)]

    fig.update_layout(title='Logistic Regression Decision Boundary with Slider',
                      xaxis=dict(title=features[0]),
                      yaxis=dict(title=features[1]),
                      sliders=[dict(steps=steps,
                                    currentvalue=dict(prefix='Threshold: '),
                                    active=0,
                                    visible=True,
                                    x=0.1,
                                    y=0,
                                    len=0.9)])

    fig.show()



# Draw the decision-boundary figure for the whole scaled dataset.
plot_decisionboundary(X_scaled, y, features)

# Visualize the confusion matrix
def plot_confusionmatrix(X, y):
    """Show the confusion matrix as a heatmap, one trace per threshold.

    A single logistic regression is fitted on ``(X, y)``. For each threshold
    in 0.1, 0.2, ..., 0.9 a sample is predicted as 1 when its predicted
    probability for the second class exceeds the threshold, and those
    predictions are compared with ``y``. The slider shows exactly one
    threshold's heatmap at a time.

    Args:
        X: 2-D feature array, one row per sample.
        y: Binary (0/1) labels, one per row of ``X``.
    """
    thresholds = np.linspace(0.1, 0.9, 9)

    # With 0/1 labels, `y > threshold` is the same for every threshold in
    # (0, 1), so the fit is loop-invariant: fit once and apply the threshold
    # to the predicted probabilities instead.
    clf = LogisticRegression()
    clf.fit(X, y)
    prob_positive = clf.predict_proba(X)[:, 1]

    fig = go.Figure()
    for i, threshold in enumerate(thresholds):
        y_pred = (prob_positive > threshold).astype(int)
        # labels=[0, 1] keeps the matrix 2x2 even if one class is never predicted.
        conf_matrix = confusion_matrix(y, y_pred, labels=[0, 1])

        fig.add_trace(go.Heatmap(
            z=conf_matrix, zmin=0, zmax=len(X), colorscale='Viridis',
            x=['Predicted 0', 'Predicted 1'], y=['Actual 0', 'Actual 1'],
            name=f'Threshold={threshold:.1f}',
            visible=(i == 0)))  # only the initially active step is shown

    # One slider step per threshold; each step shows only its own heatmap.
    steps = [dict(method='update',
                  label=f'{threshold:.1f}',
                  args=[{'visible': [j == i for j in range(len(thresholds))]}])
             for i, threshold in enumerate(thresholds)]

    fig.update_layout(title='Confusion Matrix with Slider',
                      xaxis=dict(title='Predicted Label'),
                      yaxis=dict(title='Actual Label'),
                      sliders=[dict(steps=steps,
                                    currentvalue=dict(prefix='Threshold: '),
                                    active=0,
                                    visible=True,
                                    x=0.1,
                                    y=0,
                                    len=0.9)])

    fig.show()

# Draw the confusion-matrix figure for the whole scaled dataset.
plot_confusionmatrix(X_scaled, y)

Accuracy: 90.35%


In [2]:
import plotly.graph_objects as go
import plotly.express as px
import ipywidgets as widgets
from ipywidgets import interactive
from sklearn.metrics import accuracy_score, roc_curve, auc

# The triple-quoted string below is a bare expression that is never assigned
# or called: it holds a disabled plotly-slider version of the ROC plot. The
# ipywidgets-based update_roc_curve defined further down is what actually runs.
'''
# plot ROC curve 
def plot_roc_curve(X, y):
    thresholds = np.linspace(0.1, 0.9, 9)
    fig = go.Figure()

    for threshold in thresholds:
        model = LogisticRegression()
        model.fit(X, y > threshold)
        y_pred_prob = model.predict_proba(X)[:, 1]
        fpr, tpr, _ = roc_curve(y, y_pred_prob)
        auc_value = auc(fpr, tpr)

        trace = go.Scatter(x=fpr, y=tpr, mode='lines',
                           name=f'Threshold={threshold:.1f}, AUC={auc_value:.2f}')

        fig.add_trace(trace)

    fig.update_layout(title='ROC Curve with Slider',
                      xaxis=dict(title='False Positive Rate'),
                      yaxis=dict(title='True Positive Rate'),
                      sliders=[dict(steps=[dict(method='relayout',
                                                args=['shapes', []])],
                                    active=0,
                                    visible=True,
                                    x=0.1,
                                    y=0,
                                    len=0.9)])

    fig.show()
'''

# Threshold slider for the ROC plot: values 0.1 to 0.9 in steps of 0.1, starting at 0.5.
roc_curve_slider = widgets.FloatSlider(value=0.5, min=0.1, max=0.9, step=0.1, description='Threshold:')


def update_roc_curve(threshold):
    """Draw the ROC curve and mark the operating point for ``threshold``.

    Reads the module-level ``model`` (already fitted), ``X_scaled`` and ``y``.
    The shared model is not refitted: refitting on ``y > threshold`` would
    overwrite the earlier fit on every slider move, and with 0/1 labels it
    cannot depend on the threshold anyway. Instead the threshold selects the
    point on the curve whose score cut-off is closest to it.

    Args:
        threshold: Probability cut-off chosen with the slider.
    """
    y_pred_prob = model.predict_proba(X_scaled)[:, 1]
    fpr, tpr, roc_thresholds = roc_curve(y, y_pred_prob)
    auc_value = auc(fpr, tpr)

    # Index of the ROC point whose cut-off is nearest the slider value. A
    # leading infinite cut-off, if present, has infinite distance and is
    # never selected.
    nearest = int(np.argmin(np.abs(roc_thresholds - threshold)))

    fig = go.Figure()

    fig.add_trace(go.Scatter(x=fpr, y=tpr, mode='lines',
                             name=f'Threshold={threshold:.1f}, AUC={auc_value:.2f}'))
    fig.add_trace(go.Scatter(x=[fpr[nearest]], y=[tpr[nearest]], mode='markers',
                             marker=dict(size=12),
                             name='Operating point'))

    fig.update_layout(title='ROC Curve with Slider',
                      xaxis=dict(title='False Positive Rate'),
                      yaxis=dict(title='True Positive Rate'))

    fig.show()

# Bind roc_curve_slider to the `threshold` argument of update_roc_curve.
interactive_plot_roc_curve = interactive(update_roc_curve, threshold=roc_curve_slider)

# Print a short prompt, then leave the widget as the cell's final expression
# so that the notebook renders it.
print("Adjust the threshold with the slider:")
interactive_plot_roc_curve

Adjust the threshold with the slider:


interactive(children=(FloatSlider(value=0.5, description='Threshold:', max=0.9, min=0.1), Output()), _dom_clas…

In [3]:
# 3D decision boundary
def plot_3d_decision_boundary(X, y, features):
    """Plot the predicted label over a 2-D grid as a surface per threshold.

    A single logistic regression is fitted on ``(X, y)`` and evaluated on a
    grid (step 0.1) covering the range of both columns of ``X`` padded by 1.
    For each threshold in 0.1, 0.2, ..., 0.9 a grid point is labelled 1 when
    its predicted probability for the second class exceeds the threshold.
    The slider shows exactly one threshold's surface at a time.

    Args:
        X: 2-D array with two feature columns.
        y: Binary (0/1) labels, one per row of ``X``.
        features: Axis titles for columns 0 and 1 of ``X``.
    """
    thresholds = np.linspace(0.1, 0.9, 9)

    # With 0/1 labels, `y > threshold` is the same for every threshold in
    # (0, 1), so the fit and the grid are loop-invariant: compute them once
    # and apply the threshold to the predicted probabilities.
    clf = LogisticRegression()
    clf.fit(X, y)

    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))
    grid_prob = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1].reshape(xx.shape)

    fig = go.Figure()
    for i, threshold in enumerate(thresholds):
        Z = (grid_prob > threshold).astype(int)

        fig.add_trace(go.Surface(
            x=xx, y=yy, z=Z, opacity=0.5, showscale=False,
            colorscale='Viridis', cmin=0, cmax=1,
            name=f'Threshold={threshold:.1f}',
            visible=(i == 0)))  # only the initially active step is shown

    # One slider step per threshold; each step shows only its own surface.
    steps = [dict(method='update',
                  label=f'{threshold:.1f}',
                  args=[{'visible': [j == i for j in range(len(thresholds))]}])
             for i, threshold in enumerate(thresholds)]

    fig.update_layout(title='3D Decision Boundary with Slider',
                      scene=dict(xaxis_title=features[0],
                                 yaxis_title=features[1],
                                 zaxis_title='Predicted Label'),
                      sliders=[dict(steps=steps,
                                    currentvalue=dict(prefix='Threshold: '),
                                    active=0,
                                    visible=True,
                                    x=0.1,
                                    y=0,
                                    len=0.9)])

    fig.show()

# Threshold slider for the 3D plot: values 0.1 to 0.9 in steps of 0.1, starting at 0.5.
decision_boundary_slider = widgets.FloatSlider(value=0.5, min=0.1, max=0.9, step=0.1, description='Threshold:')


def update_3d_decision_boundary(threshold):
    """Draw the predicted-label surface for one probability threshold.

    Reads the module-level ``model`` (already fitted), ``X_scaled`` and
    ``features``. The shared model is not refitted: refitting on
    ``y > threshold`` would overwrite the earlier fit on every slider move,
    and with 0/1 labels it cannot depend on the threshold anyway. Instead a
    grid point is labelled 1 when its predicted probability for the second
    class exceeds ``threshold``.

    Args:
        threshold: Probability cut-off chosen with the slider.
    """
    # Grid with step 0.1 covering both feature ranges, padded by 1 on each side.
    x_min, x_max = X_scaled[:, 0].min() - 1, X_scaled[:, 0].max() + 1
    y_min, y_max = X_scaled[:, 1].min() - 1, X_scaled[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1), np.arange(y_min, y_max, 0.1))

    grid_prob = model.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
    Z = (grid_prob > threshold).astype(int).reshape(xx.shape)

    fig = go.Figure()

    # cmin/cmax pin the colour scale so 0 and 1 keep the same colours even
    # when the whole grid falls in a single class.
    fig.add_trace(go.Surface(x=xx, y=yy, z=Z, opacity=0.5, showscale=False,
                             colorscale='Viridis', cmin=0, cmax=1,
                             name=f'Threshold={threshold:.1f}'))

    fig.update_layout(title='3D Decision Boundary with Slider',
                      scene=dict(xaxis_title=features[0],
                                 yaxis_title=features[1],
                                 zaxis_title='Predicted Label'))

    fig.show()


# Bind decision_boundary_slider to the `threshold` argument of
# update_3d_decision_boundary.
interactive_plot_3d_decision_boundary = interactive(update_3d_decision_boundary, threshold=decision_boundary_slider)


# Print a short prompt, then leave the widget as the cell's final expression
# so that the notebook renders it.
print("Adjust the threshold with the slider:")
interactive_plot_3d_decision_boundary

Adjust the threshold with the slider:


interactive(children=(FloatSlider(value=0.5, description='Threshold:', max=0.9, min=0.1), Output()), _dom_clas…