In [None]:
from sklearn.datasets import load_breast_cancer
# Load the breast cancer dataset that ships with scikit-learn.
data = load_breast_cancer()

# X is the feature matrix; y holds the target labels (0 for malignant, 1 for benign).
X, y = data.data, data.target

# Print the dataset description; comment this line out to skip the long text.
print(data.DESCR)

In [None]:
import numpy as np

def sigmoid(x):
    """
    Logistic sigmoid 1 / (1 + exp(-x)), applied element-wise.

    Args:
      x (scalar or ndarray): input value(s)
    Returns:
      scalar or ndarray: sigmoid of x, same shape as x
    """
    # Clamp the input to [-500, 500] first so np.exp cannot overflow.
    clipped = np.clip(x, -500, 500)
    denominator = 1.0 + np.exp(-clipped)
    return 1.0 / denominator

In [50]:
def compute_cost_logistic(X, y, w, b, lambda_=0, safe=False):
    """
    Computes cost using logistic loss, non-matrix version

    Args:
      X (ndarray): Shape (m,n)  matrix of examples with n features
      y (ndarray): Shape (m,)   target values
      w (ndarray): Shape (n,)   parameters for prediction
      b (scalar):               parameter  for prediction
      lambda_ : (scalar, float) Controls amount of regularization, 0 = no regularization
      safe : (boolean)          True-selects under/overflow safe algorithm
    Returns:
      cost (scalar): mean logistic loss over the m examples, plus the
                     L2 penalty (lambda_ / (2*m)) * sum(w**2) when lambda_ != 0
    """
    eps = 1e-10   # offset that keeps np.log away from log(0) when the sigmoid saturates

    m, n = X.shape
    cost = 0.0
    for i in range(m):
        z_i = np.dot(X[i], w) + b                      # scalar linear score of example i
        if safe:
            # Loss written as -y*z + log(1 + exp(z)). np.logaddexp(0, z) evaluates
            # log(exp(0) + exp(z)) without overflowing for large |z|.
            cost += -(y[i] * z_i) + np.logaddexp(0.0, z_i)
        else:
            f_wb_i = sigmoid(z_i)                      # predicted probability
            cost += -y[i] * np.log(f_wb_i + eps) - (1 - y[i]) * np.log(1 - f_wb_i + eps)
    cost = cost / m

    reg_cost = 0
    if lambda_ != 0:
        # Sequential sum of squared weights; the intercept b is not penalized.
        reg_cost = (lambda_ / (2 * m)) * sum(w[j] ** 2 for j in range(n))

    return cost + reg_cost

In [None]:
import numpy as np

def calculate_gradient_for_loop(X, y, w, b):
    """
    Computes the averaged gradient for logistic regression using explicit loops.

    Args:
      X (ndarray): Shape (m,n)  matrix of examples with n features
      y (ndarray): Shape (m,)   target values
      w (ndarray): Shape (n,)   parameters for prediction
      b (scalar):               parameter  for prediction
    Returns:
      dw (ndarray): Shape (n,)  mean over examples of (sigmoid(z_i) - y_i) * x_i
      db (scalar):              mean over examples of (sigmoid(z_i) - y_i)
    """
    num_examples, num_features = X.shape
    grad_w = np.zeros((num_features,))   # running sum of per-example weight gradients
    grad_b = 0.                          # running sum of per-example intercept gradients

    for i in range(num_examples):
        x_i = X[i]
        # Prediction error for example i: predicted probability minus label.
        error_i = sigmoid(np.dot(x_i, w) + b) - y[i]
        for j in range(num_features):
            grad_w[j] += x_i[j] * error_i
        grad_b += error_i

    # Turn the accumulated sums into averages over the examples.
    return grad_w / num_examples, grad_b / num_examples


In [None]:
import copy, math

def logistic_model(X, y, w_initial, b_initial, learning_rate=0.01, num_iterations=1000):
    """
    Fits logistic-regression parameters with batch gradient descent.

    Args:
      X (ndarray): Shape (m,n)          matrix of examples with n features
      y (ndarray): Shape (m,)           target values
      w_initial (ndarray): Shape (n,)   starting weights (copied, never modified)
      b_initial (scalar):               starting intercept
      learning_rate (float):            step size applied to each gradient
      num_iterations (int):             number of gradient-descent updates
    Returns:
      w (ndarray): Shape (n,)  weights after the final update
      b (scalar):              intercept after the final update
      J_history (list):        cost after each of the first 100000 updates
    """
    max_history = 100000           # cap on stored costs; prevents resource exhaustion
    J_history = []
    w = copy.deepcopy(w_initial)   # work on a copy so the caller's array is untouched
    b = b_initial

    # Progress is reported about ten times per run (or every iteration if < 10).
    # The interval does not depend on i, so it is computed once.
    print_every = math.ceil(num_iterations / 10)

    for i in range(num_iterations):
        # Gradient of the cost at the current parameters, then one descent step.
        dw, db = calculate_gradient_for_loop(X, y, w, b)
        w = w - learning_rate * dw
        b = b - learning_rate * db

        # Save cost J at each iteration while under the history cap.
        cost = None
        if i < max_history:
            cost = compute_cost_logistic(X, y, w, b)
            J_history.append(cost)

        if i % print_every == 0:
            if cost is None:
                # Past the cap nothing was recorded for this iteration; evaluate
                # the cost now rather than reporting a stale J_history[-1].
                cost = compute_cost_logistic(X, y, w, b)
            print(f"Iteration {i:4d}: Cost {cost}   ")
    return w, b, J_history

In [56]:
import numpy as np
from sklearn.model_selection import train_test_split

# Hold out 20% of the examples for testing; the fixed random_state makes the split repeatable.
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Start from all-zero weights (one per feature) and a zero intercept.
# NOTE(review): the features are passed to training as loaded, without scaling - confirm this is intended.
w_tmp = np.zeros_like(train_X[0])
b_tmp = 0.

# Gradient-descent settings: step size and number of updates.
alpha = 0.1
iters = 1000

w_out, b_out, history = logistic_model(
    train_X, train_y, w_tmp, b_tmp, learning_rate=alpha, num_iterations=iters
)

print(f"\nupdated parameters: w:{w_out}, b:{b_out}")

Iteration    0: Cost 14.473392013068214   
Iteration  100: Cost 6.730633348681038   
Iteration  200: Cost 2.3784944915744997   
Iteration  300: Cost 2.4196300261105126   
Iteration  400: Cost 2.3785903387156364   
Iteration  500: Cost 2.0934272515705215   
Iteration  600: Cost 2.0748486389189518   
Iteration  700: Cost 2.024430062676937   
Iteration  800: Cost 2.0242506311123405   
Iteration  900: Cost 2.0247204090993542   

updated parameters: w:[ 6.46690480e+00  8.43467282e+00  3.75623407e+01  1.54548934e+01
  5.81674010e-02 -2.97686511e-02 -1.18255340e-01 -5.05056037e-02
  1.09511609e-01  4.68675301e-02  2.49197574e-02  6.14996934e-01
 -1.62778507e-01 -1.63074093e+01  3.07015386e-03 -6.83128692e-03
 -1.33559014e-02 -1.69507873e-03  9.84419097e-03  1.00396016e-03
  6.80168708e+00  1.05233822e+01  3.79435181e+01 -2.23383434e+01
  7.12320217e-02 -1.29573704e-01 -2.53849976e-01 -5.90139852e-02
  1.40974821e-01  4.29193121e-02], b:0.8453572267314364
