In [45]:
import numpy as np
import matplotlib.pyplot as plt
import copy, math

In [46]:
X_train = np.array([[0.5, 1.5], [1,1], [1.5, 0.5], [3, 0.5], [2, 2], [1, 2.5]])
y_train = np.array([0, 0, 0, 1, 1, 1])

In [47]:
def sigmoid(z):
  z = np.clip( z, -500, 500 )           # protect against overflow
  g = 1.0/(1.0+np.exp(-z))

  return g

In [48]:
def log_1pexp(x, maximum=20):
    out  = np.zeros_like(x,dtype=float)
    i    = x <= maximum
    ni   = np.logical_not(i)

    out[i]  = np.log(1 + np.exp(x[i]))
    out[ni] = x[ni]
    return out

In [49]:
def compute_cost_logistic(x, y, w, b, lambda_=0, safe=False):
  m,n = x.shape
  cost = 0.0
  for i in range(m):
      z_i    = np.dot(x[i],w) + b                                             #(n,)(n,) or (n,) ()
      if safe:  #avoids overflows
          cost += -(y[i] * z_i ) + log_1pexp(z_i)
      else:
          f_wb_i = sigmoid(z_i)                                                   #(n,)
          cost  += -y[i] * np.log(f_wb_i) - (1 - y[i]) * np.log(1 - f_wb_i)       # scalar
  cost = cost/m

  reg_cost = 0
  if lambda_ != 0:
      for j in range(n):
          reg_cost += (w[j]**2)                                               # scalar
      reg_cost = (lambda_/(2*m))*reg_cost

  return cost + reg_cost


In [50]:
def compute_gradient_logistic(x, y, w, b):
  m,n = x.shape
  dj_dw = np.zeros((n,))                           #(n,)
  dj_db = 0.

  for i in range(m):
      f_wb_i = sigmoid(np.dot(x[i],w) + b)          #(n,)(n,)=scalar
      err_i  = f_wb_i  - y[i]                       #scalar
      for j in range(n):
          dj_dw[j] = dj_dw[j] + err_i * x[i,j]      #scalar
      dj_db = dj_db + err_i
  dj_dw = dj_dw/m                                   #(n,)
  dj_db = dj_db/m                                   #scalar
      
  return dj_db, dj_dw  


In [51]:
def gradient_descent(x, y, w_initial, b_initial, alpha, num_iter):
  w = copy.deepcopy(w_initial)
  b = b_initial
  J_history = []
  
  for i in range(num_iter):
    dj_dw, dj_db = compute_gradient_logistic(x, y, b, w)
    w = w - alpha * dj_dw
    b = b - alpha * dj_db

    if i < 100000:
      J_history.append(compute_cost_logistic(x, y, w, b))
    if i % math.ceil(num_iter / 10) == 0:
      print(f"Iteration {i:4d}: Cost {J_history[-1]}   ")

    return w, b, J_history

In [52]:
w_initial = np.zeros_like(X_train[0])
b_initial = 0
alpha = 0.1
iters = 10000

w_final, b_final = gradient_descent(X_train, y_train, w_initial, b_initial, alpha, iters)
print(f"\nupdated parameters: w:{w_final}, b:{b_final}")


ValueError: setting an array element with a sequence.