In [4]:
import copy

import numpy as np
import matplotlib.pyplot as plt

In [5]:
# Toy training set: six examples with two features each, and their 0/1 labels.
X_train = np.array([
    [0.5, 1.5],
    [1, 1],
    [1.5, 0.5],
    [3, 0.5],
    [2, 2],
    [1, 2.5],
])
y_train = np.array([0, 0, 0, 1, 1, 1])

In [6]:
# Sigmoid function
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)).

    `z` may be a scalar or a NumPy array; `np.exp` is applied elementwise,
    so an array input yields an array of the same shape.
    """
    return 1 / (1 + np.exp(-z))

In [7]:
# Logistic loss function
def function_cost(X, y, w, b):
    """Return the mean logistic (binary cross-entropy) loss.

    For each example the loss is
        -y * log(f) - (1 - y) * log(1 - f),   with f = sigmoid(x . w + b),
    and the result is the average over all examples, which is the objective
    whose gradient `function_gradient` computes (it also divides by m).

    Args:
        X: 2-D array, one example per row (shape (m, n)).
        y: length-m sequence of targets (expected to be 0/1 labels).
        w: length-n weight vector.
        b: scalar bias.

    Returns:
        The mean loss as a float; 0.0 when X has no rows.
    """
    m = X.shape[0]
    if m == 0:
        # Nothing to average over; avoid a 0/0 division.
        return 0.0

    y = np.asarray(y)
    z = np.dot(X, w) + b

    # Numerically stable forms of the two log terms:
    #   -log(sigmoid(z))     == logaddexp(0, -z)
    #   -log(1 - sigmoid(z)) == logaddexp(0,  z)
    # These stay finite for large |z|, where log(sigmoid(z)) would hit log(0).
    losses = y * np.logaddexp(0, -z) + (1 - y) * np.logaddexp(0, z)

    return np.sum(losses) / m

In [12]:
# Compute the partial derivatives of the cost
def function_gradient(X, y, w, b):
    """Return the gradient of the mean logistic loss with respect to w and b.

    Args:
        X: 2-D array of shape (m, n), one example per row.
        y: length-m sequence of targets.
        w: length-n weight vector.
        b: scalar bias.

    Returns:
        (dj_dw, dj_db): dj_dw is a length-n array, dj_db a scalar; both are
        the per-example contributions summed and then divided by m.
    """
    m, n = X.shape

    dj_dw = np.zeros(n)
    dj_db = 0.

    for i, x_i in enumerate(X):
        # Residual between the predicted probability and the target.
        err = sigmoid(np.dot(x_i, w) + b) - y[i]
        # Accumulate all n feature components of this example at once.
        dj_dw += err * x_i
        dj_db = dj_db + err

    dj_dw = dj_dw / m
    dj_db = dj_db / m

    return dj_dw, dj_db

In [13]:
def gradient_descent(X, y, w_star, b_star, alpha, iterations, cost_function, gradient_function):
    """Run batch gradient descent and return the fitted parameters.

    Args:
        X: 2-D array of training examples, one per row.
        y: targets, passed through to the callbacks.
        w_star: initial weights; deep-copied, so the caller's object is
            never modified.
        b_star: initial bias.
        alpha: learning rate (step size).
        iterations: number of update steps to perform.
        cost_function: callable (X, y, w, b) -> cost.
        gradient_function: callable (X, y, w, b) -> (dj_dw, dj_db).

    Returns:
        (w, b, J_history): the final weights, the final bias, and the list
        of costs evaluated *before* each update (length == iterations).
        When X has no rows there is nothing to fit, so the initial
        parameters are returned unchanged together with an empty history.
    """
    w = copy.deepcopy(w_star)
    b = b_star
    J_history = []

    if X.shape[0] == 0:
        # Keep the 3-tuple shape so callers can always unpack the result.
        return w, b, J_history

    for _ in range(iterations):
        J_history.append(cost_function(X, y, w, b))

        dj_dw, dj_db = gradient_function(X, y, w, b)
        w = w - alpha * dj_dw
        b = b - alpha * dj_db

    return w, b, J_history

In [14]:
# Start from all-zero parameters (one weight per feature column of X_train).
w_tmp = np.zeros_like(X_train[0])
b_tmp = 0.

# Learning rate and number of gradient-descent steps.
alph = 0.1
iters = 10000

# The cost history is not needed here, so it is discarded.
w_out, b_out, _ = gradient_descent(
    X=X_train,
    y=y_train,
    w_star=w_tmp,
    b_star=b_tmp,
    alpha=alph,
    iterations=iters,
    cost_function=function_cost,
    gradient_function=function_gradient,
)
print(f"\nupdated parameters: w:{w_out}, b:{b_out}")


updated parameters: w:[5.28123029 5.07815608], b:-14.222409982019837
