In [2]:
import numpy as np

X = np.array([2.5, 3.5, 5.6, 2.2, 6.9, 9.6],dtype=float)
y = np.array([0,0,1,0,1,1],dtype=float)

alpha = 0.001
m = len(y)

In [3]:
#Batch Gradient Descent
theta = np.array([0,0],dtype=float)
X_bias = np.hstack((np.ones((m,1)),X.reshape(-1,1)))

def cost_func(predictions, y):
    return (-1/m) * np.sum(y * np.log(predictions) + (1-y) * np.log(1-predictions))

def hypothesis(X,theta):
    return 1 / (1 + np.exp(-(X @ theta)))

iterations = 100
for i in range(1,iterations+1):
    predictions = hypothesis(X_bias,theta)
    errors = predictions - y
    gradient = 1/m * (errors @ X_bias)
    theta -= alpha * gradient

    if (i%10==0 or i == 1):
        print(f"Iteration {i}: Cost: {cost_func(predictions,y):.4f}, theta: {theta}")

print("\nFinal theta:", np.round(theta,4))


Iteration 1: Cost: 0.6931, theta: [0.         0.00115833]
Iteration 10: Cost: 0.6819, theta: [-6.43486058e-05  1.11703012e-02]
Iteration 20: Cost: 0.6711, theta: [-0.00026418  0.02147099]
Iteration 30: Cost: 0.6620, theta: [-0.00058815  0.03097525]
Iteration 40: Cost: 0.6541, theta: [-0.00102569  0.03975156]
Iteration 50: Cost: 0.6475, theta: [-0.00156707  0.04786326]
Iteration 60: Cost: 0.6417, theta: [-0.00220332  0.05536849]
Iteration 70: Cost: 0.6368, theta: [-0.00292631  0.06232033]
Iteration 80: Cost: 0.6325, theta: [-0.00372861  0.068767  ]
Iteration 90: Cost: 0.6288, theta: [-0.00460348  0.0747522 ]
Iteration 100: Cost: 0.6256, theta: [-0.00554481  0.08031551]

Final theta: [-0.0055  0.0803]


In [4]:
#Stochastic Gradient Descent
theta = np.array([0,0],dtype=float)
X_bias = np.hstack((np.ones((m,1)),X.reshape(-1,1)))

def hypothesis(X,theta):
    return 1/(1+ np.exp(-(X @ theta)))

def cost_func(predictions,y):
    return (-1/m) * np.sum(y*np.log(predictions) + (1-y)*np.log(1-predictions))

iterations = 100
for i in range(1,iterations+1):
    for j in range(m):
        x_j = X_bias[j,:].reshape(1,-1)
        y_j = y[j]
        predictions_j = hypothesis(x_j,theta)
        errors_j = predictions_j - y_j
        gradient_j = errors_j * x_j #no need for 1/m
        theta -= alpha * gradient_j.flatten()

    predictions = hypothesis(X_bias,theta)  
    cost = cost_func(predictions,y)
    
    if (i%10==0):
        print(f"Iteration {i}: Cost: {cost_func(predictions,y):.4f}, theta: {theta}")

print("\nFinal theta:", np.round(theta,4))

Iteration 10: Cost: 0.6404, theta: [-0.00201116  0.05641829]
Iteration 20: Cost: 0.6194, theta: [-0.00730865  0.09193569]
Iteration 30: Cost: 0.6098, theta: [-0.0145458   0.11522996]
Iteration 40: Cost: 0.6046, theta: [-0.02296517  0.13110946]
Iteration 50: Cost: 0.6012, theta: [-0.03212752  0.14231465]
Iteration 60: Cost: 0.5987, theta: [-0.04176632  0.15048359]
Iteration 70: Cost: 0.5965, theta: [-0.05171356  0.15663705]
Iteration 80: Cost: 0.5944, theta: [-0.06186018  0.16143169]
Iteration 90: Cost: 0.5924, theta: [-0.0721339   0.16529993]
Iteration 100: Cost: 0.5905, theta: [-0.08248609  0.16853142]

Final theta: [-0.0825  0.1685]


In [13]:
X = np.array([[1, 2.5],
              [1, 3.5],
              [1, 5.6],
              [1, 2.2],
              [1, 6.9],
              [1, 9.6]
              ])
y = np.array([0,0,1,0,1,1])
theta = np.array([1,2])
l = 10

predictions = 1 / (1 + np.exp(-(X@theta)))
print("Predictions:",predictions)

errors = predictions - y
print("Errors:",errors)

gradient = 1/6 * X.T @ errors
print("Gradient:",gradient)

reg_term = (l / 6) * theta  
reg_term[0] = 0  # Ensure theta_0 is not regularized
regularized_gradient = gradient + reg_term
print("Regularized Gradient:", regularized_gradient)

hessian = 1/6 * X.T @ np.diag(predictions * (1-predictions)) @ X
print("Hessian:",hessian)

reg_matrix = np.eye(X.shape[1])
reg_matrix[0, 0] = 0  # Ensure theta_0 is not regularized
regularized_hessian = hessian + l * reg_matrix
print("Regularized Hessian:", regularized_hessian)

theta = theta - np.linalg.inv(regularized_hessian) @ gradient
print("New theta:",theta)

Predictions: [0.99752738 0.99966465 0.99999497 0.99550373 0.99999963 1.        ]
Errors: [ 9.97527377e-01  9.99664650e-01 -5.03043030e-06  9.95503727e-01
 -3.73629798e-07 -1.68752989e-09]
Gradient: [0.49878172 1.36378703]
Regularized Gradient: [0.49878172 4.69712036]
Hessian: [[0.00121387 0.00286962]
 [0.00286962 0.00689369]]
Regularized Hessian: [[1.21386823e-03 2.86961572e-03]
 [2.86961572e-03 1.00068937e+01]]
New theta: [-409.85904114    1.98153478]
