In [1]:

def f(x):
    return 5 * x**4 + 3 * x**2 + 10
def df(x):
    return 20 * x**3 + 6 * x
def gradient_descent(learning_rate, max_iterations):

    x = 1.0
    for i in range(max_iterations):
        gradient = df(x)
        prev_x = x
        x = x - learning_rate * gradient

        if abs(x - prev_x) < 1e-6:
            print(f"Converged after {i+1} iterations")
            break
    return x
learning_rate = 0.01
max_iterations = 1000
min_x = gradient_descent(learning_rate, max_iterations)
min_value = f(min_x)

print(f"Minimum occurs at x = {min_x}")
print(f"Minimum value = {min_value}")

Converged after 166 iterations
Minimum occurs at x = 1.5377093794492703e-05
Minimum value = 10.000000000709365


In [7]:
e = 2.718281828459045

def g(x, y):
    return 3 * x**2 + 5 * (e**-y) + 10

def dg_dx(x, y):
    return 6 * x
def dg_dy(x, y):
    return -5 * (e**-y)

def gradient_descent_2d(learning_rate, max_iterations):
    x = 1.0
    y = 1.0

    for i in range(max_iterations):
        grad_x = dg_dx(x, y)
        grad_y = dg_dy(x, y)
        prev_x = x
        prev_y = y
        x = x - learning_rate * grad_x
        y = y - learning_rate * grad_y

        if ((x - prev_x)**2 + (y - prev_y)**2)**0.5 < 1e-6:
            print(f"Converged after {i+1} iterations")
            break

    return x, y

learning_rate = 0.01
max_iterations = 1000
min_x, min_y = gradient_descent_2d(learning_rate, max_iterations)
min_value = g(min_x, min_y)

print(f"Minimum occurs at x = {min_x} and y = {min_y}")
print(f"Minimum value = {min_value}")


Minimum occurs at x = 1.3423123924933694e-27 and y = 3.9663707936306647
Minimum value = 10.094710265808493


In [8]:
import math

def gradient_descent_sigmoid(learning_rate, max_iterations):

    xi = 1.0
    def sigmoid(x):
        return 1 / (1 + math.exp(-x))
    def sigmoid_derivative(x):
        sig = sigmoid(x)
        return sig * (1 - sig)
    for i in range(max_iterations):
        gradient = sigmoid_derivative(xi)
        xi_new = xi - learning_rate * gradient
        if abs(gradient) < 1e-6:
            print(f"Converged after {i+1} iterations")
            break

        xi = xi_new

    return xi

learning_rate = 0.01
max_iterations = 1000
min_x = gradient_descent_sigmoid(learning_rate, max_iterations)

print(f"x = {min_x:.6f}")
print(f"sigmoid(x) = {1/(1 + math.exp(-min_x)):.6f}")

x = -1.240692
sigmoid(x) = 0.224316


In [9]:
def gradient_descent_linear_model(inputs, expected_outputs, learning_rate, num_iterations):
    M, C = 0, 0
    n = len(inputs)
    for i in range(num_iterations):
        grad_M = sum(-2 * x * (y - (M * x + C)) for x, y in zip(inputs, expected_outputs)) / n
        grad_C = sum(-2 * (y - (M * x + C)) for x, y in zip(inputs, expected_outputs)) / n
        M = M - learning_rate * grad_M
        C = C - learning_rate * grad_C
        if abs(grad_M) < 1e-6 and abs(grad_C) < 1e-6:
            print(f"Converged after {i+1} iterations")
            break

    return M, C

inputs = [1, 2, 3, 4]
expected_outputs = [2, 4, 6, 8]
learning_rate = 0.01
num_iterations = 1000

M, C = gradient_descent_linear_model(inputs, expected_outputs, learning_rate, num_iterations)
print(f"M = {M}, C = {C}")
     

M = 1.9896587550255742, C = 0.030404521305361965


In [1]:
def f(x):
    return 5 * x*4 + 3 * x*2 + 1

def f_prime(x):
    return 20 * x**3 + 6 * x

def gradient_descent(x0, learning_rate, epsilon, max_iterations):
    x = x0
    iteration = 0

    while iteration < max_iterations:
        gradient = f_prime(x)

        if abs(gradient) < epsilon:
            break

        x = x - learning_rate * gradient

        print(f"Iteration {iteration}: x = {x:.2f}, f(x) = {f(x):.2f}, f'(x) = {gradient:.2f}")

        iteration += 1

    print(f"\nMinimum value found at x = {x:.2f}, f(x) = {f(x):.2f} after {iteration} iterations.")
    return x


x0 = 1.0
learning_rate = 0.1
epsilon = 0.001
max_iterations = 100

gradient_descent(x0, learning_rate, epsilon, max_iterations)

Iteration 0: x = -1.60, f(x) = -40.60, f'(x) = 26.00
Iteration 1: x = 7.55, f(x) = 197.35, f'(x) = -91.52
Iteration 2: x = -858.40, f(x) = -22317.43, f'(x) = 8659.53
Iteration 3: x = 1265029831.91, f(x) = 32890775630.67, f'(x) = -12650306903.12
Iteration 4: x = -4048855683368040987719892992.00, f(x) = -105270247767569061282670706688.00, f'(x) = 40488556833680405479152418816.00
Iteration 5: x = 132747663895560379390685320115646207514183660413405746239735203098413203015447085056.00, f(x) = 3451439261284570133757284994513199342038926040944856138604559505964468089437726703616.00, f'(x) = -1327476638955603632147173198252623307139746082016273420574485495740697143532809355264.00
Iteration 6: x = -4678543339217819506657213367159102987107243120201274216005043431620135028306039605992472540317040111014367579117330688498735374561848577516276661636325884873626994607943932732171380132292365419381254142682217560731139831875759501782252559749045288960.00, f(x) = -12164212681966330081397344148243287929

OverflowError: (34, 'Result too large')