In [5]:
def gradient_descent(f_prime, initial_x, learning_rate, tolerance, max_iterations):


    x = initial_x
    for i in range(max_iterations):
        gradient = f_prime(x)
        new_x = x - learning_rate * gradient
        if abs(new_x - x) < tolerance:
            return new_x, i + 1
        x = new_x
    return x, max_iterations

# Example usage
# Function: f(x) = 5x^4 + 3x^2 + 10
# Derivative: f'(x) = 20x^3 + 6x
def f_prime(x):
    return 20*x*x*x + 6*x

initial_x = 0  # Starting point
learning_rate = 0.1  # Step size
tolerance = 1e-6  # Tolerance for stopping
max_iterations = 1000  # Maximum number of iterations

min_x, iterations = gradient_descent(f_prime, initial_x, learning_rate, tolerance, max_iterations)
print(f"The minimum value is at x = {min_x}, found in {iterations} iterations.")

The minimum value is at x = 0.0, found in 1 iterations.


In [7]:
# Import sympy
import sympy as sp

# Define the symbols
x, y = sp.symbols('x y')

# Define the function
f = 3 * x**2 + 5 * sp.exp(-y) + 10

# Compute the partial derivatives
f_prime_x = sp.diff(f, x)
f_prime_y = sp.diff(f, y)

# Display the partial derivatives
print(f"f'_x(x, y) = {f_prime_x}")
print(f"f'_y(x, y) = {f_prime_y}")

# Gradient Descent Algorithm for two variables
def gradient_descent_2d(f_prime_x, f_prime_y, initial_x, initial_y, learning_rate, tolerance, max_iterations):

    x_val, y_val = initial_x, initial_y
    for i in range(max_iterations):
        grad_x = f_prime_x.subs({x: x_val, y: y_val})
        grad_y = f_prime_y.subs({x: x_val, y: y_val})
        new_x = x_val - learning_rate * grad_x
        new_y = y_val - learning_rate * grad_y
        if abs(new_x - x_val) < tolerance and abs(new_y - y_val) < tolerance:
            return (new_x, new_y), i + 1
        x_val, y_val = new_x, new_y
    return (x_val, y_val), max_iterations

# Initial guesses
initial_x = 0
initial_y = 0
learning_rate = 0.1
tolerance = 1e-6
max_iterations = 1000

# Run gradient descent
min_values, iterations = gradient_descent_2d(f_prime_x, f_prime_y, initial_x, initial_y, learning_rate, tolerance, max_iterations)
print(f"The minimum value is at x = {min_values[0]}, y = {min_values[1]}, found in {iterations} iterations.")

f'_x(x, y) = 6*x
f'_y(x, y) = -5*exp(-y)
The minimum value is at x = 0, y = 6.21980920407898, found in 1000 iterations.


In [11]:
def compute_square_error(error):

    # Clip the error to a reasonable range to prevent overflow
    error = min(max(error, -1e10), 1e10)
    return error ** 2

def gradient_descent(inputs, expected_outputs, initial_M, initial_C, learning_rate, tolerance, max_iterations):
    # Get the number of data points
    n = len(inputs)

    M, C = initial_M, initial_C #Initialize M, C

    for iteration in range(max_iterations):
        total_gradient_M = 0
        total_gradient_C = 0

        for i in range(n):
            predicted = M * inputs[i] + C
            error = compute_square_error(expected_outputs[i] - predicted) # compute_error should be compute_square_error and error should be the difference
            total_gradient_M += -2 * inputs[i] * error
            total_gradient_C += -2 * error

        # Average the gradients
        gradient_M = total_gradient_M / n
        gradient_C = total_gradient_C / n

        # Update M and C
        new_M = M - learning_rate * gradient_M
        new_C = C - learning_rate * gradient_C

        # Check for convergence
        if abs(new_M - M) < tolerance and abs(new_C - C) < tolerance:
            return (new_M, new_C), iteration + 1

        M, C = new_M, new_C

    return (M, C), max_iterations

# Example usage
inputs = [1, 2, 3, 4, 5]  # Example inputs
expected_outputs = [2, 4, 6, 8, 10]  # Example expected outputs
initial_M = 0.0  # Initial guess for M
initial_C = 0.0  # Initial guess for C
learning_rate = 0.01  # Learning rate
tolerance = 1e-6  # Tolerance for stopping
max_iterations = 1000  # Maximum number of iterations

optimal_parameters, iterations = gradient_descent(inputs, expected_outputs, initial_M, initial_C, learning_rate, tolerance, max_iterations)
print(f"The optimal values are M = {optimal_parameters[0]}, C = {optimal_parameters[1]}, found in {iterations} iterations.")


The optimal values are M = 5.964000000349156e+21, C = 1.9880000000864986e+21, found in 1000 iterations.
