In [1]:
# Toy training set: x_data[i] is the input paired with the target y_data[i].
x_data, y_data = [1, 2, 3], [2, 4, 5]

In [2]:
def model_predict(x, theta_0, theta_1):
  """Return the line's prediction h(x) = theta_0 + theta_1 * x.

  x is the input value, theta_0 is the intercept and theta_1 is the slope.
  """
  slope_term = theta_1 * x
  return theta_0 + slope_term

In [10]:
def cost_function(x_data, y_data, theta_0, theta_1):
  """Calculates the total cost J for our parameters.

  J = 1/2 * sum((h(x_i) - y_i) ** 2), where h is model_predict. The sum is
  not divided by the number of data points.

  Args:
    x_data: Sequence of input values.
    y_data: Sequence of target values, one per entry of x_data.
    theta_0: Intercept of the line.
    theta_1: Slope of the line.

  Returns:
    Half the sum of the squared prediction errors (0.0 for empty data).

  Raises:
    ValueError: If x_data and y_data have different lengths.
  """
  # Fail loudly on misaligned data instead of silently ignoring extra targets
  # or hitting an IndexError halfway through the loop.
  if len(x_data) != len(y_data):
    raise ValueError(
        f"x_data and y_data must have the same length "
        f"(got {len(x_data)} and {len(y_data)})"
    )

  total_error = 0
  for x, y in zip(x_data, y_data):
    # Squared distance between the line's prediction and the real value
    error = (model_predict(x, theta_0, theta_1) - y) ** 2
    total_error += error

  # Finish the formula (1/2 * sum)
  return 0.5 * total_error

In [4]:
# Cost of the candidate line h(x) = 0 + 1 * x on the example data.
cost_1 = cost_function(x_data, y_data, theta_0=0, theta_1=1)
print(f"Cost for (theta_0=0, theta_1=1): {cost_1}")

Cost for (theta_0=0, theta_1=1): 4.5


In [5]:
# Cost of the candidate line h(x) = 0 + 2 * x on the example data.
cost_2 = cost_function(x_data, y_data, theta_0=0, theta_1=2)
print(f"Cost for (theta_0=0, theta_1=2): {cost_2}")

Cost for (theta_0=0, theta_1=2): 0.5


In [8]:
# Example data, re-declared here with the same values as the first cell.
x_data, y_data = [1, 2, 3], [2, 4, 5]

# Prediction helper (same formula as the model_predict defined in the earlier cell)
def model_predict(x, theta_0, theta_1):
  """Evaluates the linear hypothesis h(x) = theta_0 + theta_1 * x"""
  scaled_input = theta_1 * x
  prediction = theta_0 + scaled_input
  return prediction

In [7]:
def calculate_gradient(x_data, y_data, theta_0, theta_1):
  """
  Calculates the gradient (the partial derivatives) of the cost function.

  With error_i = model_predict(x_i, theta_0, theta_1) - y_i:
    d_theta_0 = sum(error_i)
    d_theta_1 = sum(error_i * x_i)

  Args:
    x_data: Sequence of input values.
    y_data: Sequence of target values, one per entry of x_data.
    theta_0: Current intercept.
    theta_1: Current slope.

  Returns:
    A (d_theta_0, d_theta_1) tuple; (0, 0) for empty data.

  Raises:
    ValueError: If x_data and y_data have different lengths.
  """
  # Fail loudly on misaligned data instead of silently ignoring extra targets
  # or hitting an IndexError halfway through the loop.
  if len(x_data) != len(y_data):
    raise ValueError(
        f"x_data and y_data must have the same length "
        f"(got {len(x_data)} and {len(y_data)})"
    )

  d_theta_0 = 0  # Running sum of the errors
  d_theta_1 = 0  # Running sum of (error * x)

  for x, y in zip(x_data, y_data):
    # Signed error: prediction minus actual value
    error = model_predict(x, theta_0, theta_1) - y

    d_theta_0 += error
    d_theta_1 += error * x

  # Return the "uphill arrow" as two numbers
  return d_theta_0, d_theta_1

In [6]:
def gradient_descent(x_data, y_data, learning_rate, num_steps,
                     initial_theta_0=0.0, initial_theta_1=0.0):
  """Performs gradient descent to find the best theta_0 and theta_1.

  Args:
    x_data: Sequence of input values.
    y_data: Sequence of target values, one per entry of x_data.
    learning_rate: Multiplier applied to the gradient on every update.
    num_steps: Number of update steps to run.
    initial_theta_0: Starting guess for the intercept (defaults to 0.0).
    initial_theta_1: Starting guess for the slope (defaults to 0.0).

  Returns:
    A (theta_0, theta_1) tuple holding the parameters after the last step.
    With num_steps == 0 the starting guess is returned unchanged.
  """
  # Start from the caller's guess (0.0 / 0.0 unless told otherwise)
  theta_0 = initial_theta_0
  theta_1 = initial_theta_1

  for _ in range(num_steps):
    # 1. Calculate the gradient (the "uphill arrow") at the current parameters
    grad_0, grad_1 = calculate_gradient(x_data, y_data, theta_0, theta_1)

    # 2. Step *downhill*: theta = theta - (learning_rate * gradient).
    #    Both gradients were computed before either parameter is changed.
    theta_0 = theta_0 - (learning_rate * grad_0)
    theta_1 = theta_1 - (learning_rate * grad_1)

  # After all the steps, return the final, trained parameters
  return theta_0, theta_1

In [11]:
# --- Hyperparameters ---
alpha = 0.01  # learning rate passed to gradient_descent
steps = 500   # number of update steps to run

# --- Fit the line to the example data ---
final_theta_0, final_theta_1 = gradient_descent(
    x_data, y_data, learning_rate=alpha, num_steps=steps
)

print(f"After {steps} steps:")
print(f"Found theta_0 (intercept): {final_theta_0:.4f}")
print(f"Found theta_1 (slope):     {final_theta_1:.4f}")

After 500 steps:
Found theta_0 (intercept): 0.6657
Found theta_1 (slope):     1.5004
