# Linear Regression Machine Learning (tutorial)
https://www.youtube.com/watch?v=uwwWVAgJBcM

In [1]:
from numpy import *

In [7]:
def compute_error_for_line_given_points(b, m, points):
    #initialize it at 0
    totalError = 0
    #for every point
    for i in range(0, len(points)):
        #get the x value
        x = points[i, 0]
        #get the y value
        y = points[i, 1]
        #get the difference, square it, and add it to the total
        totalError += (y - (m * x + b)) ** 2
    
    #get the average
    return totalError / float(len(points))

In [3]:
def gradient_descent_runner(points, starting_b, starting_m, learning_rate, num_iterations):
    #starting b and m
    b = starting_b
    m = starting_m
    
    #gradient descent
    for i in range(num_iterations):
        #update b and m with the new, more accurate b and m
        #by performing this gradient step
        b, m = step_gradient(b, m, array(points), learning_rate)
        
    return [b, m]

In [4]:
#magic, the greatest, the greatest
def step_gradient(b_current, m_current, points, learning_rate):
    #gradient means slope and it's going to act like a compass that points downhill, which direction we should be going
    #the lowest point is where the error is the smallest, which will be the best fit
    
    #starting points for our gradients
    b_gradient = 0
    m_gradient = 0
    
    N = float(len(points))
    
    for i in range(0, len(points)):
        x = points[i, 0]
        y = points[i, 1]
        #direction with respect to b and m
        #computing partial derivatives of our error function
        b_gradient +=    -(2/N) * (y - ((m_current * x) + b_current))
        m_gradient += x * (2/N) * (y - ((m_current * x) + b_current)) 
        
    #update our b and m values using our partial derivatives
    new_b = b_current - (learning_rate * b_gradient)
    new_m = m_current - (learning_rate * m_gradient)
    return [new_b, new_m]

In [5]:
def run():
    #Step 1 - collect our data
    points = genfromtxt('data.csv', delimiter=',')
    
    #Step 2 - define our hyperparameters
    #how fast should our model converge?
    learning_rate = 0.0001
    #y = mx + b (slope formula)
    initial_b = 0
    initial_m = 0
    num_iterations = 1000
    #Step 3 - train our model
    print('Starting gradient descent at b = {0}, m = {1}, error = {2}'.format(initial_b, initial_m, compute_error_for_line_given_points(initial_b, initial_m, points)))
    [b, m] = gradient_descent_runner(points, initial_b, initial_m, learning_rate, num_iterations)
    print('Ending point at b = {0}, m = {1}, error = {2}'.format(num_iterations, b, m, compute_error_for_line_given_points(b, m, points)))

In [8]:
if __name__ == '__main__':
    run()

Starting gradient descent at b = 0, m = 0, error = 5565.107834483211
Ending point at b = 1000, m = 9.451159314734957e+173, error = -4.808747047443573e+175


  totalError += (y - (m * x + b)) ** 2
