In [17]:
def naive_gradient_descent_with_MSE(x,y,alpha=0.001,iterations=1000,tolerance=0.0001,theta_1=0):
    
    #this represents 1/m
    avg = 1/(len(x))
    
    # Set up our loop to handle the case where we fail to converge
    for i in range(iterations):
        
        # Start the process for calculating the Mean Squared Error
        MSE = 0
        
        for counter in range(len(x)):            
            MSE +=  ( (theta_1 * x[counter]) - y[counter]) * x[counter]
        
        #update theta        
        new_theta_1 = theta_1 -(alpha*avg*MSE)
        
        #If we converge, break the loop
        if abs(theta_1-new_theta_1) < tolerance:
            return new_theta_1
        
        #update the values of theta
        theta_1 = new_theta_1
        
    #if we don't converge, we need to handle it
    print('Failed to converge')
    

In [18]:
test_set_x = [i for i in range(1,11)]
test_set_y = [i for i in range(1,11)]

In [21]:
naive_gradient_descent_with_MSE(test_set_x,test_set_y,alpha=0.01,tolerance=0.0001)

0.9998416011282122

# True cost function

$\textbf{Cost function: }$ $J_{(\theta_0,\theta_1)} = \frac{1}{m}\sum\limits_{i=1}^{m}(Y_{true}^{(i)}-Y_{predicted}^{(i)})^{2}$

$\textbf{Cost function: }$ $J_{(\theta_0,\theta_1)} = \frac{1}{2m}\sum\limits_{i=1}^{m}(y^{(i)}-h_{\theta}(x^{(i)}))^{2}$

# Partial Derivative of $\theta_{1}$

$\frac{\partial}{\partial_{\theta_{1}}}J(\theta_{0},\theta_{1})$ = $\frac{1}{m}\sum^{m}_{i=1}(h_{\theta}(x^{(i)})-y^{(i)}) * x^{(i)}$

$\frac{\partial}{\partial_{\theta_{1}}}J(\theta_{0},\theta_{1})$ = $\frac{1}{m}\sum^{m}_{i=1}((\theta_{0}+\theta_1x^{(i)})-y^{(i)}) * x^{(i)}$

# Partial Derivative of $\theta_{0}$

$\frac{\partial}{\partial_{\theta_{0}}}J(\theta_{0},\theta_{1})$ = $\frac{1}{m}\sum^{m}_{i=1}((\theta_{0}+\theta_1x^{(i)})-y^{(i)})$

In [39]:
def gradient_descent_with_MSE(x,y,alpha=0.001,iterations=1000,tolerance=0.0001,theta_0=0,theta_1=0):
    
    #define the average 
    avg = 1/(len(x))
    total_error = sum([(theta_0 + (theta_1*x[i]) - y[i])**2 for i in range(len(x))])
    
    # Set up our loop to handle the case where we fail to converge
    for i in range(iterations):
        
        
        
        MSE_theta_0 = 0
        MSE_theta_1 = 0
        
        
        for counter in range(len(x)):            
            MSE_theta_1 +=   ((theta_0 + theta_1 * x[counter]) - y[counter]) * x[counter]
        for counter in range(len(x)):            
            MSE_theta_0 +=   ((theta_0 + theta_1 * x[counter]) - y[counter])
            
        new_theta_1 = theta_1 - ( alpha * avg* MSE_theta_1)
        new_theta_0 = theta_0 - (alpha * avg * MSE_theta_0)
        
        new_total_error = sum([(new_theta_0 + (new_theta_1*x[i]) - y[i])**2 for i in range(len(x))])
        
        if abs(total_error-new_total_error) <=tolerance:
            return new_theta_0,new_theta_1,i
        total_error = new_total_error
        theta_0 = new_theta_0
        theta_1 = new_theta_1
    
    
    return new_theta_0,new_theta_1

In [40]:
x_grad = [2,4,6,8,10,12,14,16,18,20] 
y_grad = [5,7,9,11,13,15,17,19,21,23]

In [44]:
gradient_descent_with_MSE(x_grad,y_grad,alpha=0.01,iterations=10000,tolerance=0.00001)

(2.9669717880244453, 1.0023624285269603, 2098)

In [58]:
import random 

In [154]:
def stochastic_descent_with_MSE(x,y,alpha=0.001,iterations=1000,tolerance=0.0001,theta_0=0,theta_1=0):
    
    #define the average 
    grabber = random.randint(0, len(x)-1 )  
    total_error = (theta_0 + (theta_1*x[z]) - y[z])**2
    
    # Set up our loop to handle the case where we fail to converge
    for i in range(iterations):
        
        grabber = random.randint(0, len(x)-1 )  
        
        MSE_theta_1 =   ((theta_0 + (theta_1 * x[grabber])) - y[grabber]) * x[grabber]         
        MSE_theta_0 =   (theta_0 + (theta_1 * x[grabber])) - y[grabber]
            
        new_theta_1 = theta_1 - (alpha * MSE_theta_1)
        new_theta_0 = theta_0 - (alpha *  MSE_theta_0)
        
        new_total_error = (new_theta_0 + (new_theta_1*x[grabber]) - y[grabber])**2
        
        if abs(total_error-new_total_error) <= tolerance:
            return new_theta_0 ,new_theta_1
        
        
        total_error = new_total_error
        theta_0 = new_theta_0
        theta_1 = new_theta_1
    
    
    return new_theta_0,new_theta_1

In [155]:
x_grad = [2,4,6,8,10,12,14,16,18,20] 
y_grad = [5,7,9,11,13,15,17,19,21,23]

In [156]:
x_avg = []
y_avg = []
for i in range(0,100):
    x,y = stochastic_descent_with_MSE(x_grad,y_grad,alpha=0.01,iterations=1000000,tolerance=0.0001)
    x_avg.append(x)
    y_avg.append(y)

(sum(x_avg)/len(x_avg),sum(y_avg)/len(y_avg))

NameError: name 'z' is not defined