In [84]:
import numpy  as np
import pandas as pd
import random as rn
import math

#### Linear Algebra Functions

In [69]:
def dot(v_1, v_2):
    return sum( v_1_i * v_2_i for v_1_i, v_2_i in zip(v_1, v_2) )

def sum_of_squares(v):
    return dot(v, v)
    

#### Statistical Functions

In [56]:
def mean_norm(X):
    x_bar = mean(X)
    
    return [x_i - x_bar for x_i in X]

def variance(X):
    n          = len(X)
    deviations = mean_norm(X)
    
    return sum_of_squares(deviations) /\
           (n - 1)

def standard_deviation(X):
    return math.sqrt( variance(X) )

def covariance(X, y):
    n = len(X)
    
    return dot( mean_norm(X), mean_norm(y) ) /\
           (n - 1)

def correlation(X, y):
    std_dev_x = standard_deviation(X)
    std_dev_y = standard_deviation(y)
    
    if std_dev_x > 0 and std_dev_y > 0:
        return covariance(X, y) /\
               std_dev_x / std_dev_y
    else:
        return 0

#### Simple Linear Regression

In [57]:
def predict(intercept, beta, X):
    return intercept + (beta * X)

In [58]:
def error(intercept, beta, X, y):
    return y - predict(intercept, beta, X)

In [80]:
def sum_of_squared_errors(intercept, beta, X, y):
    return sum( error(intercept, beta, x_i, y_i) ** 2
                for x_i, y_i in zip(X, y) )

In [60]:
def least_squares_fit(X, y):
    beta      = ( correlation(X, y) * standard_deviation(y) ) /\
                standard_deviation(X)
    intercept = mean(y) - beta * mean(X)
    
    return intercept, beta

#### Obtain Coefficients

In [74]:
X = np.random.randint( 1, 5000, 100 ) 
y = np.random.randint( 1, 5, 100 ) 

intercept, beta = least_squares_fit(X, y)

#### Get Prediction

In [77]:
def prediction(X):
    intercept, beta = least_squares_fit(X, y)

    return intercept + (beta * X)

#### Estimate Errors

In [82]:
def total_sum_of_squares(y):
    return sum( v ** 2 for v in mean_norm(y) )

def r_sqaured(intercept, beta, X, y):
    return 1.0 - ( sum_of_squared_errors(intercept, beta, X, y) /\
                   total_sum_of_squares(y) )

In [83]:
r_sqaured(intercept, beta, X, y)

7.955578450458134e-06

#### Using Gradient Descent

In [85]:
def squared_error(X, y, theta):
    intercept, beta = theta
    
    return error(intercept, beta, X, y) ** 2

def squared_error_gradient(X, y, theta):
    intercept, beta = theta
    calc_error      = -2 * error( intercept, beta, X, y )
    
    return [ calc_error, (calc_error * X) ]

def negate(f):
    return lambda *args, **kwargs: -f(*args, **kwargs)

def negate_all(f):
    return lambda *args, **kwargs: [ -y for y in f(*args, **kwargs) ]



def in_random_order(data):
    indexes = [ i for i, _ in enumerate(data) ]
    random.shuffle(indexes)
    
    for i in indexes:
        yield data[i]

def minimize_stochastic( target_function, gradient_function, X, y, theta_0, alpha_0 ):
    data                           = zip(X, y)
    theta                          = theta_0
    alpha                          = alpha_0
    min_theta, min_value           = None, float('inf')
    iterations_with_no_improvement = 0
    
    while iterations_with_no_improvement < 100:
        value = sum( target_function(x_i, y_i, theta) for x_i, y_i in data )
        
        if value < min_value:
            min_theta, min_value           = theta, value
            iterations_with_no_improvement = 0
            alpha                          = alpha_0
            
        else:
            iterations_with_no_improvement += 1
            alpha                          *= 0.9
            
        for x_i, y_i in in_random_order(data):
            gradient_i = gradient_function(x_i, y_i, theta)
            theta      = vector_subtract( theta, scaler_multiply(alpha, gradient_i) )
            
        return min_theta
    
def maximize_stochastic( target_function, gradient_function, X, y, theta_0, alpha_0 ):
    return minimize_stochastic( negate(target_function),
                                negate_all(gradient_function),
                                X,
                                y,
                                theta_0,
                                alpha_0 )

In [86]:
rn.seed(0)

theta           = [ rn.random(), rn.random() ]
intercept, beta = minimize_stochastic( squared_error,
                                       squared_error_gradient,
                                       X,
                                       y,
                                       theta,
                                       0.001 )

print(intercept, beta)

NameError: name 'minimize_stochastic' is not defined