In [31]:
import math
import random
from functools import partial

In [12]:
# gradient descent elements
def vector_subtract(v, w):
    """Return the element-wise difference v - w as a new list.

    Elements are paired with zip, so the result is only as long as the
    shorter of the two inputs.
    """
    differences = []
    for left, right in zip(v, w):
        differences.append(left - right)
    return differences

def sum_of_squares(v):
    """Return the sum of the squared elements of v (0 for an empty input)."""
    total = 0
    for element in v:
        total += element ** 2
    return total

def magnitude(v):
    """Return the Euclidean length of the vector v.

    This is the square root of the sum of the squared components,
    i.e. the hypotenuse generalised to any number of dimensions.
    """
    squared_length = sum_of_squares(v)
    return math.sqrt(squared_length)

def distance(v, w):
    """Return the magnitude of the difference vector v - w."""
    difference = vector_subtract(v, w)
    return magnitude(difference)

def step(v, direction, step_size):
    """Return the point reached by moving step_size * direction away from v.

    v and direction are paired element by element with zip; a negative
    step_size moves against the given direction.
    """
    moved = []
    for coordinate, delta in zip(v, direction):
        moved.append(coordinate + step_size * delta)
    return moved

def sum_of_squares_gradient(v):
    """Return the gradient of sum_of_squares at v: each component doubled."""
    gradient = []
    for component in v:
        gradient.append(2 * component)
    return gradient

def safe(f):
    """Wrap f so that a failing call yields infinity instead of raising.

    Returns a new function that forwards all positional and keyword
    arguments to f. If f raises an ordinary error, the wrapper returns
    float('inf'), so an invalid input simply looks like a very bad value
    to a minimizer.
    """
    def safe_f(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except Exception:
            # Catch Exception rather than using a bare except, so that
            # KeyboardInterrupt and SystemExit still propagate and a
            # long-running minimization can be interrupted.
            return float('inf')
    return safe_f

In [13]:
# gradient descent
def minimize_batch(target_fn,gradient_fn,theta_0,tolerance=0.000001):
    """Use gradient descent to find the theta that minimizes the target function.

    Parameters:
        target_fn:   function of theta to minimize. It is wrapped with
                     safe(), so an evaluation that raises counts as infinity.
        gradient_fn: function returning the gradient of target_fn at theta.
        theta_0:     starting point.
        tolerance:   stop once a step changes the target value by less
                     than this amount.

    Returns:
        The theta held when the stopping condition was met, i.e. the point
        before the final candidate step.

    Note: there is no iteration cap; the loop runs until the stopping
    condition is satisfied.
    """
    step_sizes = [100, 10, 1, 0.1, 0.01, 0.001, 0.0001, 0.00001]

    theta = theta_0
    target_fn = safe(target_fn)
    value = target_fn(theta)

    while True:
        gradient = gradient_fn(theta)
        # One candidate per step size, each moving against the gradient.
        next_thetas = [step(theta, gradient, -step_size)
                       for step_size in step_sizes]

        # Choose the candidate that minimizes the target function.
        next_theta = min(next_thetas, key=target_fn)
        # Evaluate the chosen candidate, not the whole candidate list.
        next_value = target_fn(next_theta)

        # Stop on convergence.
        if abs(value - next_value) < tolerance:
            return theta

        theta, value = next_theta, next_value

In [14]:
# stochastic gradient descent
def in_random_order(data):
    """Generator that yields the elements of data in random order.

    data may be any iterable, including a one-shot iterator such as the
    result of zip() on Python 3. It is copied into a list first, so the
    caller's object is never reordered. The order comes from
    random.shuffle.
    """
    items = list(data)
    random.shuffle(items)
    for item in items:
        yield item
        
def minimize_stochastic(target_fn,gradient_fn,x,y,theta_0,alpha_0=0.01):
    """Minimize a per-point target function with stochastic gradient descent.

    Parameters:
        target_fn:   called as target_fn(x_i, y_i, theta); the values are
                     summed over all points to score theta.
        gradient_fn: called as gradient_fn(x_i, y_i, theta); returns the
                     gradient contribution of a single point.
        x, y:        parallel sequences, paired with zip.
        theta_0:     starting parameters.
        alpha_0:     initial step size, restored after every improvement.

    Returns:
        The theta with the lowest total target value seen, or None if no
        pass ever produced a value below infinity.
    """
    # Materialise the pairs: the data is traversed once per pass, and a
    # bare zip object on Python 3 would be exhausted after the first pass.
    data = list(zip(x, y))
    theta = theta_0
    alpha = alpha_0
    min_theta, min_value = None, float("inf")
    iterations_with_no_improvement = 0
    max_iterations_with_no_improvement = 100

    # Stop after 100 consecutive passes with no improvement.
    while iterations_with_no_improvement < max_iterations_with_no_improvement:
        value = sum(target_fn(x_i, y_i, theta) for x_i, y_i in data)

        if value < min_value:
            # New minimum: remember it and go back to the original step size.
            min_theta, min_value = theta, value
            iterations_with_no_improvement = 0
            alpha = alpha_0
        else:
            # No improvement: shrink the step size.
            iterations_with_no_improvement += 1
            alpha *= 0.9

        # Take one gradient step per data point, in random order.
        for x_i, y_i in in_random_order(data):
            gradient_i = gradient_fn(x_i, y_i, theta)
            # theta - alpha * gradient_i, written with the step helper.
            theta = step(theta, gradient_i, -alpha)

    return min_theta


In [29]:
# Dimension reduction elements
# from old chapters
def dot(a, b):
    """Return the dot product of a and b.

    Iterates over the positions of a, so b must be at least as long as a
    (a shorter b raises IndexError); extra elements of b are ignored.
    """
    total = 0
    for i in range(len(a)):
        total += a[i] * b[i]
    return total

def vector_sum(v):
    """Sum a collection of numbers, or a collection of vectors component-wise.

    If every element of v is a list or tuple, the result is a list whose
    j-th entry is the sum of the j-th components (truncated to the
    shortest vector, as with zip). Otherwise the elements are added with
    the built-in sum, so a flat collection of numbers still gives a
    scalar and an empty input gives 0.

    v may be any iterable, including a generator; it is consumed once.
    """
    items = list(v)
    if items and all(isinstance(item, (list, tuple)) for item in items):
        # Built-in sum() cannot add lists to its integer start value, so
        # vectors are added column by column instead.
        return [sum(components) for components in zip(*items)]
    return sum(items)

#from current chapter
def direction(w):
    """Return w with every component divided by the magnitude of w.

    A vector of magnitude zero cannot be rescaled and raises
    ZeroDivisionError.
    """
    length = magnitude(w)
    rescaled = []
    for component in w:
        rescaled.append(component / length)
    return rescaled

def directional_variance_i(x_i, w):
    """The variance of the row x_i in the direction determined by w.

    Computed as the square of the dot product of x_i with direction(w).
    """
    projection = dot(x_i, direction(w))
    return projection ** 2

def directional_variance(X, w):
    """The variance of the data in the direction determined by w.

    Adds up directional_variance_i over every row x_i of X.
    """
    total = 0
    for x_i in X:
        total += directional_variance_i(x_i, w)
    return total

def directional_variance_gradient_i(x_i, w):
    """The contribution of the row x_i to the gradient of the directional variance.

    Returns x_i scaled by twice its projection onto direction(w).
    """
    projection_length = dot(x_i, direction(w))
    scale = 2 * projection_length
    return [scale * x_ij for x_ij in x_i]

def directional_variance_gradient(X, w):
    """The gradient of the directional variance of the data X with respect to w.

    Combines the per-row gradients from directional_variance_gradient_i
    by passing them to vector_sum.
    """
    # NOTE(review): each per-row gradient is a list, so this only works if
    # vector_sum adds vectors component-wise -- verify its implementation.
    row_gradients = (directional_variance_gradient_i(x_i, w) for x_i in X)
    return vector_sum(row_gradients)

In [30]:
# Sanity check: each component of [1, 2, 3, 4] divided by the vector's magnitude.
direction([1,2,3,4])

[0.18257418583505536,
 0.3651483716701107,
 0.5477225575051661,
 0.7302967433402214]

In [23]:
# Sanity check: sqrt(1 + 4 + 9 + 16) = sqrt(30).
magnitude([1,2,3,4])

5.477225575051661

In [26]:
# Sanity check for dot: 1*4 + 3*(-2) + (-5)*(-1) = 3.
a = [1,3,-5]
b = [4,-2,-1]
dot(a, b)

3