## Implementing the algorithm

In [1]:
import numpy as np

In [2]:
def predict(feature_matrix, weights):
    """Return the predictions of a linear model.

    The result is ``np.dot(feature_matrix, weights)``: for a 2-D feature
    matrix and a 1-D weight vector this yields one predicted value per row.
    """
    predictions = np.dot(feature_matrix, weights)
    return predictions

In [3]:
def feature_derivative(feature, errors):
    """Return twice the dot product of one feature column with the errors.

    ``feature`` and ``errors`` are combined with ``np.dot``; the factor of 2
    comes from differentiating a squared-error term.
    """
    inner_product = np.dot(feature, errors)
    return 2 * inner_product

In [17]:
import math

In [18]:

def gradient_descent(outputs, feature_matrix, initial_weights, step_size, tolerance,
                     max_iterations=None):
    """Fit linear-regression weights by batch gradient descent.

    Each iteration computes the errors ``outputs - feature_matrix . weights``,
    forms ``2 * feature_matrix^T . errors`` (one entry per weight) and moves
    the weights by ``step_size`` times that vector. Iteration stops once the
    Euclidean norm of that vector falls below ``tolerance``.

    Parameters
    ----------
    outputs : array-like, shape (n_rows,)
        Observed target values.
    feature_matrix : array-like, shape (n_rows, n_weights)
        One column per weight.
    initial_weights : array-like, shape (n_weights,)
        Starting point. It is copied, never modified in place.
    step_size : float
        Multiplier applied to the gradient at every update.
    tolerance : float
        Convergence threshold on the gradient magnitude.
    max_iterations : int or None, optional
        Upper bound on the number of updates. ``None`` (the default) keeps
        iterating until the tolerance is met.

    Returns
    -------
    numpy.ndarray
        The float weight vector after the last update.

    Raises
    ------
    ValueError
        If the gradient magnitude becomes infinite or NaN, which means the
        iteration has diverged (typically because ``step_size`` is too large).
    """
    outputs = np.asarray(outputs, dtype=float)
    feature_matrix = np.asarray(feature_matrix, dtype=float)
    # Force a float copy: with integer initial weights the in-place updates
    # would be truncated toward zero and the descent could stall forever.
    weights = np.array(initial_weights, dtype=float)

    iteration = 0
    # Overflow is reported through the explicit finiteness check below rather
    # than through NumPy runtime warnings.
    with np.errstate(over='ignore', invalid='ignore'):
        while max_iterations is None or iteration < max_iterations:
            errors = outputs - np.dot(feature_matrix, weights)
            # Errors are (observed - predicted), so this vector is the
            # negative RSS gradient and is *added* to the weights.
            derivatives = 2 * np.dot(feature_matrix.T, errors)
            weights += step_size * derivatives
            iteration += 1

            gradient_magnitude = np.linalg.norm(derivatives)
            if not np.isfinite(gradient_magnitude):
                # A NaN/inf magnitude can never drop below the tolerance, so
                # without this check the loop would spin forever.
                raise ValueError(
                    "gradient_descent diverged: gradient magnitude is not "
                    "finite; try a smaller step_size")
            if gradient_magnitude < tolerance:
                break
    return weights
        

## Testing 

In [6]:
import graphlab

In [9]:
# Load the SFrame stored at the relative path 'kc_house_data.gl'.
sales = graphlab.SFrame('kc_house_data.gl')

In [13]:
# Random train/test split with a fixed seed so re-runs give the same split
# (0.8 presumably sends ~80% of the rows to train_data -- confirm in the SFrame docs).
train_data, test_data = sales.random_split(0.8, seed=0)

In [14]:
def get_numpy_data(data_sframe, features, output):
    """Extract a feature matrix (with a leading constant column) and a target array.

    Parameters
    ----------
    data_sframe : SFrame-like table
        Must support ``copy()``, column assignment, selection by a list of
        column names, and ``to_numpy()`` on both tables and single columns.
    features : sequence of str
        Names of the feature columns to extract.
    output : str
        Name of the target column.

    Returns
    -------
    (feature_matrix, output_array)
        ``feature_matrix`` has a first column named 'constant' filled with 1,
        followed by the requested feature columns in order;
        ``output_array`` holds the target column.
    """
    # Work on a copy so the caller's table does not silently gain a
    # 'constant' column as a side effect of this call.
    working_sframe = data_sframe.copy()
    working_sframe['constant'] = 1  # constant column that pairs with the intercept weight

    # Put 'constant' first so it is extracted together with the other features.
    selected_columns = ['constant'] + list(features)
    feature_matrix = working_sframe[selected_columns].to_numpy()

    # Convert the target column directly to a numpy array.
    output_array = working_sframe[output].to_numpy()
    return feature_matrix, output_array

In [15]:
# Let's test out the gradient descent on a one-feature model: 'price' from 'sqft_living'.
simple_features = ['sqft_living']
my_output = 'price'
(simple_feature_matrix, output) = get_numpy_data(train_data, simple_features, my_output)
# One starting weight per feature-matrix column: the 'constant' column first, then 'sqft_living'.
initial_weights = np.array([-47000., 1.])
# Step size and gradient-magnitude stopping threshold handed to gradient_descent below.
step_size = 7e-12
tolerance = 2.5e7

In [19]:
# Run gradient descent on the training data; it returns once the gradient magnitude drops below `tolerance`.
weight_test = gradient_descent(output, simple_feature_matrix, initial_weights, step_size, tolerance)

In [20]:
# Learned weights: the weight for the 'constant' column first, then the 'sqft_living' weight.
print weight_test

[-46999.88716555    281.91211912]


In [21]:
# Fit GraphLab's built-in linear regression on the same training data, feature and target,
# so its coefficients can be compared with the gradient-descent weights.
regression_model = graphlab.linear_regression.create(train_data, features=simple_features, target=my_output, validation_set=None)

In [22]:
# Show the fitted coefficients for comparison with the `weight_test` values printed earlier.
coeff = regression_model['coefficients']
print coeff

+-------------+-------+----------------+---------------+
|     name    | index |     value      |     stderr    |
+-------------+-------+----------------+---------------+
| (intercept) |  None | -47114.0206702 | 4923.34437753 |
| sqft_living |  None | 281.957850166  | 2.16405465323 |
+-------------+-------+----------------+---------------+
[2 rows x 4 columns]

