In [1]:
import numpy as np
import math
import pandas as pd
import turicreate as tc
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn import linear_model


In [None]:
def get_numpy_data(data_sframe, features, output):
    """Build a design matrix and a target vector from a tabular frame.

    Side effect: a column named 'constant' holding the value 1 is assigned
    onto ``data_sframe`` itself, so the caller's frame is modified.

    Parameters
    ----------
    data_sframe : frame object
        Must support column assignment, selection by a list of column
        names, and ``.to_numpy()`` on the selected result.
    features : list of str
        Names of the feature columns; 'constant' is prepended to them.
    output : str
        Name of the target column.

    Returns
    -------
    tuple
        ``(features_matrix, output_array)`` where the first matrix column
        is the constant (intercept) column.
    """
    # Store the intercept on the frame so it can be selected together with
    # the real feature columns.
    data_sframe['constant'] = 1
    selected_columns = ['constant'] + features
    return (data_sframe[selected_columns].to_numpy(),
            data_sframe[output].to_numpy())

def predict_output(feature_matrix, weights):
    """Return the linear-model predictions: the matrix product of
    ``feature_matrix`` and ``weights`` (computed with ``np.matmul``)."""
    return np.matmul(feature_matrix, weights)

def feature_derivative_ridge(errors, feature, weight, l2_penalty, feature_is_constant):
    """Derivative of the ridge cost with respect to a single weight.

    Parameters
    ----------
    errors : array
        Residual vector that is dotted with ``feature``.
    feature : array
        The feature column belonging to ``weight``.
    weight : float
        Current value of the weight.
    l2_penalty : float
        L2 regularization strength.
    feature_is_constant : bool
        When it compares equal to ``False`` the L2 term
        ``2 * l2_penalty * weight`` is added; otherwise only the
        squared-error term is returned.

    Returns
    -------
    The derivative value.
    """
    rss_term = 2. * np.dot(feature, errors)
    if feature_is_constant == False:
        # Regular feature: include the regularization term.
        return rss_term + 2. * l2_penalty * weight
    # Constant (intercept) feature: no regularization term.
    return rss_term

def feature_derivative(errors, feature):
    """Derivative of the residual sum of squares with respect to one weight.

    Uses the convention ``errors = predictions - output``, under which the
    derivative is ``2 * feature . errors``.

    Parameters
    ----------
    errors : array
        Residual vector, ``predictions - output``.
    feature : array
        The feature column belonging to the weight being differentiated.

    Returns
    -------
    The derivative value.
    """
    # Positive sign: with errors = predictions - output, subtracting
    # step_size * derivative moves the weight downhill.
    return 2 * np.dot(feature, errors)

def regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance):
    """Fit least-squares weights by gradient descent.

    Every iteration computes ``errors = predictions - output``, forms the
    gradient of the residual sum of squares (``2 * feature_i . errors`` for
    each weight) and takes a step of ``step_size`` against it. Iteration
    stops once the Euclidean norm of the gradient drops below ``tolerance``.

    Parameters
    ----------
    feature_matrix : numpy array, one column per weight
    output : numpy array of targets
    initial_weights : array-like starting weights (not modified)
    step_size : float
    tolerance : float
        Threshold on the gradient magnitude.

    Returns
    -------
    numpy array of fitted weights (float dtype).
    """
    # Work on a float copy so updates are not truncated when the caller
    # supplies integer starting weights.
    weights = np.array(initial_weights, dtype=float)
    converged = False
    while not converged:
        errors = predict_output(feature_matrix, weights) - output
        # Gradient for all weights at once; the errors are fixed for the
        # whole sweep, so this matches a per-weight update loop.
        gradient = 2. * np.dot(feature_matrix.T, errors)
        weights -= step_size * gradient
        gradient_magnitude = np.sqrt(np.sum(np.square(gradient)))
        converged = gradient_magnitude < tolerance
    return weights

def ridge_regression_gradient_descent(feature_matrix, output, initial_weights, step_size, l2_penalty, max_iterations=100):
    """Fit ridge-regression weights with a fixed number of gradient steps.

    Parameters
    ----------
    feature_matrix : numpy array whose column 0 is the constant feature
    output : numpy array of targets
    initial_weights : array-like starting weights (not modified)
    step_size : float
    l2_penalty : float
        L2 regularization strength; not applied to weight 0.
    max_iterations : number, default 100
        Number of full passes over the weights.

    Returns
    -------
    numpy array of fitted weights (float dtype).
    """
    # Float copy: an integer array would silently truncate each update.
    weights = np.array(initial_weights, dtype=float)
    iteration = 0
    while iteration < max_iterations:
        # Errors are computed once per pass and shared by every weight update.
        errors = predict_output(feature_matrix, weights) - output
        for i in range(len(weights)):
            # Index 0 is the intercept, which is left unregularized.
            derivative = feature_derivative_ridge(
                errors, feature_matrix[:, i], weights[i], l2_penalty, i == 0)
            weights[i] -= step_size * derivative
        iteration += 1
    return weights

def normalize_features(features):
    """Divide ``features`` by its 2-norm taken along axis 0.

    Returns
    -------
    tuple
        ``(normalized_features, norms)`` where ``norms`` is the result of
        ``np.linalg.norm(features, axis=0)``.
    """
    column_norms = np.linalg.norm(features, axis=0)
    return (features / column_norms, column_norms)

def lasso_coordinate_descent_step(i, feature_matrix, output, weights, l1_penalty):
    """Return the updated value of weight ``i`` for one lasso coordinate step.

    ro_i = SUM[ feature_i * (output - prediction + weights[i] * feature_i) ]
    Weight 0 (the intercept) is set to ro_i unregularized; any other weight
    is ro_i shifted towards zero by ``l1_penalty / 2``, or exactly 0 when
    ro_i lies within ``[-l1_penalty/2, l1_penalty/2]``.
    """
    feature_i = feature_matrix[:, i]
    # Residual with the contribution of feature i added back in.
    partial_residual = output - predict_output(feature_matrix, weights) + (weights[i] * feature_i)
    ro_i = (feature_i * partial_residual).sum()
    half_penalty = l1_penalty / 2.

    if i == 0:  # intercept -- do not regularize
        return ro_i
    if ro_i < -half_penalty:
        return ro_i + half_penalty
    if ro_i > half_penalty:
        return ro_i - half_penalty
    return 0.

def lasso_cyclical_coordinate_descent(feature_matrix, output, initial_weights, l1_penalty, tolerance):
    """Fit lasso weights by cyclical coordinate descent.

    Each sweep updates every weight in turn with
    ``lasso_coordinate_descent_step``. Sweeps repeat until the largest
    absolute change of any weight during a sweep is below ``tolerance``.

    Parameters
    ----------
    feature_matrix : numpy array, one column per weight
    output : numpy array of targets
    initial_weights : array-like starting weights (not modified)
    l1_penalty : float
    tolerance : float

    Returns
    -------
    numpy array of fitted weights (float dtype).
    """
    # Float copy so integer starting weights do not truncate the updates.
    weights = np.array(initial_weights, dtype=float)
    delta = np.zeros_like(weights)
    converged = False
    while not converged:
        for j in range(len(weights)):
            old_weight = weights[j]
            # Later coordinates in the same sweep see this updated value.
            weights[j] = lasso_coordinate_descent_step(j, feature_matrix, output, weights, l1_penalty)
            delta[j] = np.abs(weights[j] - old_weight)
        converged = np.amax(delta) < tolerance
    return weights

In [4]:
# Sanity check of a single coordinate-descent step on a tiny 2x2 example.
print(
    lasso_coordinate_descent_step(
        1,
        np.array([[3./math.sqrt(13), 1./math.sqrt(10)],
                  [2./math.sqrt(13), 3./math.sqrt(10)]]),
        np.array([1., 1.]),
        np.array([1., 4.]),
        0.1,
    )
)
# Count the entries greater than 2 via a boolean mask.
a = np.array([1, 2, 3, 4, 5])
print(len(a[a > 2]))

0.4255588466910251
3


In [3]:
# Column types passed to pandas when reading the CSV files.
dtype_dict = {
    'bathrooms': float,
    'waterfront': int,
    'sqft_above': int,
    'sqft_living15': float,
    'grade': int,
    'yr_renovated': int,
    'price': float,
    'bedrooms': float,
    'zipcode': str,
    'long': float,
    'sqft_lot15': float,
    'sqft_living': float,
    'floors': str,
    'condition': int,
    'lat': float,
    'date': str,
    'sqft_basement': int,
    'yr_built': int,
    'id': str,
    'sqft_lot': int,
    'view': int,
}

# NOTE(review): the later cells shown here work from the SFrame `sales`; the
# three pandas frames are loaded but not referenced by them.
sales = tc.SFrame('m_1ce96d9d245ca490.frame_idx')
sales_data_pd = pd.read_csv('kc_house_data.csv', dtype=dtype_dict)
sales_test_pd = pd.read_csv('kc_house_test_data.csv', dtype=dtype_dict)
sales_train_pd = pd.read_csv('kc_house_train_data.csv', dtype=dtype_dict)


In [5]:

# Feature columns and target column for the simple lasso example.
simple_features = ['sqft_living', 'bathrooms']
output_feature = 'price'

# Note: get_numpy_data also writes a 'constant' column onto `sales`.
simple_feature_matrix, simple_output = get_numpy_data(sales, simple_features, output_feature)

In [6]:
# Row and column counts of the design matrix (the columns include the constant).
simple_rows, simple_cols = simple_feature_matrix.shape


In [11]:
# Start from one zero weight per design-matrix column.
initial_weights = np.zeros(simple_cols)
L1_penalty = 1e7  # L1 regularization strength passed to the lasso solver
Tolerance = 1.0  # convergence threshold passed to the lasso solver

In [8]:
# Scale each feature column by its 2-norm; the second value holds the norms.
normalized_simple_feature_matrix, norm_s_f_matrix = normalize_features(simple_feature_matrix)
# Same scaling applied to the target array (presumably 1-D, giving a single norm -- TODO confirm).
normalized_simple_output, norm_s_output = normalize_features(simple_output)

In [9]:
# Quick check of normalize_features on a 2x3 example; prints the column norms.
features, norms = normalize_features(
    np.array([[3., 6., 9.],
              [4., 8., 12.]])
)
print(norms)

[ 5. 10. 15.]


In [None]:
# Display the target array returned by get_numpy_data.
simple_output

In [None]:
# The coordinate step sets a weight directly to ro_i, which is only the exact
# minimizer when every feature column has unit norm, so fit on the normalized
# matrix rather than the raw one.
weights = lasso_cyclical_coordinate_descent(normalized_simple_feature_matrix, simple_output, initial_weights, L1_penalty, Tolerance)

In [2]:
def get_numpy_data(data_sframe, features, output):
    """Return ``(features_matrix, output_array)`` extracted from a frame.

    A 'constant' column equal to 1 is assigned onto ``data_sframe`` (the
    caller's frame is modified) and becomes the first matrix column, ahead
    of the columns named in ``features``. ``output`` names the target
    column. Both results come from ``.to_numpy()`` on the selections.
    """
    data_sframe['constant'] = 1
    # Intercept column first, then the requested features in their given order.
    column_names = ['constant'] + features
    design = data_sframe[column_names].to_numpy()
    target = data_sframe[output].to_numpy()
    return (design, target)

def predict_output(feature_matrix, weights):
    """Linear-model predictions: ``np.matmul(feature_matrix, weights)``."""
    return np.matmul(feature_matrix, weights)

def normalize_features(features):
    """Scale ``features`` by its 2-norm along axis 0.

    Returns ``(normalized_features, norms)``; ``norms`` is what
    ``np.linalg.norm(features, axis=0)`` produced.
    """
    scale = np.linalg.norm(features, axis=0)
    return (features / scale, scale)


def lasso_coordinate_descent_step(i, feature_matrix, output, weights, l1_penalty):
    """Compute the new value of weight ``i`` in one lasso coordinate update.

    ro_i = SUM[ feature_i * (output - prediction + weights[i] * feature_i) ]
    The intercept (``i == 0``) takes ro_i as is. Other weights take ro_i
    moved towards zero by ``l1_penalty / 2``, and become 0 when ro_i is
    within that distance of zero.
    """
    column = feature_matrix[:, i]
    prediction = predict_output(feature_matrix, weights)
    ro_i = (column * (output - prediction + (weights[i] * column))).sum()
    threshold = l1_penalty / 2.

    if i == 0:  # intercept -- do not regularize
        return ro_i
    if ro_i < -threshold:
        return ro_i + threshold
    if ro_i > threshold:
        return ro_i - threshold
    return 0.

def lasso_cyclical_coordinate_descent(feature_matrix, output, initial_weights, l1_penalty, tolerance):
    """Fit lasso weights by cyclical coordinate descent.

    One sweep visits each column of ``feature_matrix`` in order and replaces
    its weight with the result of ``lasso_coordinate_descent_step``. Sweeps
    repeat until the largest absolute weight change within a sweep is below
    ``tolerance``.

    Parameters
    ----------
    feature_matrix : numpy array, one column per weight
    output : numpy array of targets
    initial_weights : array-like starting weights (not modified)
    l1_penalty : float
    tolerance : float

    Returns
    -------
    numpy array of fitted weights (float dtype).
    """
    num_features = feature_matrix.shape[1]
    # Float copy so integer starting weights do not truncate the updates.
    weights = np.array(initial_weights, dtype=float)
    change = np.zeros_like(weights)
    converged = False

    while not converged:
        for idx in range(num_features):
            new_weight = lasso_coordinate_descent_step(idx, feature_matrix,
                                                       output, weights,
                                                       l1_penalty)
            change[idx] = np.abs(new_weight - weights[idx])
            # Assign immediately so later coordinates use the new value.
            weights[idx] = new_weight
        # Largest change across the whole sweep decides convergence.
        converged = max(change) < tolerance
    return weights