In [None]:
import numpy as np
import pandas as pd
from google.colab import drive

In [None]:
# Mount Google Drive at /content/gdrive so the CSV files stored there can be read.
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Load the King County house-sales test/train splits from the mounted Drive.
# Absolute paths under the mount point ('/content/gdrive') are used so loading
# does not depend on the kernel's current working directory.
test = pd.read_csv('/content/gdrive/My Drive/uwml/kc_house_test_data.csv')
train = pd.read_csv('/content/gdrive/My Drive/uwml/kc_house_train_data.csv')

In [None]:
def get_numpy_data(df, features, output):
  """Build the design matrix and target vector for a linear model.

  Args:
    df: pandas DataFrame holding the feature and output columns.
    features: sequence of column names to use as features.
    output: name of the target column.

  Returns:
    (x, y) where x is a 2-D numpy array whose first column is the constant
    term 1 followed by the requested feature columns in the given order,
    and y is a 1-D numpy array of the output column.

  The input DataFrame is NOT modified: the constant column is added to a
  copy, so calling this repeatedly never leaves a stray 'constant' column
  on the caller's data.
  """
  feature_names = list(features)
  column_order = ['constant'] + feature_names
  # df[feature_names] is a new frame; assign() returns another new frame,
  # so the intercept column never touches the caller's DataFrame.
  design = df[feature_names].assign(constant=1)[column_order]
  x = design.to_numpy()
  y = df[output].to_numpy()
  return(x, y)

# Build the test-set design matrix and target using a single feature.
x_test, y_test = get_numpy_data(test, ['sqft_living'], 'price')

# Inspect the matrix: column 0 is the constant term, column 1 is sqft_living.
x_test

array([[   1, 1430],
       [   1, 2950],
       [   1, 1710],
       ...,
       [   1, 2520],
       [   1, 2310],
       [   1, 1020]])

In [None]:
def predict_outcome(feature_matrix, weights):
  """Return the model predictions for every row of feature_matrix.

  The prediction is the dot product of the feature matrix with the weight
  vector, computed with np.dot.
  """
  return np.dot(feature_matrix, weights)

# Hand-picked weights used only to exercise the helper functions on the test matrix;
# entry 0 multiplies the constant column, entry 1 multiplies sqft_living.
dummy_weights = np.array([-47000., 1.])  # alternative tried during development: np.random.rand(2)

# Predictions for the test matrix under the dummy weights.
yhat = predict_outcome(x_test, dummy_weights)
yhat

array([-45570., -44050., -45290., ..., -44480., -44690., -45980.])

In [None]:
def errors(yhat, y):
  """Return the residuals, defined as prediction minus observed value."""
  return yhat - y

# Residuals (prediction - observed price) of the dummy-weight predictions on the test set.
error = errors(yhat, y_test)
error

array([-355570., -694050., -278290., ..., -655165., -444690., -448081.])

In [None]:
# Sanity check: the residual vector must have one entry per test observation.
print(len(y_test), len(error), sep='\n')

4229
4229


In [None]:
def feature_derivative(errors, feature):
    """Return twice the dot product of the residuals with one feature column.

    With residuals defined as prediction minus output, this is the partial
    derivative of the residual sum of squares with respect to that
    feature's weight.
    """
    return 2 * np.dot(errors, feature)

In [None]:
# Derivative for the sqft_living weight (column 1 of x_test) under the dummy weights.
feature_derivative(error, x_test[:, 1])

-12239402615640.0

In [None]:
def regression_gradient_descent(feature_matrix, output, initial_weights, step_size, tolerance, max_iterations=None):
  """Fit linear-regression weights by batch gradient descent on the RSS.

  Args:
    feature_matrix: 2-D array; column i is the feature paired with weights[i].
    output: 1-D array of observed target values.
    initial_weights: starting weights, one per column of feature_matrix.
      The caller's array is copied, never modified.
    step_size: multiplier applied to each derivative when updating a weight.
    tolerance: iteration stops once the gradient magnitude falls below this.
    max_iterations: optional cap on the number of passes. The default None
      keeps the original behavior of iterating until the tolerance is met.

  Returns:
    numpy float array of fitted weights. If max_iterations is reached first,
    the weights from the last completed pass are returned.

  Raises:
    FloatingPointError: if the gradient magnitude becomes inf/NaN (the run
      diverged, typically because step_size is too large). Without this
      check the loop would never terminate, because NaN < tolerance is
      always False.
  """
  # Force a float copy: with integer initial weights the in-place updates
  # below would be truncated to integers on every pass.
  weights = np.array(initial_weights, dtype=float)
  iteration = 0
  while max_iterations is None or iteration < max_iterations:
    # compute the predictions based on feature_matrix and weights:
    yhat = predict_outcome(feature_matrix, weights)
    # compute the errors as predictions - output (fixed for this whole pass):
    e = errors(yhat, output)

    gradient_sum_squares = 0 # accumulates the squared partial derivatives

    # update each weight individually, all from the same residual vector e:
    for i in range(len(weights)):
      # feature_matrix[:, i] is the feature column associated with weights[i]
      deriv = feature_derivative(e, feature_matrix[:, i])

      # add the squared derivative to the gradient magnitude
      gradient_sum_squares = gradient_sum_squares+deriv**2

      # update the weight based on step size and derivative:
      weights[i] = weights[i]-(step_size*deriv)

    gradient_magnitude = np.sqrt(gradient_sum_squares)
    if not np.isfinite(gradient_magnitude):
      raise FloatingPointError(
          'gradient descent diverged (non-finite gradient); try a smaller step_size')
    if gradient_magnitude < tolerance:
      break
    iteration += 1
  return(weights)

In [None]:
# Simple model: regress price on sqft_living only, using the training data.
simple_features = ['sqft_living']
my_output= 'price'
simple_feature_matrix_train, output_train = get_numpy_data(train, simple_features, my_output)
# Starting point for gradient descent; entry 0 pairs with the constant column, entry 1 with sqft_living.
initial_weights = np.array([-47000., 1.])
# Multiplier applied to each derivative in the weight update.
step_size = 7e-12
# Gradient descent stops once the gradient magnitude drops below this value.
tolerance = 2.5e7

In [None]:
# Fit the one-feature model; iterates until the gradient magnitude is below `tolerance`.
simple_weights = regression_gradient_descent(simple_feature_matrix_train, output_train, initial_weights, step_size, tolerance)

In [None]:
# Fitted weights, one per line: the constant-term weight, then the sqft_living weight.
print(simple_weights[0], simple_weights[1], sep='\n')

-46999.88716554671
281.91211917520917


In [None]:
# Design matrix and observed prices for the test set, using the same single feature.
simple_feature_matrix_test, output_test = get_numpy_data(test, simple_features, my_output)

In [None]:
# Test-set price predictions from the fitted one-feature model.
simple_preds = predict_outcome(simple_feature_matrix_test, simple_weights)

In [None]:
# Compare the first test prediction (first line) with the observed price (second line).
print(simple_preds[0], output_test[0], sep='\n')

356134.4432550024
310000.0


In [None]:
# Residuals of the one-feature model on the test set.
simple_error = errors(yhat=simple_preds, y=output_test)
# Residual sum of squares, divided by 1e12 so the displayed number is easier to read.
# NOTE(review): the built-in sum() adds element by element; switching to np.sum may
# change the last digits of the displayed value, so it is left as is.
rss = sum(simple_error**2)/1e12
rss

275.4000449021288

In [None]:
# Two-feature model: regress price on sqft_living and sqft_living15.
model_features = ['sqft_living', 'sqft_living15']
my_output = 'price'
# Rebinds output_train (same 'price' column of the training data as before).
feature_matrix_train, output_train = get_numpy_data(train, model_features, my_output)
# One starting weight per design-matrix column: constant, sqft_living, sqft_living15.
initial_weights = np.array([-100000., 1., 1.])
# Multiplier applied to each derivative in the weight update.
step_size = 4e-12
# Gradient descent stops once the gradient magnitude drops below this value.
tolerance = 1e9

In [None]:
# Fit the two-feature model; iterates until the gradient magnitude is below `tolerance`.
weights = regression_gradient_descent(feature_matrix_train, output_train, initial_weights, step_size, tolerance)

In [None]:
# Constant-term weight, then the sqft_living weight, one per line.
# The third weight (weights[2], for sqft_living15) is not displayed here.
print(weights[0], weights[1], sep='\n')

-99999.96884887619
245.07260346458017


In [None]:
# Test-set design matrix for the two-feature model (output_test is rebound to the same 'price' column).
feature_matrix_test, output_test = get_numpy_data(test, model_features, my_output)
# Test-set price predictions from the fitted two-feature model.
preds = predict_outcome(feature_matrix_test, weights)

In [None]:
# Compare the first test prediction (first line) with the observed price (second line).
print(preds[0], output_test[0], sep='\n')

366651.4116294939
310000.0


In [None]:
# Residuals of the two-feature model on the test set.
# Note: this rebinds `error`, replacing the dummy-weight residuals computed earlier.
error = errors(yhat=preds, y=output_test)
# Residual sum of squares, divided by 1e12 so it can be compared with the one-feature value above.
rss = sum(error**2)/1e12
rss

270.2634436298033