## Testing ML functions 

In [19]:
# import functions and helpers
from helpers import *
from implementations import *

#import packages
import numpy as np
import matplotlib.pyplot as plt
import datetime

%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# load data
# Loads the older height/weight/gender dataset, passes height through
# standardize() (which also hands back mean_x and std_x), and builds (y, tx)
# from the result and weight.
# NOTE(review): y and tx assigned here are reassigned by the "load project
# data" cell that follows, so the fitting cells further down operate on
# whichever of the two loading cells was executed last.
height, weight, gender = load_old_data(sub_sample=False, add_outlier=False)
x, mean_x, std_x = standardize(height)
y, tx = build_old_model_data(x, weight)

In [61]:
# load project data
# The second value returned by load_training_data() is bound directly to y;
# `output` is kept as a second name for the same object.
features, y, ids = load_training_data()
output = y
# tx is what every fitting cell below receives alongside y.
tx = build_model_data(features)

(250000, 30)


In [62]:
# Print the current tx to inspect it before it is handed to the fitting cells.
print(tx)

[[ 1.00000000e+00  3.14910656e-01  6.83319669e-02 ...  1.14381874e+00
  -2.52714288e+00  4.12510497e-01]
 [ 1.00000000e+00  7.40827026e-01  5.52504823e-01 ... -1.74353029e-17
  -1.68378328e-17 -2.73819964e-01]
 [ 1.00000000e+00 -5.38802302e-16  3.19515553e+00 ... -1.74353029e-17
  -1.68378328e-17 -2.93969845e-01]
 ...
 [ 1.00000000e+00 -3.10930673e-01  3.19316447e-01 ... -1.74353029e-17
  -1.68378328e-17 -3.17017229e-01]
 [ 1.00000000e+00 -5.10097335e-01 -8.45323970e-01 ... -1.74353029e-17
  -1.68378328e-17 -7.45439413e-01]
 [ 1.00000000e+00 -5.38802302e-16  6.65336083e-01 ... -1.74353029e-17
  -1.68378328e-17 -7.45439413e-01]]


In [72]:
# set up testing parameters
# max_iters and gamma are passed to least_squares_GD and least_squares_SGD.
max_iters = 100
# NOTE(review): the recorded GD and SGD outputs for this step size show
# weights around 1e49-1e58 and an mse near 1e99, which looks like divergence;
# a smaller gamma is probably needed - confirm.
gamma = 0.05
# batch_size is only passed to least_squares_SGD.
batch_size = 1
# lambda_ is not referenced by any cell visible here (the ridge cell sweeps
# its own grid of penalties).
lambda_ = 0.5
# Float zeros rather than an integer array: an int64 starting vector would make
# any in-place float update (w -= gamma * grad) inside a solver fail on casting.
# The length 31 must match the number of columns of tx (the recorded runs print
# 31 coefficients; presumably 30 features plus a leading column of ones).
w_initial = np.zeros(31)

## least_squares_GD

In [75]:
# Call least_squares_GD from the shared starting weights and time just that call.
t_start = datetime.datetime.now()
w, mse = least_squares_GD(y, tx, w_initial, max_iters, gamma)
t_end = datetime.datetime.now()
elapsed_seconds = (t_end - t_start).total_seconds()

# Report the duration first, then the returned weights and loss.
print("GD: execution time={t:.7f} seconds".format(t=elapsed_seconds))
print("optimal weights: ", w)
print("mse: ", mse)

[1. 1. 1. ... 0. 0. 1.]
[ 6.57332000e-01 -4.81743327e-03  1.27359910e-01  1.31751455e-01
  5.51227442e-04 -1.09210580e-02 -4.50270170e-02 -2.41745287e-03
 -1.41004258e-01  1.40750654e-02  1.64656896e+02  9.40706085e-02
 -5.90325087e-02 -3.83086313e-02 -3.19864841e+01  3.89729485e-04
  4.15328366e-04 -3.15443468e+01  4.30584500e-04 -1.25895687e-03
 -5.18296552e-02 -4.66892903e-04  2.35009519e-02 -2.08787977e-02
  2.37891726e-02 -3.25363119e-04 -9.43779134e-05  1.83000920e-02
 -7.79186730e-04  8.71593634e-04 -1.39486733e+02]
0.046031568506700905
GD: execution time=0.9825630 seconds
optimal weights:  [-1.62170796e+43  5.91313363e+57 -2.44751567e+58 -4.15385265e+57
  1.18289937e+59 -6.32427273e+57  2.04892722e+58 -8.45879916e+56
 -6.92300441e+58  5.78470433e+58  1.25716360e+59  9.93340444e+57
  6.57500506e+58 -1.68803705e+57  5.76854808e+58  4.63892808e+56
  8.39337646e+56  4.98679852e+58  1.39197697e+57 -4.82116758e+56
  8.20468735e+58  1.13033906e+57  1.19468245e+59  9.52284340e+58
  9.5

## least_squares_SGD

In [74]:
# Seed the RNG so this stochastic run gives the same numbers on every
# Restart & Run All.
# NOTE(review): assumes least_squares_SGD draws its mini-batches through
# NumPy's global random state - confirm against the implementation.
np.random.seed(1)

# Call least_squares_SGD from the shared starting weights and time just that call.
t_start = datetime.datetime.now()
w, mse = least_squares_SGD(y, tx, w_initial, max_iters, gamma, batch_size)
t_end = datetime.datetime.now()

# Print result
elapsed_seconds = (t_end - t_start).total_seconds()
print("SGD: execution time={t:.7f} seconds".format(t=elapsed_seconds))
print("optimal weights: ", w)
print("mse: ", mse)

SGD: execution time=20.1630010 seconds
optimal weights:  [-2.07081531e+49 -2.47551068e+47 -1.74067370e+49  1.26013347e+49
  5.57572942e+48  5.67443399e+48  4.16322167e+48 -2.98227397e+48
  1.58063684e+49  2.97723465e+48  6.18618550e+48 -5.02554785e+47
  1.47963557e+49  7.03017629e+48 -2.62016428e+47 -3.39847923e+48
 -7.85775014e+48  7.56398832e+48 -1.83621688e+48  1.34343378e+49
 -8.46775144e+48 -6.22451883e+48  7.43797804e+48  8.45427752e+48
 -1.09648127e+48 -1.58639572e+49 -3.61271951e+47  1.36081736e+48
 -1.95916598e+48  4.29318512e+48  5.65984027e+48]
mse:  1.9232525171590263e+99


## least_squares_closed_form

In [70]:
# Call least_squares on (y, tx) and time just that call.
t_start = datetime.datetime.now()
w, mse = least_squares(y, tx)
t_end = datetime.datetime.now()
elapsed_seconds = (t_end - t_start).total_seconds()

# Report the duration first, then the returned weights and loss.
print("LS: execution time={t:.7f} seconds".format(t=elapsed_seconds))
print("optimal weights: ", w)
print("mse: ", mse)

LS: execution time=0.1473590 seconds
optimal weights:  [ 6.57332000e-01 -4.81743327e-03  1.27359910e-01  1.31751455e-01
  5.51227442e-04 -1.09210580e-02 -4.50270170e-02 -2.41745287e-03
 -1.41004258e-01  1.40750654e-02  1.64656896e+02  9.40706085e-02
 -5.90325087e-02 -3.83086313e-02 -3.19864841e+01  3.89729485e-04
  4.15328366e-04 -3.15443468e+01  4.30584500e-04 -1.25895687e-03
 -5.18296552e-02 -4.66892903e-04  2.35009519e-02 -2.08787977e-02
  2.37891726e-02 -3.25363119e-04 -9.43779134e-05  1.83000920e-02
 -7.79186730e-04  8.71593634e-04 -1.39486733e+02]
mse:  0.08510236304075187


## ridge_regression

In [71]:
# Sweep the ridge penalty over an evenly spaced grid and keep the fit whose
# returned mse is lowest.
# NOTE(review): the mse compared here is whatever ridge_regression returns for
# the same (y, tx) it was fitted on - presumably training error. The recorded
# output shows it growing with the penalty, so this sweep ends up choosing
# lambda = 0; pick lambda on held-out data instead - confirm.
t_start = datetime.datetime.now()
lambdas = np.linspace(0, 0.01, 100)
wBest = None
# Start from +inf so the first finite mse always wins, however large it is
# (a fixed cap such as 1e7 would silently select nothing for a badly scaled fit).
mseBest = np.inf
lambdaBest = None
for lam in lambdas:
    w, mse = ridge_regression(y, tx, lam)
    if mse < mseBest:
        mseBest = mse
        wBest = w
        lambdaBest = lam
    print("mse for ", lam, ": ", mse)

t_end = datetime.datetime.now()

# Print result
elapsed_seconds = (t_end - t_start).total_seconds()
print("Ridge regression: execution time={t:.7f} seconds".format(t=elapsed_seconds))
print("best lambda: ", lambdaBest, " mse: ", mseBest)
print("optimal weights: ", wBest)

mse for  0.0 :  0.08510236304075187
mse for  0.00010101010101010101 :  0.08510268940976572
mse for  0.00020202020202020202 :  0.08510275845788193
mse for  0.00030303030303030303 :  0.08510287308928195
mse for  0.00040404040404040404 :  0.08510303294462443
mse for  0.000505050505050505 :  0.08510323767107736
mse for  0.0006060606060606061 :  0.0851034869220186
mse for  0.0007070707070707071 :  0.08510378035675728
mse for  0.0008080808080808081 :  0.08510411764027734
mse for  0.0009090909090909091 :  0.08510449844300108
mse for  0.00101010101010101 :  0.08510492244057165
mse for  0.0011111111111111111 :  0.08510538931365186
mse for  0.0012121212121212121 :  0.08510589874773807
mse for  0.0013131313131313131 :  0.08510645043298766
mse for  0.0014141414141414141 :  0.08510704406405863
mse for  0.0015151515151515152 :  0.08510767933996057
mse for  0.0016161616161616162 :  0.08510835596391536
mse for  0.0017171717171717172 :  0.08510907364322741
mse for  0.0018181818181818182 :  0.0851098320