## Testing ML functions 

In [4]:
# import functions and helpers
from helpers_higgs import *
from implementations import *

#import packages
import numpy as np
import matplotlib.pyplot as plt
import datetime

%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
# load data
# Height/weight data set loaded through the "old" helpers. The model inputs are bound to
# *_old names so they cannot clobber (or be clobbered by) the project `y` / `tx` that the
# experiments in this notebook consume, whatever order the cells are executed in.
height, weight, gender = load_old_data(sub_sample=False, add_outlier=False)
x, mean_x, std_x = standardize(height)
y_old, tx_old = build_old_model_data(x, weight)

In [6]:
# load project data
# Read the training set, then derive the two names used by the experiments:
# `tx` is built from the raw features, `y` is simply an alias of the loaded outputs.
features, output, ids = load_training_data()
tx = build_model_data(features)
y = output

In [7]:
# Quick visual check of the model input `tx` as currently bound in the kernel.
print(tx)

[[ 1.00000000e+00  3.14910656e-01  6.83319669e-02 ...  1.14381874e+00
  -2.52714288e+00  4.12510497e-01]
 [ 1.00000000e+00  7.40827026e-01  5.52504823e-01 ... -1.74353029e-17
  -1.68378328e-17 -2.73819964e-01]
 [ 1.00000000e+00 -5.38802302e-16  3.19515553e+00 ... -1.74353029e-17
  -1.68378328e-17 -2.93969845e-01]
 ...
 [ 1.00000000e+00 -3.10930673e-01  3.19316447e-01 ... -1.74353029e-17
  -1.68378328e-17 -3.17017229e-01]
 [ 1.00000000e+00 -5.10097335e-01 -8.45323970e-01 ... -1.74353029e-17
  -1.68378328e-17 -7.45439413e-01]
 [ 1.00000000e+00 -5.38802302e-16  6.65336083e-01 ... -1.74353029e-17
  -1.68378328e-17 -7.45439413e-01]]


In [8]:
# Quick visual check of the target vector `y` as currently bound in the kernel.
print(y)

[0 1 1 ... 0 1 1]


In [10]:
# set up testing parameters
max_iters = 100   # passed to the GD and SGD runs
gamma = 0.05      # passed to the GD and SGD runs
batch_size = 1    # passed to the SGD run only
lambda_ = 0.5
# Start from an all-zero weight vector of length 31 (presumably one weight per column
# of `tx` -- confirm). np.zeros gives a float array: the previous integer-typed vector
# would make any in-place update such as `w -= gamma * grad` fail with a casting error.
w_initial = np.zeros(31)

## mean_squared_error_gd (least squares, gradient descent)

In [11]:
# Run mean_squared_error_gd once and measure how long the call takes (wall clock).
t_begin = datetime.datetime.now()
w, mse = mean_squared_error_gd(y, tx, w_initial, max_iters, gamma)
elapsed = datetime.datetime.now() - t_begin

# Report the duration first, then the returned weights and loss.
print("GD: execution time={t:.7f} seconds".format(t=elapsed.total_seconds()))
print("optimal weights: ", w)
print("mse: ", mse)

GD: execution time=1.1930540 seconds
optimal weights:  [ 6.53440247e-01  9.43019160e-03  1.13615714e-01  4.65945620e-02
 -2.58835058e-02 -2.01017298e-02 -3.00760927e-02  4.64024550e-03
 -8.27969205e-02  1.57691394e-02 -1.72125969e-02  4.73445197e-02
 -6.17306924e-02 -3.69468591e-02 -7.44831100e-02  6.14892584e-04
  9.64774016e-04 -5.42641163e-02  3.80266744e-04 -1.40143573e-03
 -2.04286092e-02 -8.17398890e-04  9.06117906e-03 -2.92136933e-03
  1.22469706e-02 -1.56852877e-04 -3.30944738e-04  1.93453287e-02
 -8.10680826e-04  6.93801371e-04  8.92765351e-03]
mse:  0.0864215304927601


## mean_squared_error_sgd (least squares, stochastic gradient descent)

In [20]:
# Run mean_squared_error_sgd once and measure how long the call takes (wall clock).
# Seed NumPy's global RNG first so that re-running the cell reproduces the same numbers.
# NOTE(review): this assumes the optimiser samples its mini-batches through np.random's
# global state -- confirm in implementations.py; if it uses its own generator, seed that.
np.random.seed(42)

start_time = datetime.datetime.now()
w, mse = mean_squared_error_sgd(y, tx, w_initial, max_iters, gamma, batch_size)
end_time = datetime.datetime.now()

# Print result
execution_time = (end_time - start_time).total_seconds()
print("SGD: execution time={t:.7f} seconds".format(t=execution_time))
print("optimal weights: ", w)
print("mse: ", mse)

SGD: execution time=0.0144710 seconds
optimal weights:  [ 0.68828651 -0.26052241  0.30180959 -0.10441406 -0.05549074  0.24686794
  0.27661717 -0.20581963  0.17623275 -0.06896943 -0.10003348  0.17275089
  0.13640004 -0.15780995 -0.1079856   0.26849156  0.08359543 -0.20827497
  0.04179723  0.00239657  0.00898363 -0.08395189  0.01757371  0.07901825
 -0.15685631  0.04088467 -0.09843356 -0.09800659 -0.29140715 -0.0726017
 -0.04651181]
mse:  0.5845334779905454


## least_squares_closed_form

In [23]:
# Solve the least-squares problem directly and measure how long the call takes (wall clock).
# NOTE(review): the recorded output shows a few weights with magnitudes in the tens to
# hundreds while the rest are below one -- possibly near-collinear columns in tx; confirm.
t_begin = datetime.datetime.now()
w, mse = least_squares(y, tx)
elapsed = datetime.datetime.now() - t_begin

# Report the duration first, then the returned weights and loss.
print("LS: execution time={t:.7f} seconds".format(t=elapsed.total_seconds()))
print("optimal weights: ", w)
print("mse: ", mse)

LS: execution time=0.0275140 seconds
optimal weights:  [ 6.57332000e-01 -4.81728999e-03  1.27359614e-01  1.31751484e-01
  5.50905807e-04 -1.09211915e-02 -4.50268911e-02 -2.41745297e-03
 -1.41004383e-01  1.40751290e-02  1.64629230e+02  9.40705754e-02
 -5.90325151e-02 -3.83086297e-02 -3.19811255e+01  3.89730267e-04
  4.15328401e-04 -3.15390711e+01  4.30584336e-04 -1.25895666e-03
 -5.18296558e-02 -4.66893111e-04  2.35009511e-02 -2.08787977e-02
  2.37891731e-02 -3.25363233e-04 -9.43779264e-05  1.83000923e-02
 -7.79186707e-04  8.71593305e-04 -1.39463298e+02]
mse:  0.08510236304038934


## ridge_regression

In [24]:
# Sweep the ridge penalty over a grid and keep the fit with the smallest reported loss.
# NOTE(review): ridge_regression only receives (y, tx), so the loss compared here is
# presumably measured on the same data used for fitting; the recorded output grows with
# the penalty, which means this sweep favours the smallest lambda. Selecting lambda
# properly would need a held-out split -- confirm the intent.
start_time = datetime.datetime.now()
lambdas = np.linspace(0, 0.01, 100)
wBest = None
lambdaBest = None
# np.inf instead of a large magic number: any finite loss beats the sentinel, so the
# first candidate is always recorded no matter how large its loss is.
mseBest = np.inf
for lambda_candidate in lambdas:
    w, mse = ridge_regression(y, tx, lambda_candidate)
    if mse < mseBest:
        mseBest = mse
        wBest = w
        lambdaBest = lambda_candidate
    print("mse for ", lambda_candidate, ": ", mse)

end_time = datetime.datetime.now()

# Print result
execution_time = (end_time - start_time).total_seconds()
print("Ridge regression: execution time={t:.7f} seconds".format(t=execution_time))
print("optimal weights: ", wBest)
print("best loss: ",mseBest)
print("best lambda: ", lambdaBest)

mse for  0.0 :  0.08510236304081889
mse for  0.00010101010101010101 :  0.08515663046517952
mse for  0.00020202020202020202 :  0.08521054848401025
mse for  0.00030303030303030303 :  0.08526442055254123
mse for  0.00040404040404040404 :  0.0853182468533145
mse for  0.000505050505050505 :  0.08537202756661261
mse for  0.0006060606060606061 :  0.08542576287054306
mse for  0.0007070707070707071 :  0.08547945294111443
mse for  0.0008080808080808081 :  0.08553309795230743
mse for  0.0009090909090909091 :  0.08558669807614099
mse for  0.00101010101010101 :  0.08564025348273424
mse for  0.0011111111111111111 :  0.08569376434036459
mse for  0.0012121212121212121 :  0.0857472308155225
mse for  0.0013131313131313131 :  0.0858006530729625
mse for  0.0014141414141414141 :  0.08585403127575157
mse for  0.0015151515151515152 :  0.0859073655853149
mse for  0.0016161616161616162 :  0.08596065616147852
mse for  0.0017171717171717172 :  0.08601390316251041
mse for  0.0018181818181818182 :  0.0860671067451