In [68]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

from implementations import *
from split_data import *
import datetime

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load the training data into feature matrix, class labels, and event ids:

In [69]:
# Project helper module. The wildcard import hides where each name comes from;
# presumably load_csv_data is defined here — verify against proj1_helpers.
from proj1_helpers import *
# Relative path to the training CSV.
DATA_TRAIN_PATH = 'data/train.csv'
# Per the section heading: y = class labels, tX = feature matrix, ids = event ids.
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

# Standardize the features, overwriting the raw matrix under the same name.
# Loading and standardizing share this cell, so re-running the cell always
# starts again from the file rather than standardizing twice.
# NOTE(review): the exact scaling applied is defined by `standardize` — confirm there.
tX = standardize(tX)

In [70]:
# Sanity check: display the dimensions of the standardized feature matrix.
tX.shape

(250000, 30)

## Train/Test split

In [71]:
# Split features and labels into a training part and a held-out part.
# NOTE(review): 0.8 is presumably the fraction assigned to training, and whether
# the split is shuffled or seeded is decided inside split_data — confirm there.
x_train, x_test, y_train, y_test = split_data(tX, y, 0.8)
# Registry filled by the sections below: one entry per method name, holding the
# best test loss found and the hyper-parameters that produced it.
# Re-running this cell discards any results collected so far.
best_models = {}

## 1. Linear regression using gradient descent

In [72]:
# Hyper-parameter grid for gradient descent: every (max_iter, gamma) pair is tried.
max_iters = [10, 100, 500, 750, 1000, 1500]
gammas = [0.01, 0.049, 0.05, 0.053, 0.07, 0.1]

# Start from +inf instead of an arbitrary cap of 1, so the first finite test
# loss is always recorded. With the old cap, a grid whose losses were all >= 1
# left 'gamma' and 'max_iter' missing from the entry.
best_models['least_squares_GD'] = {
    'loss_test': float('inf')
}

# NOTE(review): the hyper-parameters are selected on the same held-out split
# whose loss is reported as the "test loss"; consider a separate validation split.
for max_iter in max_iters:
    for gamma in gammas:
        # The timed span covers training plus the evaluation on the held-out split.
        start_time = datetime.datetime.now()
        # One weight per feature column (no hard-coded 30). A fresh zero vector
        # is built for every run in case the optimiser updates it in place.
        w_initial = np.zeros(x_train.shape[1])
        w, loss_train = least_squares_GD(y_train, x_train, w_initial, max_iter, gamma)
        loss_test = compute_loss(y_test, x_test, w)
        end_time = datetime.datetime.now()

        # Single report line per run; the text is identical to the previous
        # sequence of print(..., end='') calls.
        print(
            'least_squares_GD // Parameters: '
            + 'max_iter=' + str(max_iter) + " gamma=" + str(gamma)
            + ' // Time: ' + str((end_time - start_time).total_seconds()) + 's'
            + " // train loss: " + str(loss_train)
            + " // test loss: " + str(loss_test)
        )

        # Keep the configuration with the lowest held-out loss seen so far.
        if best_models['least_squares_GD']['loss_test'] > loss_test:
            best_models['least_squares_GD'].update(
                loss_test=loss_test,
                gamma=gamma,
                max_iter=max_iter,
            )
        

least_squares_GD // Parameters: max_iter=10 gamma=0.01 // Time: 0.107227s // train loss: 0.46525219398362894 // test loss: 0.46507146646928105
least_squares_GD // Parameters: max_iter=10 gamma=0.049 // Time: 0.095504s // train loss: 0.42870207570134317 // test loss: 0.4289706313558528
least_squares_GD // Parameters: max_iter=10 gamma=0.05 // Time: 0.076589s // train loss: 0.4282770800415848 // test loss: 0.42855451671850586
least_squares_GD // Parameters: max_iter=10 gamma=0.053 // Time: 0.090288s // train loss: 0.4270785184578131 // test loss: 0.4273812324402966
least_squares_GD // Parameters: max_iter=10 gamma=0.07 // Time: 0.080551s // train loss: 0.4219540905759663 // test loss: 0.4223677224896806
least_squares_GD // Parameters: max_iter=10 gamma=0.1 // Time: 0.073266s // train loss: 0.4167555868631728 // test loss: 0.4172784113974019
least_squares_GD // Parameters: max_iter=100 gamma=0.01 // Time: 0.841558s // train loss: 0.41721107910194555 // test loss: 0.41771055262636425
least

## 2. Linear regression using stochastic gradient descent

In [73]:
# Hyper-parameter grid for stochastic gradient descent: every (max_iter, gamma)
# pair is tried.
max_iters = [10, 100, 500, 750, 1000, 1500]
gammas = [0.01, 0.049, 0.05, 0.053, 0.07, 0.1]

# Start from +inf instead of an arbitrary cap of 1, so the first finite test
# loss is always recorded. With the old cap, a grid whose losses were all >= 1
# left 'gamma' and 'max_iter' missing from the entry.
best_models['least_squares_SGD'] = {
    'loss_test': float('inf')
}

# NOTE(review): SGD is stochastic. If least_squares_SGD draws from NumPy's
# global RNG, seed it before this loop so the recorded output is reproducible —
# confirm in implementations.
# NOTE(review): the hyper-parameters are selected on the same held-out split
# whose loss is reported as the "test loss"; consider a separate validation split.
for max_iter in max_iters:
    for gamma in gammas:
        # Stochastic gradient descent run. The timed span covers training plus
        # the evaluation on the held-out split.
        start_time = datetime.datetime.now()
        # One weight per feature column (no hard-coded 30). A fresh zero vector
        # is built for every run in case the optimiser updates it in place.
        w_initial = np.zeros(x_train.shape[1])
        w, loss_train = least_squares_SGD(y_train, x_train, w_initial, max_iter, gamma)
        loss_test = compute_loss(y_test, x_test, w)
        end_time = datetime.datetime.now()

        # Single report line per run; the text is identical to the previous
        # sequence of print(..., end='') calls.
        print(
            'least_squares_SGD // Parameters: '
            + 'max_iter=' + str(max_iter) + " gamma=" + str(gamma)
            + ' // Time: ' + str((end_time - start_time).total_seconds()) + 's'
            + " // train loss: " + str(loss_train)
            + " // test loss: " + str(loss_test)
        )

        # Keep the configuration with the lowest held-out loss seen so far.
        if best_models['least_squares_SGD']['loss_test'] > loss_test:
            best_models['least_squares_SGD'].update(
                loss_test=loss_test,
                gamma=gamma,
                max_iter=max_iter,
            )

least_squares_SGD // Parameters: max_iter=10 gamma=0.01 // Time: 0.309497s // train loss: 0.5379058738737937 // test loss: 0.5381510089686146
least_squares_SGD // Parameters: max_iter=10 gamma=0.049 // Time: 0.248851s // train loss: 0.5545475172340977 // test loss: 0.553866630107197
least_squares_SGD // Parameters: max_iter=10 gamma=0.05 // Time: 0.253265s // train loss: 0.5778756220924579 // test loss: 0.5776464081315242
least_squares_SGD // Parameters: max_iter=10 gamma=0.053 // Time: 0.259537s // train loss: 0.6605660692294563 // test loss: 0.6561251330424567
least_squares_SGD // Parameters: max_iter=10 gamma=0.07 // Time: 0.242904s // train loss: 0.5992502810492313 // test loss: 0.6019676591871684
least_squares_SGD // Parameters: max_iter=10 gamma=0.1 // Time: 0.241919s // train loss: 0.8964452744868399 // test loss: 0.894060292433458
least_squares_SGD // Parameters: max_iter=100 gamma=0.01 // Time: 2.488692s // train loss: 0.45026175730933005 // test loss: 0.4507196317336612
least

## Final overview

In [74]:
# For every method: its name, then its best recorded result, then a blank line.
for method_name, best_result in best_models.items():
    print(method_name, best_result, '', sep='\n')

least_squares_GD
{'loss_test': 0.3900498812031061, 'gamma': 0.1, 'max_iter': 1500}

least_squares_SGD
{'loss_test': 0.4101885724806808, 'gamma': 0.01, 'max_iter': 1500}

