In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from implementations import *
from evaluation import *
import datetime
from collections import Counter
from proj1_helpers import *
%load_ext autoreload
%autoreload 2

## Load the training data into a feature matrix, class labels, and event ids

In [2]:
# Location of the training CSV (relative path).
DATA_TRAIN_PATH = "data/train.csv"

# Forwarded to load_csv_data; leave it at False to work with all the data.
sub_sample = False

# load_csv_data returns three values: labels, feature matrix and event ids.
y, tX, ids = load_csv_data(DATA_TRAIN_PATH, sub_sample=sub_sample)

## Set parameters

In [3]:
# --- Cross-validation settings (all forwarded to cross_val) ---
split_size = 10
# Per the original note this toggles equalizing true/false samples in the
# data; the exact behavior lives in cross_val — TODO confirm.
equalize = False
# Size of tX along axis 1; used to size the initial weight vectors below.
num_param = tX.shape[1]

# Collector for every cross_val result produced by the sections below.
# NOTE: re-running a section appends its results again; re-run this cell
# first to start from an empty list.
evaluation_data = []

# --- Hyperparameter grids to explore ---
poly_degrees = [1, 3, 5, 7, 10, 12]
max_iters = [100, 300, 1000]
gammas = [0.01, 0.05, 0.1, 0.5]
lambdas = [0.01, 0.05, 0.1, 0.5]

# Minimal grids for a fast execution (uncomment to override the lists above)
#max_iters = [2]
#gammas = [0.01]
#lambdas = [0.01]
#poly_degrees = [2]

## 1. Linear regression using gradient descent

In [4]:
# Grid search for least_squares_GD over (poly_degree, max_iters, gamma).
# Each combination is cross-validated, stored in evaluation_data and printed.
for poly_degree in poly_degrees:
    # Weight vector length used for this degree: num_param * degree + 1.
    weight_count = num_param * poly_degree + 1
    for max_iter in max_iters:
        for gamma in gammas:
            # A fresh zero vector is built for every run.
            initial_w = np.zeros(weight_count)
            evaluation_result = cross_val(
                tX, y, equalize, split_size, poly_degree, least_squares_GD,
                initial_w=initial_w, max_iters=max_iter, gamma=gamma,
            )
            evaluation_data.append(evaluation_result)
            print(evaluation_result)

{'method': 'least_squares_GD', 'poly_degree': 2, 'parameters': {'initial_w': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), 'max_iters': 2, 'gamma': 0.01}, 'train_losses': [0.48806610529693156, 0.48632395960871994, 0.48660464786745633, 0.4865508933297451, 0.4868481994978501, 0.48676610654876434, 0.4868477294984041, 0.48622701087995845, 0.48628633762629786, 0.48716253876790205], 'test_losses': [0.48806610529693184, 0.48632395960872005, 0.48660464786745633, 0.4865508933297452, 0.4868481994978502, 0.48676610654876423, 0.4868477294984041, 0.48622701087995857, 0.48628633762629786, 0.48716253876790205], 'accuracies': [0.70048, 0.7076, 0.7076, 0.70536, 0.70592, 0.70488, 0.70648, 0.70448, 0.7048, 0.70112], 'f1_scores': [0.5665663347997222, 0.5744556991500757, 0.567404426

## 2. Linear regression using stochastic gradient descent

In [5]:
# Grid search for least_squares_SGD over (poly_degree, max_iters, gamma).
# Each combination is cross-validated, stored in evaluation_data and printed.
for poly_degree in poly_degrees:
    # Weight vector length used for this degree: num_param * degree + 1.
    weight_count = num_param * poly_degree + 1
    for max_iter in max_iters:
        for gamma in gammas:
            # Keyword order is kept as-is so the forwarded arguments arrive
            # in the same order as before.
            run_kwargs = dict(
                initial_w=np.zeros(weight_count),
                max_iters=max_iter,
                gamma=gamma,
            )
            initial_w = run_kwargs["initial_w"]
            evaluation_result = cross_val(
                tX, y, equalize, split_size, poly_degree, least_squares_SGD,
                **run_kwargs
            )
            evaluation_data.append(evaluation_result)
            print(evaluation_result)

{'method': 'least_squares_SGD', 'poly_degree': 2, 'parameters': {'initial_w': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), 'max_iters': 2, 'gamma': 0.01}, 'train_losses': [0.49201943917369984, 0.49995754391759784, 0.5178967938849826, 0.48277634004006525, 0.48472851271305534, 0.48110092365007484, 0.4982124849107408, 0.49433024961467714, 0.5009191310137673, 0.4821457239355709], 'test_losses': [0.4920194391737001, 0.499957543917598, 0.5178967938849826, 0.4827763400400653, 0.48472851271305556, 0.4811009236500749, 0.4982124849107409, 0.49433024961467725, 0.5009191310137671, 0.4821457239355709], 'accuracies': [0.5216, 0.49792, 0.4248, 0.63408, 0.62784, 0.58696, 0.51112, 0.52728, 0.53696, 0.61064], 'f1_scores': [0.39116269598859704, 0.401030731055545, 0.39589984876491

## 3. Least squares regression using normal equations

In [6]:
# Sweep least_squares over the polynomial degrees only; no extra keyword
# arguments are forwarded to cross_val for this method.
for poly_degree in poly_degrees:
    evaluation_result = cross_val(
        tX, y, equalize, split_size, poly_degree, least_squares
    )
    # Keep the result for the final overview and show it right away.
    evaluation_data.append(evaluation_result)
    print(evaluation_data[-1])

{'method': 'least_squares', 'poly_degree': 2, 'parameters': {}, 'train_losses': [0.3163447965718478, 0.30851497895145213, 0.3124366299449528, 0.3112927730271161, 0.31064388684081906, 0.31393112351999475, 0.3067479763335264, 0.31224670282232964, 0.31283815491752026, 0.31784225882954886], 'test_losses': [0.3163447965718478, 0.3085149789514522, 0.31243662994495286, 0.3112927730271161, 0.3106438868408191, 0.3139311235199947, 0.3067479763335264, 0.3122467028223297, 0.31283815491752043, 0.317842258829549], 'accuracies': [0.77344, 0.7784, 0.77848, 0.77944, 0.7788, 0.77592, 0.7816, 0.77888, 0.77536, 0.7744], 'f1_scores': [0.6336351875808538, 0.6435006435006435, 0.6348410919161281, 0.643199171735473, 0.6363277653557806, 0.6426840158183443, 0.6433237522863862, 0.6518891687657431, 0.6319790301441678, 0.6378114564603133], 'precisions': [0.7127473806752037, 0.7124536905101169, 0.7161559059803629, 0.7225937772608316, 0.7158922758212489, 0.7162354279215241, 0.7213595077644301, 0.7153123272526257, 0.7

## 4. Ridge regression using normal equations

In [7]:
# Grid search for ridge_regression over (poly_degree, lambda_).
# Each combination is cross-validated, stored in evaluation_data and printed.
for poly_degree in poly_degrees:
    for lambda_ in lambdas:
        evaluation_result = cross_val(
            tX, y, equalize, split_size, poly_degree, ridge_regression,
            lambda_=lambda_,
        )
        evaluation_data.append(evaluation_result)
        print(evaluation_data[-1])

{'method': 'ridge_regression', 'poly_degree': 2, 'parameters': {'lambda_': 0.01}, 'train_losses': [0.32082007044701605, 0.31221963026358507, 0.31597050412439687, 0.31525283392616016, 0.3143975043445532, 0.31803767894122986, 0.31069436787076965, 0.3157742901393175, 0.31715607637871185, 0.321547953775386], 'test_losses': [0.3208200704470162, 0.3122196302635852, 0.315970504124397, 0.3152528339261602, 0.3143975043445532, 0.3180376789412301, 0.3106943678707697, 0.3157742901393176, 0.31715607637871185, 0.32154795377538603], 'accuracies': [0.76384, 0.77016, 0.76976, 0.77336, 0.77248, 0.768, 0.77696, 0.77072, 0.77072, 0.76752], 'f1_scores': [0.6135113904163394, 0.6263493302119912, 0.6162666666666667, 0.6282640073481172, 0.6227115945874238, 0.6266735324407827, 0.6324808858423411, 0.6358322744599746, 0.6197930485539931, 0.6230869001297017], 'precisions': [0.7006578947368421, 0.7024504084014003, 0.7050030506406345, 0.7182718271827183, 0.708207604103802, 0.706326175275682, 0.7171898355754858, 0.70

## 5. Logistic regression using gradient descent or SGD

In [8]:
# Grid search for logistic_regression over (poly_degree, max_iters, gamma).
# Each combination is cross-validated, stored in evaluation_data and printed.
#
# NOTE(review): the recorded output of this cell reports a non-zero
# 'initial_w' in 'parameters' although a zero vector is passed in, and the
# "Iteration = 0" loss differs strongly between folds. This suggests the array
# is updated in place and shared across folds, so later folds may start from
# an earlier fold's weights — confirm in logistic_regression / cross_val.
for poly_degree in poly_degrees:
    # Weight vector length used for this degree: num_param * degree + 1.
    weight_count = num_param * poly_degree + 1
    for max_iter in max_iters:
        for gamma in gammas:
            # A fresh zero vector is built for every run.
            initial_w = np.zeros(weight_count)
            evaluation_result = cross_val(
                tX, y, equalize, split_size, poly_degree, logistic_regression,
                max_iters=max_iter, gamma=gamma, initial_w=initial_w,
            )
            evaluation_data.append(evaluation_result)
            print(evaluation_result)

Iteration =  0 	loss =  77979.05781299151
Iteration =  0 	loss =  1319250.005186148
Iteration =  0 	loss =  583096.206015225
Iteration =  0 	loss =  1044549.5500241427
Iteration =  0 	loss =  633343.8654422795
Iteration =  0 	loss =  1004065.1945785799
Iteration =  0 	loss =  635214.7904315888
Iteration =  0 	loss =  912935.2553831073
Iteration =  0 	loss =  667645.9313022145
Iteration =  0 	loss =  864117.5098568926
{'method': 'logistic_regression', 'poly_degree': 2, 'parameters': {'max_iters': 2, 'gamma': 0.01, 'initial_w': array([ 1.05623800e+02, -3.51552861e+02, -6.48747731e+01,  9.24714087e+01,
        3.83849080e+01,  1.14925719e+02, -2.44261130e+01,  3.02567553e+02,
       -1.23621030e+02, -2.86869181e+01, -2.19242789e+02,  1.38759004e+02,
        1.11799200e+02,  3.29280268e+02, -2.96516733e+00,  1.69875241e+01,
        1.43763774e+02,  6.43832792e-01, -2.91309201e-01, -2.10184436e+01,
        2.20317522e+01, -3.76227169e+01, -2.30677072e+01, -1.16205492e+02,
        1.11521028

## 6. Regularized logistic regression using gradient descent or SGD

In [9]:
# Grid search for reg_logistic_regression over
# (poly_degree, max_iters, gamma, lambda_).
# Each combination is cross-validated, stored in evaluation_data and printed.
#
# The polynomial degree is iterated explicitly here. Previously this cell used
# whatever value `poly_degree` still held from the preceding cell's loop, so
# only a single degree was evaluated and the cell failed with a NameError when
# run on its own.
for poly_degree in poly_degrees:
    # Weight vector length used for this degree: num_param * degree + 1.
    weight_count = num_param * poly_degree + 1
    for max_iter in max_iters:
        for gamma in gammas:
            for lambda_ in lambdas:
                # A fresh zero vector is built for every run.
                initial_w = np.zeros(weight_count)
                evaluation_result = cross_val(
                    tX, y, equalize, split_size, poly_degree,
                    reg_logistic_regression,
                    max_iters=max_iter, gamma=gamma,
                    initial_w=initial_w, lambda_=lambda_,
                )

                evaluation_data.append(evaluation_result)
                print(evaluation_result)

Iteration =  0 	loss =  77979.05781299151
Iteration =  0 	loss =  1324107.174017514
Iteration =  0 	loss =  590842.7436325358
Iteration =  0 	loss =  1052256.3264459998
Iteration =  0 	loss =  642427.5689068267
Iteration =  0 	loss =  1014489.141098264
Iteration =  0 	loss =  647275.3577172145
Iteration =  0 	loss =  925939.3801562154
Iteration =  0 	loss =  680198.4733660555
Iteration =  0 	loss =  877222.578808354
{'method': 'reg_logistic_regression', 'poly_degree': 2, 'parameters': {'max_iters': 2, 'gamma': 0.01, 'initial_w': array([ 1.05413052e+02, -3.51676913e+02, -6.48487854e+01,  9.24246405e+01,
        3.85129017e+01,  1.14836235e+02, -2.44777232e+01,  3.01837105e+02,
       -1.23455075e+02, -2.88447085e+01, -2.19342212e+02,  1.38774894e+02,
        1.11712071e+02,  3.28845852e+02, -3.00279800e+00,  1.71267092e+01,
        1.43259062e+02,  6.42297878e-01, -3.94763299e-01, -2.13361752e+01,
        2.21795599e+01, -3.76523914e+01, -2.31351839e+01, -1.16251307e+02,
        1.11377

## Final overview

In [10]:
# Print every collected cross-validation result, one per line, in the order
# the runs were appended.
for entry in evaluation_data:
    print(entry)

{'method': 'least_squares_GD', 'poly_degree': 2, 'parameters': {'initial_w': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), 'max_iters': 2, 'gamma': 0.01}, 'train_losses': [0.48806610529693156, 0.48632395960871994, 0.48660464786745633, 0.4865508933297451, 0.4868481994978501, 0.48676610654876434, 0.4868477294984041, 0.48622701087995845, 0.48628633762629786, 0.48716253876790205], 'test_losses': [0.48806610529693184, 0.48632395960872005, 0.48660464786745633, 0.4865508933297452, 0.4868481994978502, 0.48676610654876423, 0.4868477294984041, 0.48622701087995857, 0.48628633762629786, 0.48716253876790205], 'accuracies': [0.70048, 0.7076, 0.7076, 0.70536, 0.70592, 0.70488, 0.70648, 0.70448, 0.7048, 0.70112], 'f1_scores': [0.5665663347997222, 0.5744556991500757, 0.567404426