In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

from implementations import *
from evaluation import *
import datetime

## Load the training data into feature matrix, class labels, and event ids:

In [2]:
from proj1_helpers import *
# Location of the training CSV (a relative path, so it is resolved against the
# directory the notebook kernel runs in).
DATA_TRAIN_PATH = 'data/train.csv'
# The three values returned by load_csv_data are unpacked as the class labels
# `y`, the feature matrix `tX` and the event ids `ids`.
# NOTE(review): sub_sample=False presumably loads the complete file rather than
# a reduced sample — confirm in proj1_helpers.
y, tX, ids = load_csv_data(DATA_TRAIN_PATH, sub_sample=False)

## Properties of dataset

In [3]:
from collections import Counter
# Class-balance check: tally how many entries of `y` carry each label value.
# Left as the cell's last expression so the notebook displays the tally.
Counter(y)

Counter({1.0: 42748, -1.0: 82252})

## Collect data

In [4]:
# Shared accumulator: every experiment cell below appends the result returned
# by cross_val to this list. Re-running this cell discards what was collected.
evaluation_data = list()

## 1. Linear regression using gradient descent

In [5]:
# Settings shared by the experiment cells below.
# split_size is handed to cross_val as its fourth positional argument
# (presumably the number of cross-validation splits — confirm in cross_val).
split_size = 10
# equalize is handed to cross_val as its third positional argument; going by
# the original note it balances the positive and negative samples in the data
# when True — confirm in cross_val.
equalize = False
# Size of tX along axis 1; used below to size the initial weight vectors.
num_param = tX.shape[1]

# Hyper-parameter grids swept by the experiment cells.
# Forwarded as max_iters= to the iterative methods.
max_iters = [100, 300, 1000]
# Forwarded as gamma= to the iterative methods.
gammas = [0.01, 0.05, 0.1, 0.5]
# Forwarded as lambda_= to the regularized methods.
lambdas = [0.01, 0.05, 0.1, 0.5]
# Forwarded as cross_val's poly_degree argument and used to size initial_w.
poly_degrees = [1, 3, 5, 7, 10, 12]

In [7]:
# Cross-validated grid search for least_squares_GD over every combination of
# polynomial degree, iteration budget and gamma. Each result is stored in
# evaluation_data and echoed to the cell output.
for poly_degree in poly_degrees:
    for max_iter in max_iters:
        for gamma in gammas:
            # Start every configuration from an all-zero weight vector of
            # length num_param * poly_degree + 1 (the extra entry is
            # presumably an offset term — confirm against cross_val).
            initial_w = np.zeros(num_param * poly_degree + 1)
            evaluation_result = cross_val(
                tX, y, equalize, split_size, poly_degree, least_squares_GD,
                initial_w=initial_w, max_iters=max_iter, gamma=gamma,
            )
            evaluation_data.append(evaluation_result)
            print(evaluation_result)

{'method': 'least_squares_GD', 'poly_degree': 2, 'parameters': {'initial_w': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), 'max_iters': 10, 'gamma': 0.001}, 'train_losses': [0.49390621833261733, 0.4929860681621087, 0.4931382898371554, 0.4931076496929922, 0.49326816169689564, 0.49321434364129807, 0.4932786988849711, 0.4929262909989992, 0.4929618966354508, 0.4934290349405035], 'test_losses': [0.49390621833261733, 0.4929860681621086, 0.49313828983715535, 0.49310764969299237, 0.49326816169689586, 0.49321434364129824, 0.49327869888497106, 0.4929262909989992, 0.49296189663545087, 0.4934290349405035], 'accuracies': [0.70016, 0.70736, 0.70744, 0.70504, 0.70552, 0.70464, 0.70576, 0.70408, 0.70456, 0.70064], 'f1_scores': [0.5661032646445936, 0.5739576054041463, 0.56716771

## 2. Linear regression using stochastic gradient descent

In [8]:
# Cross-validated grid search for least_squares_SGD over every combination of
# polynomial degree, iteration budget and gamma. Each result is stored in
# evaluation_data and echoed to the cell output.
# NOTE(review): no random seed is set in the cells visible here; if
# least_squares_SGD or cross_val draw random numbers, reruns will not
# reproduce these figures — confirm.
for poly_degree in poly_degrees:
    for max_iter in max_iters:
        for gamma in gammas:
            # All-zero starting weights, rebuilt for every configuration.
            initial_w = np.zeros(num_param * poly_degree + 1)
            evaluation_result = cross_val(
                tX, y, equalize, split_size, poly_degree, least_squares_SGD,
                initial_w=initial_w, max_iters=max_iter, gamma=gamma,
            )
            evaluation_data.append(evaluation_result)
            print(evaluation_result)

{'method': 'least_squares_SGD', 'poly_degree': 2, 'parameters': {'initial_w': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), 'max_iters': 10, 'gamma': 0.001}, 'train_losses': [0.4937193964069987, 0.49635403265692757, 0.48778269852382694, 0.49163535760551585, 0.49326513195553534, 0.49160640084950485, 0.4929259316469312, 0.4925600681437829, 0.4907229019173871, 0.4916633619620205], 'test_losses': [0.4937193964069989, 0.4963540326569275, 0.4877826985238269, 0.4916353576055157, 0.4932651319555355, 0.49160640084950447, 0.49292593164693144, 0.4925600681437829, 0.4907229019173868, 0.49166336196202076], 'accuracies': [0.64536, 0.61136, 0.67816, 0.67016, 0.67712, 0.69168, 0.66504, 0.6328, 0.67216, 0.6456], 'f1_scores': [0.45426566539455865, 0.37429160226687275, 0.554139421

## 3. Least squares regression using normal equations

In [9]:
# Cross-validate least_squares once per polynomial degree; it is called with
# no extra keyword arguments, so the degree is the only thing swept.
for poly_degree in poly_degrees:
    evaluation_result = cross_val(
        tX, y, equalize, split_size, poly_degree, least_squares,
    )
    # Keep the result for the final overview and echo it right away.
    evaluation_data.append(evaluation_result)
    print(evaluation_result)

{'method': 'least_squares', 'poly_degree': 2, 'parameters': {}, 'train_losses': [0.3163447965718478, 0.30851497895145213, 0.3124366299449528, 0.3112927730271161, 0.31064388684081906, 0.31393112351999475, 0.3067479763335264, 0.31224670282232964, 0.31283815491752026, 0.31784225882954886], 'test_losses': [0.3163447965718478, 0.3085149789514522, 0.31243662994495286, 0.3112927730271161, 0.3106438868408191, 0.3139311235199947, 0.3067479763335264, 0.3122467028223297, 0.31283815491752043, 0.317842258829549], 'accuracies': [0.77344, 0.7784, 0.77848, 0.77944, 0.7788, 0.77592, 0.7816, 0.77888, 0.77536, 0.7744], 'f1_scores': [0.6336351875808538, 0.6435006435006435, 0.6348410919161281, 0.643199171735473, 0.6363277653557806, 0.6426840158183443, 0.6433237522863862, 0.6518891687657431, 0.6319790301441678, 0.6378114564603133], 'precisions': [0.7127473806752037, 0.7124536905101169, 0.7161559059803629, 0.7225937772608316, 0.7158922758212489, 0.7162354279215241, 0.7213595077644301, 0.7153123272526257, 0.7

## 4. Ridge regression using normal equations

In [11]:
# Cross-validate ridge_regression for every (polynomial degree, lambda_) pair.
for poly_degree in poly_degrees:
    for lambda_ in lambdas:
        evaluation_result = cross_val(
            tX, y, equalize, split_size, poly_degree, ridge_regression,
            lambda_=lambda_,
        )
        # Keep the result for the final overview and echo it right away.
        evaluation_data.append(evaluation_result)
        print(evaluation_result)

{'method': 'ridge_regression', 'poly_degree': 2, 'parameters': {'lambda_': 1}, 'train_losses': [0.39481375758823406, 0.38951813078136727, 0.3890868790575823, 0.390192844032208, 0.390508571754282, 0.3922696248226068, 0.38816422723841987, 0.39149696225979713, 0.39050723876101645, 0.39317872361203693], 'test_losses': [0.3948137575882341, 0.3895181307813674, 0.3890868790575825, 0.3901928440322081, 0.3905085717542821, 0.3922696248226069, 0.38816422723841987, 0.3914969622597973, 0.3905072387610166, 0.39317872361203704], 'accuracies': [0.74256, 0.7432, 0.74208, 0.744, 0.74752, 0.74344, 0.74616, 0.74048, 0.74168, 0.73912], 'f1_scores': [0.6039872015751907, 0.6110976496244245, 0.6028578467602859, 0.6110841030627128, 0.613235294117647, 0.6103754100352327, 0.6143187066974595, 0.6138095238095238, 0.6018004686151189, 0.604870956015994], 'precisions': [0.6403966597077244, 0.6316053092912597, 0.6280800821355236, 0.6380710659898478, 0.635670731707317, 0.6426195958045536, 0.6339688911189162, 0.63217263

## 5. Logistic regression using gradient descent or SGD

In [12]:
# Cross-validated grid search for logistic_regression over every combination
# of polynomial degree, iteration budget and gamma. Each result is stored in
# evaluation_data and echoed to the cell output.
# The split count comes from the shared `split_size` setting (previously a
# hardcoded 10) so that changing the configuration cell affects this sweep too.
for poly_degree in poly_degrees:
    for max_iter in max_iters:
        for gamma in gammas:
            # All-zero starting weights, rebuilt for every configuration.
            initial_w = np.zeros(num_param * poly_degree + 1)
            # NOTE(review): the saved output of this cell reports a non-zero
            # 'initial_w' in the returned parameters, and the first logged loss
            # differs sharply from the following ones. That suggests the array
            # is updated in place inside cross_val / logistic_regression and
            # carried over between splits — confirm, and copy it there if so.
            evaluation_result = cross_val(
                tX, y, equalize, split_size, poly_degree, logistic_regression,
                max_iters=max_iter, gamma=gamma, initial_w=initial_w,
            )
            evaluation_data.append(evaluation_result)
            print(evaluation_result)

Iteration =  0 	loss =  77979.05781299151
Iteration =  0 	loss =  799956.7802994566
Iteration =  0 	loss =  644585.7351826786
Iteration =  0 	loss =  656570.4578202947
Iteration =  0 	loss =  650797.8244934919
Iteration =  0 	loss =  667469.9602483811
Iteration =  0 	loss =  646754.7596455138
Iteration =  0 	loss =  682702.9387173599
Iteration =  0 	loss =  642830.9058285144
Iteration =  0 	loss =  656940.3929710733
{'method': 'logistic_regression', 'poly_degree': 2, 'parameters': {'max_iters': 10, 'gamma': 0.001, 'initial_w': array([ 46.96952643, -43.74279511,  -9.17552526, -10.85631567,
        -6.37601504,  23.78888683,   2.61274931,  64.60299997,
       -11.23380239,   9.37309074, -16.94806871,   5.79078209,
         8.89059092,  47.07736667,  -0.49427679,   2.00561962,
        32.26393266,   0.61213574,  -0.09881163, -14.7293513 ,
         2.87248857,  14.40940348,  14.33528005,  -6.11779723,
         1.40351722,  -0.31660912, -10.27894336,  -0.15334109,
        -0.17078174,  -6.8

## 6. Regularized logistic regression using gradient descent or SGD

In [13]:
# Cross-validated grid search for reg_logistic_regression over every
# combination of polynomial degree, iteration budget, gamma and lambda_.
# Each result is stored in evaluation_data and echoed to the cell output.
#
# The polynomial degree is iterated explicitly here. Without this loop the cell
# silently reused whatever `poly_degree` was left behind by an earlier cell
# (or raised NameError on a fresh kernel) and evaluated a single degree only.
# The shared `equalize` / `split_size` settings replace the hardcoded
# False / 10 so the configuration cell governs this sweep as well.
for poly_degree in poly_degrees:
    for max_iter in max_iters:
        for gamma in gammas:
            for lambda_ in lambdas:
                # All-zero starting weights, rebuilt for every configuration.
                initial_w = np.zeros(num_param * poly_degree + 1)
                # NOTE(review): the saved output of this cell reports a
                # non-zero 'initial_w' in the returned parameters, which
                # suggests the array is updated in place inside cross_val /
                # reg_logistic_regression and carried over between splits —
                # confirm, and copy it there if so.
                evaluation_result = cross_val(
                    tX, y, equalize, split_size, poly_degree, reg_logistic_regression,
                    max_iters=max_iter, gamma=gamma, initial_w=initial_w, lambda_=lambda_,
                )
                evaluation_data.append(evaluation_result)
                print(evaluation_result)

Iteration =  0 	loss =  77979.05781299151
Iteration =  0 	loss =  815535.8173434455
Iteration =  0 	loss =  663359.1626492054
Iteration =  0 	loss =  678780.6786224103
Iteration =  0 	loss =  679364.3771121848
Iteration =  0 	loss =  697505.3250842207
Iteration =  0 	loss =  672684.598028374
Iteration =  0 	loss =  716019.233660952
Iteration =  0 	loss =  680865.0026237111
Iteration =  0 	loss =  700159.6384825982
{'method': 'reg_logistic_regression', 'poly_degree': 2, 'parameters': {'max_iters': 10, 'gamma': 0.001, 'initial_w': array([ 42.95772183, -42.45555974,  -8.34707237,  -9.32088058,
        -5.15031001,  22.30084351,   2.49947362,  60.0732953 ,
       -11.40354174,   7.74541286, -16.45566642,   6.63353315,
         9.02944669,  45.29339198,  -0.54537617,   2.03413515,
        29.93461505,   0.59816705,  -0.09396363, -13.53380651,
         2.95523761,  12.65720734,  12.88530611,  -7.10448811,
         1.38192559,  -0.24070316, -10.9613318 ,  -0.09986834,
        -0.11249121,  -7

## Final overview

In [14]:
# Print every collected result, one per line, in the order the experiment
# cells appended them to evaluation_data.
for el in evaluation_data:
    print(el)

{'method': 'least_squares_GD', 'poly_degree': 2, 'parameters': {'initial_w': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), 'max_iters': 10, 'gamma': 0.001}, 'train_losses': [0.49390621833261733, 0.4929860681621087, 0.4931382898371554, 0.4931076496929922, 0.49326816169689564, 0.49321434364129807, 0.4932786988849711, 0.4929262909989992, 0.4929618966354508, 0.4934290349405035], 'test_losses': [0.49390621833261733, 0.4929860681621086, 0.49313828983715535, 0.49310764969299237, 0.49326816169689586, 0.49321434364129824, 0.49327869888497106, 0.4929262909989992, 0.49296189663545087, 0.4934290349405035], 'accuracies': [0.70016, 0.70736, 0.70744, 0.70504, 0.70552, 0.70464, 0.70576, 0.70408, 0.70456, 0.70064], 'f1_scores': [0.5661032646445936, 0.5739576054041463, 0.56716771