In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

from implementations import *
from evaluation import *
import datetime

## Load the training data into feature matrix, class labels, and event ids:

In [2]:
from proj1_helpers import *
# Location of the training CSV, relative to the notebook's working directory.
DATA_TRAIN_PATH = 'data/train.csv'
# Unpack the class labels (y), the feature matrix (tX) and the event ids (ids).
# load_csv_data is presumably provided by the proj1_helpers star import above.
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

In [3]:
# Dimensions of the feature matrix; the recorded run shows (250000, 30),
# presumably (n_samples, n_features).
tX.shape

(250000, 30)

## Properties of dataset

In [5]:
from collections import Counter
# Tally how often each label value occurs in y to check the class balance.
# Recorded run: 164333 labels of -1.0 versus 85667 of 1.0 (roughly 2:1), so
# accuracy alone is a weak metric for this dataset.
Counter(y)

Counter({1.0: 85667, -1.0: 164333})

## Collect evaluation results

In [18]:
# Accumulator for the result dicts returned by cross_val; every experiment
# cell below appends to it and the final overview prints its contents.
# Re-running this cell discards everything collected so far, so it must be
# executed BEFORE the experiment cells.
# NOTE(review): in the saved notebook this cell carries a later execution
# count than the experiment cells, i.e. the list was reset after they ran.
evaluation_data = []

## 1. Linear regression using gradient descent

In [9]:
# Hyper-parameter grid for linear regression trained with gradient descent.
# Wider grid kept for reference:
#   max_iters = [10, 100, 500, 750, 1000, 1500]
#   gammas = [0.01, 0.049, 0.05, 0.053, 0.07, 0.1]
max_iters = [100]
gammas = [0.1]

# Size the weight vector from the data instead of hard-coding 30, so the cell
# keeps working if columns are added to or removed from tX.
n_features = tX.shape[1]

for max_iter in max_iters:
    for gamma in gammas:
        # Build a fresh zero start for every run: a single shared array could
        # carry weights from one grid point into the next if the optimizer
        # updates its starting vector in place.
        initial_w = np.zeros(n_features)

        # The third argument (10) is presumably the number of folds; each
        # metric list in the recorded output has 10 entries.
        evaluation_result = cross_val(tX, y, 10, least_squares_GD, initial_w=initial_w, max_iters=max_iter, gamma=gamma)

        # NOTE(review): in the recorded output 'train_losses' and 'test_losses'
        # agree to ~1e-15 in every fold, which suggests cross_val scores both
        # on the same split -- verify in the helper code.
        evaluation_data.append(evaluation_result)
        print(evaluation_result)

{'method': 'least_squares_GD', 'parameters': {'initial_w': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), 'max_iters': 100, 'gamma': 0.1}, 'train_losses': [0.39548968140058166, 0.3916020593965087, 0.39317572860129535, 0.39409773783744545, 0.3858473495155666, 0.3914931625731311, 0.3903875817150668, 0.38914223094225703, 0.3917270112589671, 0.3913170498962297], 'test_losses': [0.39548968140058155, 0.39160205939650844, 0.3931757286012953, 0.3940977378374454, 0.38584734951556676, 0.3914931625731312, 0.39038758171506666, 0.3891422309422574, 0.39172701125896686, 0.39131704989622984], 'accuracies': [0.71332, 0.71768, 0.72024, 0.71808, 0.72288, 0.71808, 0.71888, 0.71884, 0.72024, 0.71412], 'f1_scores': [0.6571633580483138, 0.6593300511632397, 0.6614714424007745, 0.6636120656739215, 0.6690551256329417, 0.6607624181748171, 0.6626667946625708, 0.6627968337730871, 0.6649099271751628, 0.6583488694488264], 'preci

## 2. Linear regression using stochastic gradient descent

In [10]:
# Hyper-parameter grid for linear regression trained with stochastic gradient
# descent. Wider grid kept for reference:
#   max_iters = [10, 100, 500, 750, 1000, 1500]
#   gammas = [0.01, 0.049, 0.05, 0.053, 0.07, 0.1]
# NOTE(review): with gamma = 0.1 the recorded per-fold losses range from ~1.5
# to ~1287 and accuracy hovers around 0.5, so this step size looks too large
# for SGD on these features -- try the smaller gammas above and/or
# standardized features.
max_iters = [100]
gammas = [0.1]

# Size the weight vector from the data instead of hard-coding 30, so the cell
# keeps working if columns are added to or removed from tX.
n_features = tX.shape[1]

for max_iter in max_iters:
    for gamma in gammas:
        # Build a fresh zero start for every run: a single shared array could
        # carry weights from one grid point into the next if the optimizer
        # updates its starting vector in place.
        initial_w = np.zeros(n_features)

        # The third argument (10) is presumably the number of folds; each
        # metric list in the recorded output has 10 entries.
        evaluation_result = cross_val(tX, y, 10, least_squares_SGD, initial_w=initial_w, max_iters=max_iter, gamma=gamma)

        # NOTE(review): the recorded result reports 'method': 'least_squares_GD'
        # although least_squares_SGD is passed here; the label appears not to
        # follow the function argument -- verify in cross_val.
        evaluation_data.append(evaluation_result)
        print(evaluation_result)

{'method': 'least_squares_GD', 'parameters': {'initial_w': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]), 'max_iters': 100, 'gamma': 0.1}, 'train_losses': [1100.703136497316, 1.4826940462142755, 6.395254949266865, 2.0873779666707293, 11.222745112384546, 1287.10422272636, 384.3715305508267, 649.10428803012, 50.61254962093232, 15.936554961221768], 'test_losses': [1100.7031364973168, 1.4826940462142764, 6.395254949266866, 2.08737796667073, 11.222745112384555, 1287.1042227263604, 384.3715305508267, 649.1042880301197, 50.61254962093234, 15.936554961221768], 'accuracies': [0.61504, 0.43972, 0.47856, 0.5742, 0.41276, 0.48336, 0.60108, 0.53852, 0.46028, 0.4692], 'f1_scores': [0.4750163648265328, 0.3873507413725233, 0.45102333024509395, 0.521164140164635, 0.3865792002674132, 0.3748911044429387, 0.4634999193071171, 0.473893018377491, 0.3932185096910554, 0.3783378618945001], 'precisions': [0.4448304045770331,

## 3. Least squares regression using normal equations

In [11]:
# Least squares via the normal equations takes no hyper-parameters here, so a
# single cross-validated run is enough. The third argument (10) is presumably
# the number of folds; each metric list in the recorded output has 10 entries.
evaluation_result = cross_val(tX, y, 10, least_squares)
        
# Keep the result for the final overview and show it immediately.
evaluation_data.append(evaluation_result)
print(evaluation_result)

{'method': 'least_squares_GD', 'parameters': {}, 'train_losses': [0.3935872822211309, 0.38962890010787127, 0.3911282096028302, 0.39245496612125563, 0.384080020617261, 0.3898468714835387, 0.3881959748142648, 0.387115766122765, 0.3897684825423807, 0.3894472205196128], 'test_losses': [0.3935872822211308, 0.3896289001078713, 0.39112820960283035, 0.39245496612125547, 0.38408002061726093, 0.38984687148353886, 0.38819597481426504, 0.3871157661227651, 0.38976848254238067, 0.3894472205196128], 'accuracies': [0.71328, 0.71816, 0.72056, 0.71944, 0.72452, 0.7186, 0.71988, 0.72044, 0.71988, 0.71564], 'f1_scores': [0.6590887472652905, 0.6615103766333589, 0.6633253012048194, 0.6671412300683371, 0.6725932968861422, 0.6633165829145728, 0.666094502455538, 0.6663484031126176, 0.6665714421749274, 0.6618143761000904], 'precisions': [0.5551193718955295, 0.5561389337641357, 0.5616125346825527, 0.5642157649703002, 0.5722375020223265, 0.559322033898305, 0.5658161198865937, 0.5625503788489441, 0.563198970150454

## 4. Ridge regression using normal equations

In [15]:
# Regularization strengths to evaluate for ridge regression.
# Wider grid kept for reference:
#   lambdas = [10, 100, 500, 750, 1000, 1500]
# NOTE(review): with lambda_ = 100 the recorded losses (~0.498) and accuracies
# (~0.67) are worse than the unregularized least-squares run (~0.39 / ~0.72),
# so much smaller values are probably worth trying.
lambdas = [100]

# One cross-validated run per regularization strength; the third argument (10)
# is presumably the number of folds.
for lambda_ in lambdas:
    evaluation_result = cross_val(tX, y, 10, ridge_regression, lambda_=lambda_)
        
    # Keep the result for the final overview and show it immediately.
    evaluation_data.append(evaluation_result)
    print(evaluation_result)

{'method': 'least_squares_GD', 'parameters': {'lambda_': 100}, 'train_losses': [0.49808919954776093, 0.49805578618730484, 0.4980850296084491, 0.49808969491465027, 0.49788212609585175, 0.49802018727168074, 0.49801802664701983, 0.4979662920990489, 0.4980285133402161, 0.49801177101303734], 'test_losses': [0.49808919954776104, 0.4980557861873047, 0.49808502960844947, 0.49808969491465044, 0.4978821260958519, 0.4980201872716809, 0.49801802664702, 0.4979662920990491, 0.49802851334021636, 0.49801177101303784], 'accuracies': [0.67144, 0.67636, 0.67756, 0.6732, 0.67808, 0.68056, 0.6776, 0.67384, 0.67636, 0.67356], 'f1_scores': [0.5841854814214842, 0.5869198958492878, 0.588030868298666, 0.5884545637719122, 0.5944366055230801, 0.5941248221183167, 0.5907382959276937, 0.5874316939890711, 0.5888510595050562, 0.5855887878941757], 'precisions': [0.5147190008920607, 0.515469464622007, 0.5196459217776171, 0.5197544046983449, 0.5279742189598067, 0.5232297914242234, 0.5256641966383517, 0.5172413793103449, 

## 5. Logistic regression using gradient descent or SGD

In [17]:
# Hyper-parameter grid for logistic regression.
# Wider grid kept for reference:
#   max_iters = [10, 100, 500, 750, 1000, 1500]
#   gammas = [0.01, 0.049, 0.05, 0.053, 0.07, 0.1]
# NOTE(review): in the recorded log the loss rises from ~1.6e5 at iteration 0
# to ~1.3e6 at iteration 50 of the first fold, so gamma = 0.1 appears to
# diverge on these features -- try a smaller step size and/or standardized
# features.
max_iters = [100]
gammas = [0.1]

# Size the weight vector from the data instead of hard-coding 30, so the cell
# keeps working if columns are added to or removed from tX.
n_features = tX.shape[1]

for max_iter in max_iters:
    for gamma in gammas:
        # Build a fresh zero start for every run so weights cannot leak from
        # one grid point into the next.
        # NOTE(review): the recorded log shows the first fold starting at
        # loss ~155958.12 (which matches 225000 * ln 2, the value for all-zero
        # weights) while every later fold starts at ~1.4e6 or more. That
        # suggests the starting weights are modified between folds; confirm
        # that logistic_regression / cross_val copy initial_w rather than
        # updating it in place.
        initial_w = np.zeros(n_features)

        # The third argument (10) is presumably the number of folds; the
        # recorded log shows 10 training runs.
        evaluation_result = cross_val(tX, y, 10, logistic_regression, max_iters=max_iter, gamma=gamma, initial_w=initial_w)

        evaluation_data.append(evaluation_result)
        print(evaluation_result)

Current iteration = 0, loss=155958.1156259835
Current iteration = 50, loss=1331373.2441326166
Current iteration = 0, loss=1416124.3618095487
Current iteration = 50, loss=1422251.7772293538
Current iteration = 0, loss=1615382.7046323447
Current iteration = 50, loss=1386970.2216129894
Current iteration = 0, loss=1578876.030725352
Current iteration = 50, loss=1273042.9131439137
Current iteration = 0, loss=2544291.7003305657
Current iteration = 50, loss=1266155.3308820862
Current iteration = 0, loss=1598665.4177813292
Current iteration = 50, loss=1569264.890476886
Current iteration = 0, loss=1590819.9526557573
Current iteration = 50, loss=1290987.6981899387
Current iteration = 0, loss=1584004.8812991611
Current iteration = 50, loss=1597456.9446330974
Current iteration = 0, loss=1682672.2202856692
Current iteration = 50, loss=1251469.4757469415
Current iteration = 0, loss=1554665.9076509087
Current iteration = 50, loss=1311945.4227594854
{'method': 'least_squares_GD', 'parameters': {'max_it

## Final overview

In [19]:
# Print every collected result dict on its own line, in the order the
# experiments were appended to evaluation_data.
for result in evaluation_data:
    print(result)