In [2]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

## Load the training data into feature matrix, class labels, and event ids:

In [3]:
from proj1_helpers import *
DATA_TRAIN_PATH = '../data/train.csv' # location of the training CSV, relative to this notebook
# load_csv_data comes from proj1_helpers; its three return values are unpacked as
# class labels (y), feature matrix (tX) and event ids (ids).
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

## Do your crazy machine learning thing here :) ...

In [4]:
# Sanity check: display the dimensions of the loaded labels and feature matrix.
y.shape, tX.shape

((250000,), (250000, 30))

In [5]:
# Build new_tX by stacking a leading column of ones in front of tX,
# giving the linear models below a constant term multiplied by w[0].
first_col = np.ones((tX.shape[0], 1))
new_tX = np.hstack((first_col, tX))
new_tX.shape

(250000, 31)

In [6]:
def compute_loss(y, tx, w):
    """Return the halved mean squared error of the linear model.

    The value is ``1 / (2 * N) * e.T @ e`` with residual ``e = y - tx @ w``
    and ``N = len(y)``.

    Args:
        y: target values.
        tx: feature matrix, one row per sample.
        w: weight vector.

    Returns:
        The scalar loss for the given weights.
    """
    n_samples = len(y)
    residual = y - np.dot(tx, w)
    scale = 1 / (2 * n_samples)
    return scale * np.dot(residual.T, residual)
    
def compute_gradient(y, tx, w):
    """Return the gradient of the halved mean squared error with respect to ``w``.

    The value is ``-1 / N * tx.T @ e`` with residual ``e = y - tx @ w``
    and ``N = len(y)``.

    Args:
        y: target values.
        tx: feature matrix, one row per sample.
        w: weight vector at which the gradient is evaluated.

    Returns:
        The gradient, with one entry per column of ``tx``.
    """
    n_samples = len(y)
    residual = y - np.dot(tx, w)
    scale = -1 / n_samples
    return scale * np.dot(tx.T, residual)

# Linear regression using gradient descent
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    """Fit a linear model by full-batch gradient descent.

    On every iteration the loss is evaluated at the current weights, then the
    weights take one step of size ``gamma`` against the gradient. A progress
    line is printed after each step.

    Args:
        y: target values.
        tx: feature matrix, one row per sample.
        initial_w: starting weight vector (needs at least two entries, since
            the progress line prints ``w[0]`` and ``w[1]``).
        max_iters: number of gradient steps to perform.
        gamma: step size.

    Returns:
        ``(losses, ws)``: ``losses[i]`` is the loss measured before step ``i``;
        ``ws`` holds ``initial_w`` followed by the weights after each step.
    """
    weight_history = [initial_w]
    loss_history = []
    current_w = initial_w
    last_iter = max_iters - 1

    for step in range(max_iters):
        # The loss is recorded at the weights *before* this step's update.
        step_loss = compute_loss(y, tx, current_w)
        current_w = current_w - gamma * compute_gradient(y, tx, current_w)

        weight_history.append(current_w)
        loss_history.append(step_loss)

        print("Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
              bi=step, ti=last_iter, l=step_loss, w0=current_w[0], w1=current_w[1]))

    return loss_history, weight_history

In [7]:
# Hyper-parameters for the gradient descent run.
max_iters = 50
gamma = 1e-7
# Step sizes tried so far (loss after 50 iterations):
#   1e-8 -> 556513.5
#   1e-7 -> 97535

# Start from an all-ones weight vector, one entry per column of new_tX.
w_initial = np.ones(31)

# Run gradient descent on the feature matrix with the leading ones column.
gradient_losses, gradient_ws = least_squares_GD(y, new_tX, w_initial, max_iters, gamma)

Gradient Descent(0/49): loss=25250993.002723932, w0=1.0005252902751836, w1=0.917113831786522
Gradient Descent(1/49): loss=5078286.820619153, w0=1.0007221563962507, w1=0.870526726972049
Gradient Descent(2/49): loss=1380135.289603642, w0=1.0007795004675737, w1=0.8397516623891288
Gradient Descent(3/49): loss=681465.0763505857, w0=1.0007778789428414, w1=0.8160379626385416
Gradient Descent(4/49): loss=530507.864904485, w0=1.0007516355492305, w1=0.7956414700655863
Gradient Descent(5/49): loss=481025.46312639373, w0=1.000715403244278, w1=0.7769539984607841
Gradient Descent(6/49): loss=451670.1202810963, w0=1.0006754082495695, w1=0.7592789151494876
Gradient Descent(7/49): loss=427495.14221563534, w0=1.0006342916098392, w1=0.7423088588458108
Gradient Descent(8/49): loss=405648.14290666924, w0=1.0005931657384215, w1=0.7259029454630265
Gradient Descent(9/49): loss=385489.5057428921, w0=1.000552491583502, w1=0.7099917176518078
Gradient Descent(10/49): loss=366793.4166104155, w0=1.0005124527468108,

In [23]:
from utils import helpers

In [28]:
# Linear regression using stochastic gradient descent
def least_squares_SGD(y, tx, initial_w, max_iters, gamma, batch_size=1):
    """Fit a linear model by stochastic (mini-batch) gradient descent.

    On every iteration the loss is evaluated on the full data at the current
    weights, one mini-batch is drawn through ``helpers.batch_iter``, and the
    weights take one step of size ``gamma`` against that mini-batch's gradient.
    A progress line is printed after each step.

    Args:
        y: target values.
        tx: feature matrix, one row per sample.
        initial_w: starting weight vector (needs at least two entries, since
            the progress line prints ``w[0]`` and ``w[1]``).
        max_iters: number of stochastic steps to perform.
        gamma: step size.
        batch_size: number of samples per mini-batch; defaults to 1, the value
            that used to be hard-coded.

    Returns:
        ``(losses, ws)``: ``losses[i]`` is the full-data loss measured before
        step ``i``; ``ws`` holds ``initial_w`` followed by the weights after
        each step.
    """
    losses = []
    ws = [initial_w]
    w = initial_w

    for n_iter in range(max_iters):
        # Full-data loss at the weights *before* this step's update.
        loss = compute_loss(y, tx, w)

        # The trailing positional arguments (1, True) are presumably
        # num_batches and shuffle -- confirm against utils.helpers.batch_iter.
        gradient = 0
        for minibatch_y, minibatch_tx in helpers.batch_iter(y, tx, batch_size, 1, True):
            gradient += compute_gradient(minibatch_y, minibatch_tx, w)

        # No further division by batch_size: compute_gradient already divides
        # by len(minibatch_y), so dividing again would shrink the step by an
        # extra factor of batch_size whenever batch_size > 1.

        w = w - gamma * gradient

        ws.append(w)
        losses.append(loss)
        print("Gradient Descent({bi}/{ti}): loss={l}, w0={w0}, w1={w1}".format(
                bi=n_iter, ti=max_iters - 1, l=loss, w0=w[0], w1=w[1]))

    return losses, ws

In [29]:
# Hyper-parameters for the stochastic gradient descent run.
max_iters = 50
gamma = 1e-7  # loss = 90047

# Start from an all-ones weight vector, one entry per column of new_tX.
w_initial = np.ones(31)

# Run stochastic gradient descent on the feature matrix with the leading ones column.
stoch_gradient_losses, stoch_gradient_ws = least_squares_SGD(y, new_tX, w_initial, max_iters, gamma)

Gradient Descent(0/49): loss=25250993.002723932, w0=0.9998871709, w1=0.983564185003
Gradient Descent(1/49): loss=25154746.500849664, w0=0.9997714078103692, w1=0.9725471275259261
Gradient Descent(2/49): loss=25023855.605147183, w0=1.0007214938102476, w1=1.083861103443661
Gradient Descent(3/49): loss=430430.8032495237, w0=1.000653812418108, w1=1.0778286609622507
Gradient Descent(4/49): loss=473993.31137771526, w0=1.0006824576329998, w1=1.0810616458506
Gradient Descent(5/49): loss=419743.60435273283, w0=1.0006869958333802, w1=1.0815943897314508
Gradient Descent(6/49): loss=415308.5935033907, w0=1.0006989659333587, w1=1.0696362598529439
Gradient Descent(7/49): loss=414200.30678639415, w0=1.0008169272333112, w1=0.9517929212003889
Gradient Descent(8/49): loss=679918.2095788867, w0=1.0007100133990223, w1=0.9419789796095206
Gradient Descent(9/49): loss=382970.01449370873, w0=1.000686816234519, w1=0.939267741416717
Gradient Descent(10/49): loss=401313.6223387828, w0=1.000571825594423, w1=0.9326

In [None]:
# Least squares regression using normal equations
def least_squares(y, tx):
    """Least squares regression using normal equations (placeholder).

    Not implemented yet. The explicit raise keeps this cell valid Python
    (a ``def`` whose body is only a comment does not parse) and makes any
    accidental call fail loudly.

    Raises:
        NotImplementedError: always, until the TODO is completed.
    """
    # TODO
    raise NotImplementedError("least_squares is not implemented yet")

In [None]:
# Ridge regression using normal equations
def ridge_regression(y, tx, lambda_):
    """Ridge regression using normal equations (placeholder).

    Not implemented yet. The explicit raise keeps this cell valid Python
    (a ``def`` whose body is only a comment does not parse) and makes any
    accidental call fail loudly.

    Raises:
        NotImplementedError: always, until the TODO is completed.
    """
    # TODO
    raise NotImplementedError("ridge_regression is not implemented yet")

In [None]:
# Logistic regression using gradient descent or SGD
def logistic_regression(y, tx, initial_w, max_iters, gamma):
    """Logistic regression using gradient descent or SGD (placeholder).

    Not implemented yet. The explicit raise keeps this cell valid Python
    (a ``def`` whose body is only a comment does not parse) and makes any
    accidental call fail loudly.

    Raises:
        NotImplementedError: always, until the TODO is completed.
    """
    # TODO
    raise NotImplementedError("logistic_regression is not implemented yet")

In [None]:
# Regularized logistic regression using gradient descent or SGD
def reg_logistic_regression(y, tx, lambda_, initial_w, max_iters, gamma):
    """Regularized logistic regression using gradient descent or SGD (placeholder).

    Not implemented yet. The explicit raise keeps this cell valid Python
    (a ``def`` whose body is only a comment does not parse) and makes any
    accidental call fail loudly.

    Raises:
        NotImplementedError: always, until the TODO is completed.
    """
    # TODO
    raise NotImplementedError("reg_logistic_regression is not implemented yet")

## Generate predictions and save output in csv format for submission:

In [10]:
DATA_TEST_PATH = '' # TODO: download the test data and supply its path here (currently empty)
# Same loader as for the training set; the first return value is discarded here.
_, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)

In [31]:
OUTPUT_PATH = '' # TODO: fill in desired name of output file for submission

# `weights` was never assigned anywhere in this notebook, so this cell raised a
# NameError on a fresh kernel. Use the final iterate of the gradient descent run;
# swap in stoch_gradient_ws[-1] to submit the SGD model instead.
weights = gradient_ws[-1]

# The models were fitted on new_tX (a ones column followed by the features), so
# the test matrix needs the same leading ones column to line up with `weights`.
# Assumes tX_test is 2-D with the same feature columns as tX -- TODO confirm.
new_tX_test = np.concatenate((np.ones((tX_test.shape[0], 1)), tX_test), axis=1)

y_pred = predict_labels(weights, new_tX_test)
create_csv_submission(ids_test, y_pred, OUTPUT_PATH)