In [38]:
# Useful starting lines
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
from implementations import *
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load the training data into feature matrix, class labels, and event ids:

In [None]:
#unzip csv 
import zipfile
with zipfile.ZipFile('../data/train.csv.zip') as zip_ref:
    zip_ref.extractall(r"../data")

In [32]:
from proj1_helpers import *

#load data
DATA_TRAIN_PATH = '../data/train.csv' #download train data and supply path here 
y, x, ids = load_csv_data(DATA_TRAIN_PATH)

print(y.shape, tx.shape, ids.shape)

(250000,) (250000, 31) (250000,)


### Comparing the Models

#### Replacing Nan

In [3]:
'''
for i in range(tx.shape[1]):
    idx = tx[:,i] > -999
    mean = np.mean(tx[idx,i])
    tx[idx==False,i] = mean
'''

#### Standardize the data

In [33]:
tx = standardize(x)
#add constant term
tx = np.c_[np.ones((y.shape[0], 1)), tx]

#### Split the data 

In [34]:
x_train, y_train, x_test, y_test = split_data(tx, y, 0.8, seed=1)

#### Test Models

In [35]:
gamma = 0.1
max_iters = 200
initial_w = np.zeros(31)

gradients = [least_squares_GD(y_train, x_train, initial_w, max_iters,gamma),
             least_squares_SGD(y_train, x_train, initial_w, max_iters, gamma)]
gradients_names = ["Gradient Descent","Stochastic Gradient Descent"]
print('-----Without standardization-----')
for i in range (len(gradients)):
    w,loss = gradients[i]
    print("{name}, w*={w}, loss={l}\n".format(name=gradients_names[i],w=w, l=loss))

-----Without standardization-----
Gradient Descent, w*=[-3.15315277e-01  3.25785508e-02 -2.45182886e-01 -2.30293148e-01
 -6.33208640e-03 -2.18340804e-02  3.61055109e-01 -3.00852094e-02
  2.50415470e-01 -8.81265611e-03 -1.43471810e-03 -1.54073444e-01
  1.17194904e-01 -2.39527293e-02  1.97900847e-01 -1.68415537e-04
 -1.39955617e-03  2.45511345e-01 -7.61827390e-04  2.76899405e-03
  1.06546237e-01  3.31355774e-04 -7.20558566e-02 -1.45140481e-01
  3.82004279e-02  4.28146148e-02  4.28701819e-02 -2.54948783e-02
 -2.47437461e-02 -2.49263472e-02 -1.02213569e-01], loss=1.3606489531198662

Stochastic Gradient Descent, w*=[ 8.33378636e+09 -1.53972674e+10  1.33238171e+10  5.98935269e+09
  1.37916471e+09  5.18729416e+09  8.65325628e+09  5.21585814e+09
  1.58756464e+10 -2.05737479e+10  2.69859236e+10 -4.01782168e+09
  2.88172867e+10  5.19001620e+09 -1.47156060e+10  3.88264991e+10
  2.04976604e+10 -1.48836373e+10  3.48482095e+10 -8.56301751e+10
  2.53408968e+10 -2.09475595e+09  1.75880300e+10 -7.56247

In [36]:
gamma = 0.01
initial_w = np.zeros(31)
lambda_ = 0.00001

regression =[least_squares(y_train, x_train),
             ridge_regression(y_train, x_train,lambda_)]
                  
regression_names =  ["Least Squares", "Ridge Regression"]

print('-----Without standardization-----')
for i in range (len(regression)):
    w,loss = regression[i]
    print("{name}, w*={w}, loss={l}\n".format(name=regression_names[i],w=w, l=loss))

-----Without standardization-----
Least Squares, w*=[-3.15261032e-01  2.94719305e-02 -2.50147546e-01 -2.53502626e-01
 -2.70566661e-02 -1.79922408e+00  3.06662969e-01 -1.03168913e+01
  2.68059113e-01 -2.79310143e-03 -6.21645887e+02 -1.81119927e-01
  1.15267145e-01  2.07794063e+01  1.20603734e+02  2.94979326e-04
 -1.99948852e-03  1.18830544e+02 -2.71160934e-04  1.75796574e-03
  1.17976665e-01 -3.55247270e-04 -6.46272451e-02 -2.00359300e-01
 -7.75846768e-02 -2.87377816e-02  2.55573552e-01  5.85153872e-03
 -2.99306345e+00 -5.75389125e+00  5.26521481e+02], loss=1.356655379266693

Ridge Regression, w*=[-3.15262791e-01  2.93326090e-02 -2.50838506e-01 -2.53758198e-01
 -2.47788613e-02  1.40943392e+00  3.92019311e-01 -2.08808955e+00
  2.68016410e-01 -3.55560071e-03  1.09817090e-02 -1.81122252e-01
  1.16004833e-01  1.06071728e+00  1.90188366e-01 -3.69490607e-05
 -1.25063290e-03  2.82236999e-01 -8.27597652e-04  2.76019582e-03
  1.17936726e-01  3.20108801e-04 -6.43743295e-02 -2.00082289e-01
 -1.242

In [41]:
initial_w = np.zeros(31)
gamma = 1e-6
max_iters = 500
lambda_ = 1

logistic = [logistic_regression(y_train, x_train, initial_w, max_iters, gamma),
            reg_logistic_regressions(y_train, x_train, lambda_, initial_w, max_iters, gamma)]
logistic_names = ['Logistic Ridge Regression','Reg Logistic Ridge Regression']

print('-----Without standardization-----')
for i in range (len(logistic)):
    w,loss = logistic[i]
    print("{name}, w*={w}, loss={l}\n".format(name=logistic_names[i],w=w, l=loss))

-----Without standardization-----
Logistic Ridge Regression, w*=[-9.02362835e+01  1.90010982e+01 -2.63768758e+01 -6.35707906e-01
  8.33880480e+00  2.09563480e+00  6.51201007e+00  2.00571717e+00
  7.04647415e+00 -3.63222572e+00  3.93301211e+00 -1.38879478e+01
  1.76608052e+01  2.07062412e+00  1.44465864e+01 -4.27144153e-02
 -4.30340431e-01 -2.95407713e+00  1.46947450e-01  3.81912723e-01
 -3.16452968e+00  4.33387115e-01  2.96929994e+00  2.31871959e+00
  7.39261020e+00  7.30466075e+00  7.30479633e+00  1.94670878e+00
  2.06388016e+00  2.06309502e+00  2.00454893e+00], loss=nan

Reg Logistic Ridge Regression, w*=[-9.02638999e+01  1.89606816e+01 -2.64382245e+01 -6.83593990e-01
  8.31108761e+00  2.07925745e+00  6.49394692e+00  1.98933608e+00
  6.98691164e+00 -3.66807811e+00  3.91000867e+00 -1.39322703e+01
  1.76294379e+01  2.05424614e+00  1.43966236e+01 -8.79538583e-02
 -4.77229813e-01 -2.99208352e+00  1.01724705e-01  3.35239656e-01
 -3.20116940e+00  3.88159064e-01  2.94445783e+00  2.29997506e