In [7]:
%load_ext autoreload
%autoreload 2

from algebra import *
from cache import *
from costs import *
from features import *
from gradients import *
from helpers import *
from model import *
from splits import *

import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import csv
import warnings
warnings.filterwarnings('ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [8]:
SUB_SAMPLE = True
CACHE_DIR = "test/cache/" if SUB_SAMPLE else "cache/"
SUBMISSIONS_DIR = "test/submissions/" if SUB_SAMPLE else "submissions/"

In [9]:
y, x, ids = load_csv_data('data/train.csv', SUB_SAMPLE)

# 1 - Analytical Results

### Ridge Regression with Fixed Degree

##### Without Validation

In [20]:
def clean_standardize_expand(y, x, h):
        
    degree = int(h['degree'])

    x = remove_errors(x)
    x = remove_outliers(x)
    x = standardize_all(x)
    x = remove_nan_features(x)
    x = build_poly(x, degree)

    return y, x

In [None]:
def ridge_regression_analytical(y, x, h):

    lambda_ = float(h['lambda'])
    degree = int(h['degree'])

    w = ridge_regression(y, x, lambda_)
    
    return {
        'w': w,
        'mse': compute_mse(y, x, w)
    }

In [None]:
hs = { 
    'degree': [5, 6, 7], 
    'lambda': 1e-4,
}

_ = evaluate(
    clean = clean_standardize_expand, 
    fit   = ridge_regression_analytical, 
    x     = x, 
    y     = y, 
    hs    = hs, 
    cache = CACHE_DIR + 'clean_standardize_expand_ridge_regression_analytical'
)

##### Using Cross-Validation

Here, we implement the same model with cross-validation.

In [None]:
hs = { 
    'degree': np.arange(4, 16), 
    'lambda': np.logspace(-8, -4, 5),
    'k_fold': 4,
    'seed': 0
}

def mse(y, x, w):
    return { 'mse' : compute_mse(y, x, w) }



evaluate(
    clean = cross_validate(ridge_regression_analytical, mse), 
    fit   = fit_function, 
    x     = x,
    y     = y, 
    hs    = hs, 
    cache = CACHE_DIR + 'clean_standardize_expand_cross_validate_ridge_regression_analytical_mse'
)

In [None]:
# myModel.predict(best_h, x, y, SUBMISSIONS_DIR + 'RidgeRegression_MSE_FixedDegree_CrossValidation_Model')

## Gradient Descents

#### Least Square

#### Ridge Regression

#### Lasso

# Logistic Regression

In [18]:
def map_logistic(clean):
    
    def inner_function(y, x, h):
        y, x = clean(y, x, h)
        y = np.where(y == 1, 1, 0)
        return y, x
    
    return inner_function

def logistic_gradient(y, x, w, h):
    
    return compute_logistic_gradient(y, x, w)
            
def logistic_error(y, x, w, h):
    
    return { 
        'logistic_err': compute_logistic_error(y, x, w),
        'n_err': compute_error_count(predict_logistic)(y, x, w)
    }

def logistic_gradient_ridge(y, x, w, h):
    
    lambda_ = h['lambda']
    
    return compute_logistic_gradient(y, x, w) + lambda_ * w

def logistic_error_and_ridge(y, x, w, h):
    
    lambda_ = h['lambda']
    
    ridge_norm = np.linalg.norm(w, 2) * lambda_
    logistic_err = compute_logistic_error(y, x, w)
    n_err = compute_error_count(predict_logistic)(y, x, w)
    
    return {
        'logistic_err': logistic_err,
        'ridge_norm': ridge_norm,
        'total_loss': logistic_err + ridge_norm,
        'n_err': n_err
    }

def logistic_gradient_lasso(y, x, w, h):
    
    lambda_ = h['lambda']
    
    return compute_logistic_gradient(y, x, w) + lambda_ * np.sign(w)

def logistic_error_and_lasso(y, x, w, h):
    
    lambda_ = h['lambda']
    
    lasso_norm = np.linalg.norm(w, 1) * lambda_
    logistic_err = compute_logistic_error(y, x, w)
    n_err = compute_error_count(predict_logistic)(y, x, w)
    
    return {
        'logistic_err': logistic_err,
        'lasso_norm': lasso_norm,
        'total_loss': logistic_err + lasso_norm,
        'n_err': n_err
    }

### Without Validation

##### Stochastic Gradient Descent

In [30]:
hs = {
    'batch_size': 2500,
    'degree': np.concatenate([[-2], np.arange(1, 7)]),
    'gamma': [1e-2, 1e-3], 
    'k_fold': 4,
    'lambda': 0,
    'max_iters': 3000,
    'num_batches': 1,
    'seed': 1,
}

cache = Cache(CACHE_DIR + 'clean_standardize_expand_stochastic_logistic_descent')

_ = evaluate(
    clean = map_logistic(clean_standardize_expand), 
    fit   = descent_with_cache(
        descent    = descent_with_loss(stochastic_gradient_descent_e(logistic_gradient), logistic_error),  
        round_size = 100,
        cache      = cache,
        multiple   = False,
        log        = True
    ), 
    y     = y,
    x     = x,
    hs    = hs
)

[-5.66090433e-02 -2.33158829e-01  1.54392859e-02  7.13685914e-02
 -2.28364642e-01  8.85929620e-02  1.81609451e-02  6.79385449e-02
 -1.57624605e-01  1.83554191e-03  3.25578471e-02  1.06221497e-01
  3.69882868e-03 -1.32188842e-01 -1.31989430e-02  1.82590860e-01
  3.42112172e-02  1.91565927e-01  4.66034936e-02  2.00314785e-02
 -9.90202102e-02 -3.59525230e-03 -5.09962015e-02  1.87760983e-02
 -7.86058461e-03  1.58727369e-02 -1.34388230e-01  3.81637454e-03
 -2.69126551e-02 -2.36458298e-02 -3.47310647e-03 -4.66701551e-03
 -2.92534988e-02  7.85894007e-02 -5.08232657e-02  9.36096724e-02
 -4.04935316e-02  8.27693349e-02 -2.12741365e-02 -5.11344342e-02
  1.56752939e-02 -6.87943374e-02  6.21099735e-02  1.95177918e-02
  7.31228713e-03  3.08135979e-02 -7.83645882e-03 -1.46074521e-02
  1.46859262e-02  3.75379822e-05 -6.87021791e-04 -1.07898604e-02
 -1.29628379e-02  7.62668440e-03  3.19266557e-02  3.40163192e-02
  2.58248548e-02 -2.32883442e-03 -1.94462854e-01 -7.10885609e-03
 -1.06259766e-02 -1.41300

iteration 600 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 501}
[-6.51881981e-02 -3.30378491e-01  5.59165665e-02  1.36262555e-01
 -3.20069859e-01  9.68299906e-02  1.31975797e-02  1.43599513e-01
 -2.22579683e-01 -1.86375726e-02  4.25957434e-02  1.40082303e-01
  2.25497064e-02 -1.89028502e-01 -1.15498985e-02  2.44164882e-01
  8.09398102e-02  2.82954086e-01  2.85873204e-02  2.51270695e-02
 -1.10311846e-01 -1.11464665e-02 -5.34365879e-02  4.91262694e-02
 -7.00215219e-03  3.11021061e-02 -1.77864652e-01  1.61928819e-02
 -1.22290887e-02 -5.35840223e-02 -6.84672644e-03 -2.54272504e-02
 -2.12661153e-02  9.59008440e-02 -5.08321198e-02  1.07206386e-01
 -4.80822641e-02  9.52863861e-02 -2.47481486e-02 -8.42692083e-02
 -1.76629344e-03 -1.25416851e-01  8.45862350e-02  9.78526650e-03
  1.18695469e-02  4.59823686e-02 -5.02774414e-03 -2.93502579e-02
  2.13262870e-02  3.23305582e-03 -1.31015843e-02 -1.66104914e-02
 -1.25602716e-

iteration 900 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 801}
[-7.80862222e-02 -3.82179413e-01  9.34509500e-02  1.88850082e-01
 -3.82124954e-01  1.02750554e-01  1.67689491e-02  1.97737524e-01
 -2.58083912e-01 -2.77731673e-02  4.31440770e-02  1.60385886e-01
  1.99068370e-02 -2.22226632e-01 -2.96076583e-03  2.83815397e-01
  1.11707887e-01  3.49092834e-01  1.44798182e-02  3.07080402e-02
 -1.09065970e-01 -6.40565424e-03 -5.47374351e-02  8.22974256e-02
  5.21080098e-03  3.96875003e-02 -2.14765494e-01  1.89513453e-02
 -9.03475311e-03 -5.82108265e-02  8.36526586e-04 -3.61021508e-02
 -2.91817284e-02  9.79335772e-02 -5.38521742e-02  1.02805265e-01
 -4.36405350e-02  9.78275972e-02 -4.22766733e-02 -1.16173171e-01
 -2.45356089e-02 -1.72066712e-01  9.05388581e-02 -1.34557332e-03
  1.00126179e-02  6.74706107e-02 -5.13478466e-03 -3.06149200e-02
  2.39011835e-02 -4.31727987e-04 -5.62752136e-03 -2.64838191e-02
 -9.10007673e-

iteration 1200 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 1101}
[-0.09125287 -0.42095546  0.11461867  0.22360418 -0.4425019   0.10264674
  0.00532844  0.24096732 -0.2807984  -0.03655234  0.04140979  0.17676022
  0.03655606 -0.24567716 -0.01941709  0.30417538  0.12862108  0.39161116
  0.00961089  0.04051205 -0.13317275 -0.0052075  -0.06452563  0.10503783
  0.00652148  0.05893426 -0.23754614  0.01016677  0.00786848 -0.06622024
 -0.01231342 -0.04852935 -0.0264739   0.09590801 -0.06856268  0.09904995
 -0.03763862  0.10229107 -0.04154471 -0.14297786 -0.04528345 -0.20816275
  0.09803448 -0.00986084  0.01598194  0.08743825 -0.00271951 -0.05221512
  0.03456968  0.00262719 -0.01015223 -0.01446395 -0.01041839  0.01404975
  0.0154243   0.02513753 -0.00808994  0.02775829 -0.39069938 -0.01676768
  0.01028205  0.00240804  0.02600647 -0.03614828 -0.0209891  -0.02149798
 -0.05377052 -0.00273241 -0.03385494 -0.13167169  0.00

iteration 1500 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 1401}
[-1.05195101e-01 -4.66231569e-01  1.24623228e-01  2.61717837e-01
 -4.89442933e-01  1.11929567e-01  6.68841642e-03  2.80072033e-01
 -3.12875880e-01 -3.83926868e-02  5.89821597e-02  1.96750979e-01
  4.22402106e-02 -2.66965454e-01 -1.72833331e-02  3.27012343e-01
  1.37234422e-01  4.34925040e-01  1.67816908e-02  3.54147215e-02
 -1.32962932e-01 -3.59627512e-03 -6.47223664e-02  1.28353880e-01
  8.17974593e-03  5.20882826e-02 -2.56761938e-01  1.21430142e-02
  7.04868776e-03 -7.15593464e-02 -1.23385391e-02 -4.91473707e-02
 -2.87041947e-02  1.03270654e-01 -7.95027333e-02  1.06845065e-01
 -5.27310285e-02  1.12036572e-01 -4.27626821e-02 -1.73644827e-01
 -5.72607731e-02 -2.43062406e-01  1.06769385e-01 -1.32058582e-02
  2.62981857e-02  1.04227342e-01 -4.30281624e-03 -5.70271674e-02
  2.56237719e-02  1.89402100e-03 -6.55203619e-03 -2.03105764e-02
 -1.78895179

iteration 1800 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 1701}
[-1.11103376e-01 -4.88287666e-01  1.45962992e-01  2.84456385e-01
 -5.26380402e-01  1.07823051e-01 -7.87170196e-05  3.10559735e-01
 -3.30211790e-01 -4.36606760e-02  6.62858374e-02  2.06822640e-01
  4.58818667e-02 -2.79411665e-01 -1.13426391e-02  3.39889288e-01
  1.55541451e-01  4.63320778e-01  1.84409772e-02  2.74631405e-02
 -1.32522461e-01  2.87853846e-03 -7.16401672e-02  1.46453382e-01
  1.77170142e-02  5.72379095e-02 -2.69923180e-01  1.22088870e-02
  6.93316034e-03 -7.88627200e-02 -7.60273520e-03 -6.10316110e-02
 -1.92712496e-02  1.00684714e-01 -7.56885080e-02  9.83463076e-02
 -4.83320833e-02  1.13951157e-01 -3.93176346e-02 -1.92164588e-01
 -8.20846247e-02 -2.66985612e-01  9.81579079e-02 -1.87369888e-02
  2.56760180e-02  1.11712636e-01  6.01452988e-03 -6.45568545e-02
  3.66585241e-02  2.95869320e-03 -2.95446597e-03 -3.46031576e-02
 -8.68564624

iteration 2100 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 2001}
[-0.12149787 -0.49982313  0.15596334  0.30607974 -0.54900693  0.11011272
 -0.0007217   0.33545    -0.34262139 -0.05087206  0.06556423  0.21485061
  0.0471281  -0.28842658 -0.01496933  0.35752984  0.16636792  0.49093295
  0.02150826  0.0240475  -0.11645269 -0.00570124 -0.07489433  0.16899996
  0.0383842   0.05827658 -0.28350729  0.01614881  0.00514396 -0.07979899
 -0.0040469  -0.06149968 -0.02337805  0.09344169 -0.08207566  0.09260939
 -0.03910635  0.11244592 -0.04897126 -0.2063197  -0.09789501 -0.28375622
  0.10853085 -0.01716476  0.03472332  0.12180431  0.0136952  -0.07278235
  0.04698769  0.01244515 -0.00199643 -0.04179534 -0.01098392  0.03100839
  0.01079802  0.04156872 -0.02087264  0.03715913 -0.47826464 -0.03633997
  0.01894661  0.01372136  0.02930809 -0.03413259 -0.02750129 -0.04005594
 -0.044917   -0.0134386  -0.0570655  -0.18526136  0.00

iteration 2400 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 2301}
[-1.27922969e-01 -5.02755408e-01  1.54861885e-01  3.19118875e-01
 -5.89788005e-01  1.11051401e-01  3.27785480e-03  3.55933519e-01
 -3.54402687e-01 -5.48885092e-02  6.23139612e-02  2.29497854e-01
  5.35730418e-02 -2.88226653e-01 -1.26657303e-02  3.64482291e-01
  1.82370842e-01  5.06867167e-01  7.77250321e-03  2.67618698e-02
 -1.28318924e-01 -5.39408052e-03 -6.97725536e-02  1.92360991e-01
  4.79264117e-02  5.76919503e-02 -2.99249288e-01  8.55587131e-03
  3.75522050e-03 -8.30992946e-02 -2.70640502e-02 -7.36760473e-02
 -2.48671803e-02  1.01678426e-01 -8.05150808e-02  9.83619109e-02
 -3.44318778e-02  1.21594531e-01 -4.58079309e-02 -2.23103356e-01
 -1.16774095e-01 -2.99674416e-01  1.20944204e-01 -2.44819706e-02
  3.36987093e-02  1.41774430e-01  1.31483066e-02 -7.77401200e-02
  3.47938411e-02  1.04443907e-02  4.76075062e-03 -4.00754255e-02
 -1.01236316

iteration 2700 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 2601}
[-1.33877011e-01 -5.08480748e-01  1.52109385e-01  3.37013298e-01
 -6.12332661e-01  1.14691383e-01  3.19351649e-03  3.84607977e-01
 -3.68657894e-01 -4.64181428e-02  7.00308912e-02  2.35353688e-01
  5.93011239e-02 -2.98634602e-01 -2.41420406e-02  3.81026441e-01
  1.98096553e-01  5.19832037e-01  1.12702815e-02  1.81257763e-02
 -1.18703792e-01 -6.39914528e-03 -5.99389456e-02  2.00830330e-01
  3.91314783e-02  6.17944755e-02 -3.00301270e-01  8.60978701e-03
  8.05020080e-03 -8.09675208e-02 -2.10923909e-02 -6.81608523e-02
 -3.21808525e-02  9.89578603e-02 -7.88932140e-02  1.03372375e-01
 -3.00013350e-02  1.22491530e-01 -5.95474210e-02 -2.35955860e-01
 -1.29983081e-01 -3.13737031e-01  1.29119275e-01 -2.25006158e-02
  3.17386927e-02  1.50151303e-01  2.15135769e-02 -7.72074326e-02
  3.69714661e-02  1.19510470e-02  9.17677586e-03 -4.67273631e-02
 -1.52069277

iteration 3000 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 2901}
None
HERE
iteration 100 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 1}
[-1.16583755e-02 -1.66627939e-02 -1.09582097e-02 -8.46782977e-04
 -3.13098896e-02  1.14130323e-02  4.16251587e-03 -2.05465954e-03
 -1.78531519e-02  9.82123598e-04 -6.86333933e-03  1.02698634e-02
 -3.12778159e-03 -7.94923299e-03 -6.77827756e-03  1.43340527e-02
 -7.26997187e-03  1.22937165e-02  3.48594636e-03  8.47214949e-04
 -1.64676681e-02 -7.01967802e-04 -1.08100141e-02  2.15423285e-04
 -7.25418120e-03  9.34910480e-04 -1.73352539e-02 -2.01387331e-04
 -1.02781447e-02  1.48513851e-03  1.26022052e-03  1.09796500e-03
 -9.98389892e-03  9.48412917e-03 -6.99990913e-03  1.02557072e-02
 -1.08887324e-02  9.86648854e-03 -4.90169561e-03 -6.93792224e-03
  3.12959776e-03 -3.29820175e-03  4.15990613e-03  2.50482

iteration 400 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 301}
[-0.03191681 -0.05360484 -0.0214262   0.00748039 -0.07743872  0.03393463
  0.01408885  0.00256577 -0.05118871  0.0043802  -0.01029328  0.03284613
 -0.00389359 -0.02698059 -0.01179685  0.04815454 -0.01460876  0.04421028
  0.01666473  0.00428081 -0.04633221 -0.00142999 -0.0314808   0.003794
 -0.01378589  0.00477    -0.0519483   0.00136817 -0.0269451   0.00391256
  0.00814874  0.00175984 -0.0272653   0.02812647 -0.0186888   0.02978888
 -0.02927132  0.02962294 -0.00995179 -0.01468011  0.00841423 -0.01010257
  0.0161654   0.00801476 -0.00739818  0.01447509 -0.00113232 -0.00300914
  0.0003471  -0.01065164 -0.00167743 -0.00291607 -0.01033827  0.00030151
  0.01063883  0.01469994  0.01213413 -0.00207877 -0.05759432 -0.00172792
 -0.00692427 -0.00818805  0.00235245 -0.01072981 -0.00221213 -0.0017322
 -0.02577781  0.00016074 -0.00071742 -0.01106603  0.001071

iteration 700 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 601}
[-4.10024999e-02 -8.49597238e-02 -2.12046597e-02  1.70222733e-02
 -1.05151440e-01  4.78245897e-02  1.61237583e-02  9.67173027e-03
 -7.28185432e-02  5.58250848e-03 -5.25418888e-03  4.89634306e-02
 -5.31168841e-03 -4.30814207e-02 -9.47978609e-03  7.41336040e-02
 -1.17698822e-02  6.96776668e-02  2.37419957e-02  6.49435732e-03
 -6.07818889e-02 -1.48800656e-03 -3.91181168e-02  6.50767689e-03
 -1.44988671e-02  8.66608059e-03 -7.16839106e-02  2.93722553e-03
 -3.33962918e-02  9.74246509e-04  7.02311464e-03  2.53891362e-04
 -3.46945739e-02  4.11918614e-02 -2.56315915e-02  4.45855431e-02
 -3.48558254e-02  4.32569645e-02 -1.14958252e-02 -1.91726919e-02
  1.19954639e-02 -1.72396274e-02  2.55884709e-02  1.17386685e-02
 -7.04070309e-03  1.85335735e-02 -1.88576815e-03 -4.45432974e-03
  1.26451601e-03 -1.01303216e-02 -2.97716805e-03 -4.84545388e-03
 -1.08802246e

iteration 1000 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 901}
[-4.46013764e-02 -1.11862309e-01 -1.61118354e-02  2.51643931e-02
 -1.28000311e-01  5.82475685e-02  1.90877386e-02  1.71839544e-02
 -8.83012690e-02  5.89903191e-03  2.72820531e-04  6.08756570e-02
 -5.58891884e-03 -5.78358423e-02 -6.97455380e-03  9.63831112e-02
 -4.95553764e-03  9.12477165e-02  3.02555180e-02  9.33587816e-03
 -6.82872030e-02 -2.92506725e-03 -4.24195194e-02  8.22230872e-03
 -1.39259476e-02  1.25219920e-02 -8.52194283e-02  3.14523271e-03
 -3.29548934e-02 -8.30309812e-04  1.08936142e-02 -2.02295983e-04
 -3.60841371e-02  4.95570230e-02 -3.22755161e-02  5.52505848e-02
 -3.67901173e-02  5.27412299e-02 -1.26584828e-02 -2.51390517e-02
  1.25449383e-02 -2.56162186e-02  3.33718526e-02  1.28900636e-02
 -5.80359087e-03  2.05168571e-02 -4.01024040e-03 -6.89012959e-03
  2.88078246e-03 -9.84278962e-03 -4.04464578e-03 -5.94533406e-03
 -1.16625089

iteration 1300 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 1201}
[-0.04833377 -0.13678062 -0.01293097  0.03360519 -0.14851608  0.0649818
  0.01936027  0.0262164  -0.10156343  0.0050654   0.0031116   0.07007658
 -0.00552852 -0.07308094 -0.01051857  0.11452721  0.00117775  0.1109655
  0.03716601  0.01240729 -0.07687555 -0.00493451 -0.04549351  0.00775277
 -0.01894785  0.01610531 -0.09687182  0.00399949 -0.03245305 -0.00487643
  0.00863697 -0.00194725 -0.03638253  0.05632153 -0.0377462   0.06269121
 -0.03843531  0.05974833 -0.01428831 -0.03055195  0.01387266 -0.03260089
  0.03896609  0.01409205 -0.00364015  0.02340938 -0.00652098 -0.01041877
  0.00621853 -0.01028212 -0.0055305  -0.00558966 -0.01111248  0.00114395
  0.01908544  0.02068868  0.02030973 -0.00235139 -0.12003001 -0.00473626
 -0.00992548 -0.01273053  0.00092494 -0.01678649 -0.00305041 -0.00448122
 -0.0437393   0.00057894 -0.00306791 -0.02641008  0.003

iteration 1600 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 1501}
[-0.05009511 -0.16021129 -0.00693704  0.04230413 -0.16477251  0.0709871
  0.02033074  0.03465566 -0.11447323  0.00376886  0.00880188  0.07902115
 -0.00379497 -0.08595277 -0.00791123  0.13136019  0.00812868  0.12866606
  0.04128975  0.0134788  -0.08140016 -0.00523643 -0.04659712  0.00884284
 -0.01968945  0.01793734 -0.10684716  0.00445341 -0.03200933 -0.0085136
  0.01106254 -0.00214223 -0.03505112  0.06254629 -0.04063161  0.07060764
 -0.03995181  0.06652787 -0.01329917 -0.03635752  0.01555143 -0.04199282
  0.04427691  0.01575951 -0.00055123  0.02413083 -0.00883771 -0.01230156
  0.00594325 -0.00864928 -0.00727782 -0.00647161 -0.01071431  0.0024879
  0.02126857  0.02197889  0.02241507 -0.00130078 -0.13508414 -0.00534858
 -0.00996205 -0.01341876  0.00143064 -0.01772617 -0.00248113 -0.00547998
 -0.0470366   0.00055132 -0.0040732  -0.03104547  0.0038

iteration 1900 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 1801}
[-5.30846192e-02 -1.79386542e-01 -2.09523136e-03  5.02080027e-02
 -1.78247977e-01  7.43567165e-02  1.91440701e-02  4.34138667e-02
 -1.25703034e-01  1.45926569e-03  1.29094305e-02  8.56965435e-02
 -2.81096803e-03 -9.82367324e-02 -8.56653883e-03  1.45181675e-01
  1.42214131e-02  1.45196201e-01  4.50503026e-02  1.46472691e-02
 -8.58167649e-02 -6.26415134e-03 -4.95766074e-02  9.79093466e-03
 -1.65463805e-02  2.04016050e-02 -1.16565590e-01  4.93520904e-03
 -3.25626224e-02 -1.26178157e-02  1.04438081e-02 -3.83170181e-03
 -3.53505727e-02  6.60228397e-02 -4.50222603e-02  7.46671536e-02
 -4.13987078e-02  7.07276632e-02 -1.47138383e-02 -4.02240841e-02
  1.63715620e-02 -4.98218377e-02  4.98753998e-02  1.75764712e-02
  9.77529436e-04  2.63936619e-02 -8.85229711e-03 -1.40984887e-02
  8.62502400e-03 -7.15277912e-03 -7.66685090e-03 -7.92871381e-03
 -8.5638839

iteration 2200 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 2101}
[-5.39846317e-02 -1.95563790e-01  5.43568206e-03  5.74150729e-02
 -1.92253835e-01  7.78694024e-02  1.96164734e-02  5.12124103e-02
 -1.34859676e-01 -6.55952092e-04  1.68404116e-02  9.15619721e-02
 -2.79550116e-03 -1.08690103e-01 -8.00056845e-03  1.57364699e-01
  2.14105563e-02  1.60049716e-01  4.56582211e-02  1.66836670e-02
 -8.76984573e-02 -8.33242806e-03 -5.07205738e-02  1.16589876e-02
 -1.61906914e-02  2.25618673e-02 -1.23949920e-01  5.49322634e-03
 -3.16941615e-02 -1.54512058e-02  1.23364292e-02 -5.32687988e-03
 -3.34290611e-02  6.88912343e-02 -4.96030878e-02  7.80664080e-02
 -4.12859805e-02  7.42298184e-02 -1.51602204e-02 -4.48153687e-02
  1.62898430e-02 -5.73028551e-02  5.46848438e-02  1.85788048e-02
  5.93097353e-03  2.70112931e-02 -1.10087756e-02 -1.67863312e-02
  1.14290613e-02 -3.58541970e-03 -9.40609049e-03 -9.92171916e-03
 -6.8044728

iteration 2500 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 2401}
[-5.51091246e-02 -2.11523889e-01  9.86902920e-03  6.47133866e-02
 -2.05686506e-01  8.08357646e-02  1.94901511e-02  5.93048695e-02
 -1.43826143e-01 -3.91326061e-03  1.75957898e-02  9.76941254e-02
 -8.26718577e-04 -1.18274953e-01 -9.17484882e-03  1.69025947e-01
  2.90979602e-02  1.73447171e-01  4.56911226e-02  1.83589008e-02
 -9.16977715e-02 -8.46407702e-03 -5.00461851e-02  1.41638941e-02
 -1.27440685e-02  2.45916425e-02 -1.32096566e-01  5.51072683e-03
 -3.01873418e-02 -1.94100745e-02  9.09630572e-03 -6.39217486e-03
 -3.32935618e-02  7.27532356e-02 -5.18720107e-02  8.24983821e-02
 -4.25174916e-02  7.81135600e-02 -1.53363255e-02 -4.94590844e-02
  1.59685648e-02 -6.49973515e-02  6.01690634e-02  1.86447600e-02
  6.99334030e-03  2.87112019e-02 -1.16752360e-02 -1.76672957e-02
  1.06381654e-02 -2.79833945e-03 -9.91091346e-03 -1.07119971e-02
 -5.9481247

iteration 2800 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 2701}
[-5.70526241e-02 -2.25746565e-01  1.21459309e-02  7.13314862e-02
 -2.17296560e-01  8.25541951e-02  1.85880330e-02  6.79851201e-02
 -1.52335148e-01 -6.29437489e-03  2.05648790e-02  1.02220297e-01
  2.13824014e-03 -1.27679320e-01 -1.29393694e-02  1.79783280e-01
  3.51972681e-02  1.85320582e-01  4.66274710e-02  1.87696931e-02
 -9.24610891e-02 -9.25821856e-03 -5.07068759e-02  1.53246558e-02
 -1.52804573e-02  2.68114238e-02 -1.37279984e-01  5.11678729e-03
 -3.04002981e-02 -2.32517726e-02  7.09460919e-03 -7.81141467e-03
 -3.36275368e-02  7.41775029e-02 -5.31444555e-02  8.50234954e-02
 -4.33739333e-02  8.04528672e-02 -1.64535576e-02 -5.36362801e-02
  1.56430129e-02 -7.12744219e-02  6.44932284e-02  1.83172106e-02
  8.47820542e-03  3.01945089e-02 -1.31473701e-02 -1.90669911e-02
  1.17821923e-02 -2.46741617e-03 -1.03827736e-02 -1.08202939e-02
 -6.4690680

KeyboardInterrupt: 

###### Stochastic Gradient Descent With Ridge Regression

In [None]:
hs = {
    'batch_size': 2500,
    'degree': [-2] + np.arange(3, 4),
    'gamma': [1e-2, 1e-3], 
    'lambda': [1e-2, 1e-3],
    'k_fold': 4,
    'max_iters': 1000,
    'num_batches': 1,
    'seed': 1,
}

cache = Cache(CACHE_DIR + 'clean_standardize_expand_stochastic_logistic_ridge_descent')

_ = evaluate(
    clean = map_logistic(clean_standardize_expand), 
    fit   = descent_with_cache(
        descent    = stochastic_gradient_descent_e(logistic_gradient_ridge), 
        loss       = logistic_error_and_ridge, 
        round_size = 100,
        cache      = cache,
        log        = True
    ), 
    y     = y,
    x     = x,
    hs    = hs
)

##### Stochastic Gradient Descent With Lasso

In [None]:
hs = {
    'batch_size': 2500,
    'degree': [-2, 1, 2, 3, 4, 5, 6],
    'gamma': [1e-1, 1e-2, 1e-3], 
    'lambda': [1e-1, 1e-2, 1e-3],
    'k_fold': 4,
    'max_iters': 2000,
    'num_batches': 1,
    'seed': 0,
}

cache = Cache(CACHE_DIR + 'clean_standardize_expand_stochastic_logistic_lasso_descent')

_ = evaluate(
    clean = map_logistic(clean_standardize_expand), 
    fit   = descent_with_cache(
        descent    = stochastic_gradient_descent_e(logistic_gradient_lasso), 
        loss       = logistic_error_and_lasso, 
        round_size = 100,
        cache      = cache,
        log        = True
    ), 
    y     = y,
    x     = x,
    hs    = hs
)

### With Cross-Validation

In [None]:
hs = {
    'batch_size': 2500,
    'degree': np.concatenate([[-2], np.arange(1, 7)]),
    'gamma': [1e-2, 1e-3], 
    'k_fold': 4,
    'lambda': 0,
    'max_iters': 3000,
    'num_batches': 1,
    'seed': 0,
    'seed_cv': 0
}

cache = Cache(CACHE_DIR + 'clean_standardize_expand_stochastic_logistic_descent_cross_validate')

_ = evaluate(
    clean = map_logistic(clean_standardize_expand), 
    fit   = descent_with_cache(
        descent    = cross_validate_descent(
            stochastic_gradient_descent_e(logistic_gradient), 
            logistic_error
        ),
        round_size = 100,
        cache      = cache,
        multiple   = True,
        log        = True
    ), 
    y     = y,
    x     = x,
    hs    = hs
)

In [None]:
hs = {
    'batch_size': 2500,
    'degree': 3,
    'gamma': [1e-2, 1e-3], 
    'k_fold': 4,
    'lambda': 0,
    'max_iters': 1000,
    'num_batches': 1,
    'seed': 0,
    'seed_cv': 0
}

cache = Cache(CACHE_DIR + 'clean_standardize_expand_stochastic_logistic_descent_cross_validate')

def clean_expand(y, x, h):
        
    degree = int(h['degree'])

    x = remove_errors(x)
    x = remove_outliers(x)
    x = remove_nan_features(x)
    x = build_poly(x, degree)

    return y, x

_ = evaluate(
    clean = map_logistic(clean_standardize_expand), 
    fit   = descent_with_cache(
        descent    = cross_validate_descent(
            stochastic_gradient_descent_e(logistic_gradient), 
            logistic_error
        ),
        round_size = 100,
        cache      = cache,
        multiple   = True,
        log        = True
    ), 
    y     = y,
    x     = x,
    hs    = hs
)

# Split Dataset

In [14]:
def split_data(y, x):
    
    def categorize(x):
        if x[0] == -999.0:
            return 0
        else:
            return x[22] + 1
    
    categories = np.apply_along_axis(categorize, 1, x)

    xs = [x[categories == i] for i in np.arange(5)]
    ys = [y[categories == i] for i in np.arange(5)]
    
    return ys, xs

In [16]:
y_split, x_split split_data(y, x)

([array([-1., -1., -1., -1., -1., -1., -1.,  1., -1., -1., -1., -1., -1.,
         -1.,  1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
         -1., -1., -1., -1., -1.,  1., -1., -1., -1., -1., -1., -1., -1.,
         -1., -1., -1.,  1.,  1.,  1., -1., -1., -1., -1., -1.,  1., -1.,
         -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
         -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
         -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
          1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
         -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
         -1., -1., -1., -1., -1., -1.,  1.,  1., -1., -1., -1., -1., -1.,
         -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
         -1., -1., -1.,  1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
          1., -1., -1., -1., -1., -1.,  1.,  1., -1., -1., -1., -1., -1.,
         -1., -1.,  1., -1., -1., -1.,