In [75]:
%load_ext autoreload
%autoreload 2

from algebra import *
from cache import *
from costs import *
from features import *
from gradients import *
from helpers import *
from model import *
from splits import *

import numpy as np
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import csv
import warnings
warnings.filterwarnings('ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [76]:
# Run configuration. With SUB_SAMPLE set to True the cache and submission
# files go to the "test/" directories instead of the top-level ones, so the
# two kinds of run never share files. The flag is also passed to the loader
# when the training set is read.
SUB_SAMPLE = False
CACHE_DIR = "test/cache/" if SUB_SAMPLE else "cache/"
SUBMISSIONS_DIR = "test/submissions/" if SUB_SAMPLE else "submissions/"

In [77]:
# Read the training set once; y and x are reused by the experiments below.
# SUB_SAMPLE is forwarded to the loader (presumably to load a reduced sample
# -- confirm against load_csv_data's definition).
y, x, ids = load_csv_data('data/train.csv', SUB_SAMPLE)

# 1 - Analytical Results

### Ridge Regression with Fixed Degree

##### Without Validation

In [57]:
def clean_standardize_expand(y, x, h):
    """Clean the feature matrix, then expand it with ``build_poly``.

    The features go through ``remove_errors``, ``remove_outliers``,
    ``standardize_all`` and ``remove_nan_features`` in that order before the
    expansion. The labels are handed back untouched.

    Parameters
    ----------
    y : labels, returned as received.
    x : feature matrix to transform.
    h : mapping of hyper-parameters; ``h['degree']`` is converted with
        ``int`` and passed to ``build_poly``.

    Returns
    -------
    tuple
        ``(y, transformed_x)``.
    """
    # Read the degree up front so a missing key fails before any work is done.
    degree = int(h['degree'])

    features = x
    for step in (remove_errors, remove_outliers, standardize_all, remove_nan_features):
        features = step(features)

    return y, build_poly(features, degree)

def ridge_regression_analytical(y, x, h):
    """Fit weights with ``ridge_regression`` and report the resulting MSE.

    Parameters
    ----------
    y : labels.
    x : feature matrix.
    h : mapping of hyper-parameters; only ``h['lambda']`` is read (converted
        with ``float``). Other keys, such as ``'degree'``, are ignored here.

    Returns
    -------
    dict
        ``{'w': weights, 'mse': compute_mse(y, x, weights)}``, where the MSE
        is evaluated on the same ``(y, x)`` used for fitting.
    """
    lambda_ = float(h['lambda'])

    w = ridge_regression(y, x, lambda_)

    return {
        'w': w,
        'mse': compute_mse(y, x, w)
    }

def ridge_mse(y, x, w, h):
    """Report the MSE, a lambda-weighted L2 norm of ``w`` and the error count.

    Parameters
    ----------
    y : labels.
    x : feature matrix.
    w : weight vector being evaluated.
    h : mapping of hyper-parameters; ``h['lambda']`` is converted with
        ``float`` and multiplies the 2-norm of ``w``.

    Returns
    -------
    dict
        Keys ``'mse'``, ``'ridge_norm'``, ``'total_loss'`` (the sum of the
        first two) and ``'n_err'`` (from
        ``compute_error_count(predict_values)``).
    """
    strength = float(h['lambda'])

    report = {'mse': compute_mse(y, x, w)}
    report['ridge_norm'] = np.linalg.norm(w, 2) * strength
    report['total_loss'] = report['mse'] + report['ridge_norm']

    count_errors = compute_error_count(predict_values)
    report['n_err'] = count_errors(y, x, w)

    return report
    

In [None]:
# Hyper-parameters for the analytical ridge fit: three candidate degrees and
# a single lambda.
hs = { 
    'degree': [5, 6, 7], 
    'lambda': 1e-4,
}

# NOTE(review): this is the only cell that hands `cache` to evaluate as a
# path string; the executed cells further down wrap `fit` with
# fit_with_cache(..., Cache(...)) instead -- confirm evaluate still accepts
# this keyword.
_ = evaluate(
    clean = clean_standardize_expand, 
    fit   = ridge_regression_analytical, 
    x     = x, 
    y     = y, 
    hs    = hs, 
    cache = CACHE_DIR + 'clean_standardize_expand_ridge_regression_analytical'
)

##### Using Cross-Validation

Here, we implement the same model with cross-validation.

In [None]:
# Hyper-parameters for the cross-validated analytical ridge fit.
hs = {
    'degree': np.arange(4, 16),
    'lambda': np.logspace(-8, -4, 5),
    'k_fold': 4,
    'seed': 0
}

def mse(y, x, w, h=None):
    """Return the mean squared error of ``w`` on ``(y, x)`` as a one-entry dict.

    ``h`` is accepted and ignored so this loss can be called with the same
    four arguments as ``ridge_mse``, the loss handed to ``cross_validate``
    in the split-dataset cells of this notebook.
    """
    return { 'mse' : compute_mse(y, x, w) }


cache = Cache(CACHE_DIR + 'clean_standardize_expand_cross_validate_ridge_regression_analytical_mse')

# Same call shape as the split-dataset ridge cells: the cleaning function goes
# to `clean`, and the cross-validated fit (wrapped with its cache) goes to
# `fit`. The result is bound to `_` so the return value is not echoed as
# cell output.
_ = evaluate(
    clean = clean_standardize_expand,
    fit   = fit_with_cache(cross_validate(ridge_regression_analytical, mse), cache),
    x     = x,
    y     = y,
    hs    = hs
)

In [None]:
# myModel.predict(best_h, x, y, SUBMISSIONS_DIR + 'RidgeRegression_MSE_FixedDegree_CrossValidation_Model')

## Gradient Descent

#### Least Squares

#### Ridge Regression

#### Lasso

# Logistic Regression

In [18]:
def map_logistic(clean):
    """Wrap a cleaning function so that its labels come out as 0/1.

    Parameters
    ----------
    clean : callable ``clean(y, x, h) -> (y, x)``.

    Returns
    -------
    callable
        A function with the same signature. It calls ``clean`` and then
        replaces every label equal to 1 by 1 and every other label by 0;
        the cleaned features are returned unchanged.
    """
    def inner_function(y, x, h):
        cleaned_y, cleaned_x = clean(y, x, h)
        binary_y = np.where(cleaned_y == 1, 1, 0)
        return binary_y, cleaned_x

    return inner_function

def logistic_gradient(y, x, w, h):
    """Return ``compute_logistic_gradient(y, x, w)``.

    ``h`` is part of the signature but is not read by this function.
    """
    gradient = compute_logistic_gradient(y, x, w)
    return gradient
            
def logistic_error(y, x, w, h):
    """Report the logistic error and the error count for weights ``w``.

    ``h`` is part of the signature but is not read by this function.

    Returns
    -------
    dict
        ``'logistic_err'`` from ``compute_logistic_error`` and ``'n_err'``
        from ``compute_error_count(predict_logistic)``.
    """
    report = {}
    report['logistic_err'] = compute_logistic_error(y, x, w)

    count_errors = compute_error_count(predict_logistic)
    report['n_err'] = count_errors(y, x, w)

    return report

def logistic_gradient_ridge(y, x, w, h):
    """Return the logistic gradient plus ``h['lambda'] * w``.

    Parameters
    ----------
    y, x, w : forwarded to ``compute_logistic_gradient``.
    h : mapping of hyper-parameters; ``h['lambda']`` scales the weight term.
    """
    strength = h['lambda']
    data_gradient = compute_logistic_gradient(y, x, w)

    return data_gradient + strength * w

def logistic_error_and_ridge(y, x, w, h):
    """Report the logistic error, a lambda-weighted L2 norm and the error count.

    Parameters
    ----------
    y, x, w : labels, features and the weight vector being evaluated.
    h : mapping of hyper-parameters; ``h['lambda']`` multiplies the 2-norm
        of ``w``.

    Returns
    -------
    dict
        Keys ``'logistic_err'``, ``'ridge_norm'``, ``'total_loss'`` (the sum
        of the first two) and ``'n_err'``.
    """
    strength = h['lambda']

    # NOTE(review): this reports lambda * ||w||_2 (not squared), whereas
    # logistic_gradient_ridge adds lambda * w -- confirm which penalty is
    # meant before comparing total_loss across runs.
    penalty = np.linalg.norm(w, 2) * strength
    data_loss = compute_logistic_error(y, x, w)
    count_errors = compute_error_count(predict_logistic)
    mistakes = count_errors(y, x, w)

    report = {'logistic_err': data_loss}
    report['ridge_norm'] = penalty
    report['total_loss'] = data_loss + penalty
    report['n_err'] = mistakes
    return report

def logistic_gradient_lasso(y, x, w, h):
    """Return the logistic gradient plus ``h['lambda'] * sign(w)``.

    Parameters
    ----------
    y, x, w : forwarded to ``compute_logistic_gradient``.
    h : mapping of hyper-parameters; ``h['lambda']`` scales the sign term.
    """
    strength = h['lambda']
    data_gradient = compute_logistic_gradient(y, x, w)

    return data_gradient + strength * np.sign(w)

def logistic_error_and_lasso(y, x, w, h):
    """Report the logistic error, a lambda-weighted L1 norm and the error count.

    Parameters
    ----------
    y, x, w : labels, features and the weight vector being evaluated.
    h : mapping of hyper-parameters; ``h['lambda']`` multiplies the 1-norm
        of ``w``.

    Returns
    -------
    dict
        Keys ``'logistic_err'``, ``'lasso_norm'``, ``'total_loss'`` (the sum
        of the first two) and ``'n_err'``.
    """
    strength = h['lambda']

    penalty = np.linalg.norm(w, 1) * strength
    data_loss = compute_logistic_error(y, x, w)
    count_errors = compute_error_count(predict_logistic)
    mistakes = count_errors(y, x, w)

    report = {'logistic_err': data_loss}
    report['lasso_norm'] = penalty
    report['total_loss'] = data_loss + penalty
    report['n_err'] = mistakes
    return report

### Without Validation

##### Stochastic Gradient Descent

In [40]:
# Hyper-parameters for logistic SGD without a penalty (lambda is 0).
# The degree candidates are -2 followed by 1..6.
hs = {
    'batch_size': 2500,
    'degree': np.concatenate([[-2], np.arange(1, 7)]),
    'gamma': [1e-2, 1e-3], 
    'k_fold': 4,
    'lambda': 0,
    'max_iters': 3000,
    'num_batches': 1,
    'seed': 1,
}

cache = Cache(CACHE_DIR + 'clean_standardize_expand_stochastic_logistic_descent')

# descent_with_loss pairs the SGD routine with the logistic_error report;
# descent_with_cache then wraps it with the cache and a round size of 100
# (the captured log below reports progress every 100 iterations).
_ = evaluate(
    clean = map_logistic(clean_standardize_expand), 
    fit   = descent_with_cache(
        descent    = descent_with_loss(stochastic_gradient_descent_e(logistic_gradient), logistic_error),  
        round_size = 100,
        cache      = cache,
        multiple   = False,
        log        = True
    ), 
    y     = y,
    x     = x,
    hs    = hs
)

iteration 100 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 1, 'logistic_err': 0.5157348347950221, 'n_err': 0.2486}
iteration 200 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 101, 'logistic_err': 0.48062207362374104, 'n_err': 0.2256}
iteration 300 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 201, 'logistic_err': 0.4631917074576885, 'n_err': 0.213}
iteration 400 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 301, 'logistic_err': 0.44928213278017104, 'n_err': 0.202}
iteration 500 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0, 'max_iters': 100, 'num_batches': 1, 'seed': 401, 'logistic_err': 0.43956587335652947, 'n_err': 0.199}
iteration 600 - {'batch_size': 2

KeyboardInterrupt: 

##### Stochastic Gradient Descent With Ridge Regression

In [None]:
# Hyper-parameters for logistic SGD with the ridge gradient term.
hs = {
    'batch_size': 2500,
    # Concatenate to get the two candidates [-2, 3]. Do not write
    # `[-2] + np.arange(3, 4)`: list + ndarray is an element-wise addition
    # and yields array([1]).
    'degree': np.concatenate([[-2], np.arange(3, 4)]),
    'gamma': [1e-2, 1e-3],
    'lambda': [1e-2, 1e-3],
    'k_fold': 4,
    'max_iters': 1000,
    'num_batches': 1,
    'seed': 1,
}

cache = Cache(CACHE_DIR + 'clean_standardize_expand_stochastic_logistic_ridge_descent')

# Same composition as the unregularised SGD cell that was actually executed:
# the loss report is attached with descent_with_loss rather than passed to
# descent_with_cache, and `multiple` is False because there is a single
# (non cross-validated) descent.
_ = evaluate(
    clean = map_logistic(clean_standardize_expand),
    fit   = descent_with_cache(
        descent    = descent_with_loss(
            stochastic_gradient_descent_e(logistic_gradient_ridge),
            logistic_error_and_ridge
        ),
        round_size = 100,
        cache      = cache,
        multiple   = False,
        log        = True
    ),
    y     = y,
    x     = x,
    hs    = hs
)

##### Stochastic Gradient Descent With Lasso

In [None]:
# Hyper-parameters for logistic SGD with the lasso gradient term.
hs = {
    'batch_size': 2500,
    'degree': [-2, 1, 2, 3, 4, 5, 6],
    'gamma': [1e-1, 1e-2, 1e-3],
    'lambda': [1e-1, 1e-2, 1e-3],
    'k_fold': 4,
    'max_iters': 2000,
    'num_batches': 1,
    'seed': 0,
}

cache = Cache(CACHE_DIR + 'clean_standardize_expand_stochastic_logistic_lasso_descent')

# Same composition as the unregularised SGD cell that was actually executed:
# the loss report is attached with descent_with_loss rather than passed to
# descent_with_cache, and `multiple` is False because there is a single
# (non cross-validated) descent.
_ = evaluate(
    clean = map_logistic(clean_standardize_expand),
    fit   = descent_with_cache(
        descent    = descent_with_loss(
            stochastic_gradient_descent_e(logistic_gradient_lasso),
            logistic_error_and_lasso
        ),
        round_size = 100,
        cache      = cache,
        multiple   = False,
        log        = True
    ),
    y     = y,
    x     = x,
    hs    = hs
)

### With Cross-Validation

##### Stochastic Gradient Descent

In [None]:
# Hyper-parameters for cross-validated logistic SGD without a penalty.
# 'seed_cv' is supplied in addition to 'seed'.
# NOTE(review): presumably 'seed_cv' seeds the fold split -- confirm in
# cross_validate_descent.
hs = {
    'batch_size': 2500,
    'degree': np.concatenate([[-2], np.arange(1, 7)]),
    'gamma': [1e-2, 1e-3], 
    'k_fold': 4,
    'lambda': 0,
    'max_iters': 3000,
    'num_batches': 1,
    'seed': 0,
    'seed_cv': 0
}

cache = Cache(CACHE_DIR + 'clean_standardize_expand_stochastic_logistic_descent_cross_validate')

# cross_validate_descent receives the SGD routine and the loss report;
# descent_with_cache wraps the result with the cache, here with multiple=True.
_ = evaluate(
    clean = map_logistic(clean_standardize_expand), 
    fit   = descent_with_cache(
        descent    = cross_validate_descent(
            stochastic_gradient_descent_e(logistic_gradient), 
            logistic_error
        ),
        round_size = 100,
        cache      = cache,
        multiple   = True,
        log        = True
    ), 
    y     = y,
    x     = x,
    hs    = hs
)

##### Descent with Ridge

In [None]:
# Hyper-parameters for cross-validated logistic SGD with the ridge term.
hs = {
    'batch_size': 2500,
    # Concatenate to get the two candidates [-2, 3]. Do not write
    # `[-2] + np.arange(3, 4)`: list + ndarray is an element-wise addition
    # and yields array([1]).
    'degree': np.concatenate([[-2], np.arange(3, 4)]),
    'gamma': [1e-2, 1e-3],
    'lambda': [1e-2, 1e-3],
    'k_fold': 4,
    'max_iters': 1000,
    'num_batches': 1,
    'seed': 1,
    # Supplied by the other cross-validated descent cells in this notebook.
    # NOTE(review): presumably read by cross_validate_descent -- confirm.
    'seed_cv': 0
}

cache = Cache(CACHE_DIR + 'clean_standardize_expand_stochastic_logistic_ridge_descent_cross_validate')

_ = evaluate(
    clean = map_logistic(clean_standardize_expand),
    fit   = descent_with_cache(
        descent    = cross_validate_descent(
            stochastic_gradient_descent_e(logistic_gradient_ridge),
            logistic_error_and_ridge,
        ),
        round_size = 100,
        cache      = cache,
        log        = True
    ),
    y     = y,
    x     = x,
    hs    = hs
)

##### Descent with Lasso

In [None]:
# Hyper-parameters for cross-validated logistic SGD with the lasso term.
hs = {
    'batch_size': 2500,
    # Concatenate to get the two candidates [-2, 3]. Do not write
    # `[-2] + np.arange(3, 4)`: list + ndarray is an element-wise addition
    # and yields array([1]).
    'degree': np.concatenate([[-2], np.arange(3, 4)]),
    'gamma': [1e-2, 1e-3],
    'lambda': [1e-2, 1e-3],
    'k_fold': 4,
    'max_iters': 1000,
    'num_batches': 1,
    'seed': 1,
    # Supplied by the other cross-validated descent cells in this notebook.
    # NOTE(review): presumably read by cross_validate_descent -- confirm.
    'seed_cv': 0
}

cache = Cache(CACHE_DIR + 'clean_standardize_expand_stochastic_logistic_lasso_descent_cross_validate')

_ = evaluate(
    clean = map_logistic(clean_standardize_expand),
    fit   = descent_with_cache(
        descent    = cross_validate_descent(
            stochastic_gradient_descent_e(logistic_gradient_lasso),
            logistic_error_and_lasso,
        ),
        round_size = 100,
        cache      = cache,
        log        = True
    ),
    y     = y,
    x     = x,
    hs    = hs
)

# Split Dataset

### Least Squares Ridge

In [14]:
def split_data(y, x, ids):
    """Partition the samples into five groups based on two feature columns.

    A row goes to group 0 when its column 0 equals -999.0 (presumably the
    dataset's missing-value marker -- confirm against the data description).
    Otherwise its group is the value in column 22 plus one. Rows whose group
    falls outside 0..4 are not returned in any group.

    Parameters
    ----------
    y : numpy array of labels, one per row of ``x``.
    x : 2-D numpy array with at least 23 columns.
    ids : numpy array of sample identifiers, one per row of ``x``.

    Returns
    -------
    tuple of three lists
        ``(ys, xs, ids)``, each holding five arrays; entry ``i`` contains
        the rows assigned to group ``i``, in their original order.
    """
    n_groups = 5

    # Vectorised group assignment: one pass over the two columns instead of
    # a Python call per row. Works for an empty ``x`` as well.
    groups = np.where(x[:, 0] == -999.0, 0, x[:, 22] + 1)

    masks = [groups == g for g in range(n_groups)]

    ys = [y[mask] for mask in masks]
    xs = [x[mask] for mask in masks]
    ids_by_group = [ids[mask] for mask in masks]

    return ys, xs, ids_by_group

In [78]:
# split_data takes the ids as well and returns three lists (labels, features,
# ids), one entry per group.
y_split, x_split, ids_split = split_data(y, x, ids)

In [80]:
# Hyper-parameters for the cross-validated analytical ridge fit on each split.
# The degree candidates are -2 followed by 4..15.
hs = { 
    'degree': np.concatenate([np.array([-2]), np.arange(4, 16)]), 
    'lambda': np.logspace(-8, -4, 5),
    'k_fold': 4,
    'seed': 0
}

# One evaluation per split, each with its own cache file (suffix split{i}).
# `result` is rebound on every iteration, so only the last split's return
# value remains bound after the loop.
for i in range(5):
    
    cache = Cache(CACHE_DIR + f'clean_standardize_expand_cross_validate_ridge_regression_analytical_mse_split{i}')

    result = evaluate(
        clean = clean_standardize_expand, 
        fit   = fit_with_cache(cross_validate(ridge_regression_analytical, ridge_mse), cache), 
        x     = x_split[i],
        y     = y_split[i], 
        hs    = hs
    )

In [81]:
# Same hyper-parameters as the per-split ridge cell, applied here to the
# whole (unsplit) training set.
hs = { 
    'degree': np.concatenate([np.array([-2]), np.arange(4, 16)]), 
    'lambda': np.logspace(-8, -4, 5),
    'k_fold': 4,
    'seed': 0
}


cache = Cache(CACHE_DIR + 'clean_standardize_expand_cross_validate_ridge_regression_analytical_mse_no_split')

# The return value is kept under its own name (result1).
result1 = evaluate(
    clean = clean_standardize_expand, 
    fit   = fit_with_cache(cross_validate(ridge_regression_analytical, ridge_mse), cache), 
    x     = x,
    y     = y, 
    hs    = hs
)

### Logistic Ridge

In [None]:
# Hyper-parameters for cross-validated logistic SGD with the ridge term,
# run once per split. The degree candidates are -2 followed by 1..3.
hs = {
    'batch_size': 2500,
    'degree': np.concatenate([np.array([-2]), np.arange(1, 4)]), 
    'gamma': [1e-2, 1e-3], 
    'lambda': [1e-2, 1e-3],
    'k_fold': 4,
    'max_iters': 1000,
    'num_batches': 1,
    'seed': 0,
    'seed_cv': 0
}

# One evaluation per split, each with its own cache file (suffix split{i}).
# The return value is discarded; with log=True the progress lines below are
# the visible output.
for i in range(5):
    
    cache = Cache(CACHE_DIR + f'clean_standardize_expand_logistic_ridge_regression_cross_validate_split{i}')

    _ = evaluate(
        clean = map_logistic(clean_standardize_expand), 
        fit   = descent_with_cache(
            descent    = cross_validate_descent(
                            stochastic_gradient_descent_e(logistic_gradient_ridge), 
                            logistic_error_and_ridge,
                        ),
            round_size = 100,
            cache      = cache,
            log        = True
        ), 
        y     = y_split[i],
        x     = x_split[i],
        hs    = hs
    )

iteration 100 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 0, 'seed_cv': 0, 'avg_logistic_err_tr': 0.2802956357278603, 'avg_ridge_norm_tr': 0.005513907527966065, 'avg_total_loss_tr': 0.28580954325582636, 'avg_n_err_tr': 0.06683809123985446, 'avg_logistic_err_te': 0.2806963559780926, 'avg_ridge_norm_te': 0.005513907527966065, 'avg_total_loss_te': 0.28621026350605866, 'avg_n_err_te': 0.06664567590260286}
iteration 200 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 100, 'seed_cv': 0, 'avg_logistic_err_tr': 0.2390144211466928, 'avg_ridge_norm_tr': 0.007271311608074616, 'avg_total_loss_tr': 0.24628573275476745, 'avg_n_err_tr': 0.06561363000279877, 'avg_logistic_err_te': 0.24036869631921826, 'avg_ridge_norm_te': 0.007271311608074616, 'avg_total_loss_te': 0.24764000792729288, 'avg_n_err_te': 0.06591099916036944}
iteration 300 - {'batch_s

iteration 800 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 700, 'seed_cv': 0, 'avg_logistic_err_tr': 0.20184105337439734, 'avg_ridge_norm_tr': 0.0010843835438105157, 'avg_total_loss_tr': 0.20292543691820786, 'avg_n_err_tr': 0.06418800727679821, 'avg_logistic_err_te': 0.20466341143721586, 'avg_ridge_norm_te': 0.0010843835438105157, 'avg_total_loss_te': 0.2057477949810264, 'avg_n_err_te': 0.06441540722082284}
iteration 900 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 800, 'seed_cv': 0, 'avg_logistic_err_tr': 0.20099121197841052, 'avg_ridge_norm_tr': 0.0011130215897378797, 'avg_total_loss_tr': 0.20210423356814844, 'avg_n_err_tr': 0.06480023789532605, 'avg_logistic_err_te': 0.2041965234856376, 'avg_ridge_norm_te': 0.0011130215897378797, 'avg_total_loss_te': 0.2053095450753755, 'avg_n_err_te': 0.06507136859781695}
iteration 1000 - 

iteration 500 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 400, 'seed_cv': 0, 'avg_logistic_err_tr': 0.3376890843630257, 'avg_ridge_norm_tr': 0.0003779955406275863, 'avg_total_loss_tr': 0.3380670799036532, 'avg_n_err_tr': 0.06885845228099637, 'avg_logistic_err_te': 0.33829578869501037, 'avg_ridge_norm_te': 0.0003779955406275863, 'avg_total_loss_te': 0.338673784235638, 'avg_n_err_te': 0.06874475230898405}
iteration 600 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 500, 'seed_cv': 0, 'avg_logistic_err_tr': 0.3191790124277407, 'avg_ridge_norm_tr': 0.0004192475515141929, 'avg_total_loss_tr': 0.3195982599792549, 'avg_n_err_tr': 0.06851735236495941, 'avg_logistic_err_te': 0.3198090562366863, 'avg_ridge_norm_te': 0.0004192475515141929, 'avg_total_loss_te': 0.3202283037882005, 'avg_n_err_te': 0.06842989084802686}
iteration 700 - {'ba

iteration 200 - {'batch_size': 2500, 'degree': 2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 100, 'seed_cv': 0, 'avg_logistic_err_tr': 0.2460011425728144, 'avg_ridge_norm_tr': 0.0007507446760006462, 'avg_total_loss_tr': 0.24675188724881503, 'avg_n_err_tr': 0.07071263643996642, 'avg_logistic_err_te': 0.24630073622936408, 'avg_ridge_norm_te': 0.0007507446760006462, 'avg_total_loss_te': 0.24705148090536472, 'avg_n_err_te': 0.07066015952980689}
iteration 300 - {'batch_size': 2500, 'degree': 2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 200, 'seed_cv': 0, 'avg_logistic_err_tr': 0.23096633027354962, 'avg_ridge_norm_tr': 0.0008582221869459955, 'avg_total_loss_tr': 0.23182455246049563, 'avg_n_err_tr': 0.0698817520291072, 'avg_logistic_err_te': 0.231158512610165, 'avg_ridge_norm_te': 0.0008582221869459955, 'avg_total_loss_te': 0.232016734797111, 'avg_n_err_te': 0.06995172124265323}
iteration 400 - {'batc

iteration 900 - {'batch_size': 2500, 'degree': 2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 800, 'seed_cv': 0, 'avg_logistic_err_tr': 0.29667656497797606, 'avg_ridge_norm_tr': 0.005229583741895787, 'avg_total_loss_tr': 0.30190614871987187, 'avg_n_err_tr': 0.07169220542961097, 'avg_logistic_err_te': 0.2970375638728564, 'avg_ridge_norm_te': 0.005229583741895787, 'avg_total_loss_te': 0.30226714761475215, 'avg_n_err_te': 0.0716309823677582}
iteration 1000 - {'batch_size': 2500, 'degree': 2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 900, 'seed_cv': 0, 'avg_logistic_err_tr': 0.2874056947791859, 'avg_ridge_norm_tr': 0.005519204611156941, 'avg_total_loss_tr': 0.29292489939034283, 'avg_n_err_tr': 0.07116743632801567, 'avg_logistic_err_te': 0.2878087430615758, 'avg_ridge_norm_te': 0.005519204611156941, 'avg_total_loss_te': 0.29332794767273274, 'avg_n_err_te': 0.07128988245172124}
iteration 100 - {'batch

iteration 600 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 500, 'seed_cv': 0, 'avg_logistic_err_tr': 0.21713403580808613, 'avg_ridge_norm_tr': 0.009631393812593942, 'avg_total_loss_tr': 0.22676542962068008, 'avg_n_err_tr': 0.07005667506297229, 'avg_logistic_err_te': 0.21879723215484903, 'avg_ridge_norm_te': 0.009631393812593942, 'avg_total_loss_te': 0.22842862596744296, 'avg_n_err_te': 0.07010915197313183}
iteration 700 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 600, 'seed_cv': 0, 'avg_logistic_err_tr': 0.217192889482131, 'avg_ridge_norm_tr': 0.009972356067687775, 'avg_total_loss_tr': 0.22716524554981882, 'avg_n_err_tr': 0.06957563671984326, 'avg_logistic_err_te': 0.21843001048224425, 'avg_ridge_norm_te': 0.009972356067687775, 'avg_total_loss_te': 0.22840236654993204, 'avg_n_err_te': 0.06992548278757346}
iteration 800 - {'batch_

iteration 300 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 200, 'seed_cv': 0, 'avg_logistic_err_tr': 0.4015985262050578, 'avg_ridge_norm_tr': 0.0027325139070319424, 'avg_total_loss_tr': 0.40433104011208976, 'avg_n_err_tr': 0.06982052896725441, 'avg_logistic_err_te': 0.40203826320098873, 'avg_ridge_norm_te': 0.0027325139070319424, 'avg_total_loss_te': 0.4047707771080206, 'avg_n_err_te': 0.0698992443324937}
iteration 400 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 300, 'seed_cv': 0, 'avg_logistic_err_tr': 0.36497648934861326, 'avg_ridge_norm_tr': 0.003331604358658217, 'avg_total_loss_tr': 0.36830809370727147, 'avg_n_err_tr': 0.07026658270361041, 'avg_logistic_err_te': 0.36541063890108577, 'avg_ridge_norm_te': 0.003331604358658217, 'avg_total_loss_te': 0.368742243259744, 'avg_n_err_te': 0.070424013434089}
iteration 500 - {'batch_s

iteration 1000 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 900, 'seed_cv': 0, 'avg_logistic_err_tr': 0.2752158255801573, 'avg_ridge_norm_tr': 0.0005546354486213115, 'avg_total_loss_tr': 0.2757704610287786, 'avg_n_err_tr': 0.07029282115869018, 'avg_logistic_err_te': 0.2764784885299838, 'avg_ridge_norm_te': 0.0005546354486213115, 'avg_total_loss_te': 0.27703312397860513, 'avg_n_err_te': 0.07018786733837112}
iteration 100 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 0, 'seed_cv': 0, 'avg_logistic_err_tr': 0.49982568546495576, 'avg_ridge_norm_tr': 0.003896455504381534, 'avg_total_loss_tr': 0.5037221409693373, 'avg_n_err_tr': 0.2117598164109792, 'avg_logistic_err_te': 0.4999627397352905, 'avg_ridge_norm_te': 0.003896455504381534, 'avg_total_loss_te': 0.503859195239672, 'avg_n_err_te': 0.2129343524692362}
iteration 200 - {'batch_siz

iteration 700 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 600, 'seed_cv': 0, 'avg_logistic_err_tr': 0.44045468623938955, 'avg_ridge_norm_tr': 0.0008753473168474694, 'avg_total_loss_tr': 0.441330033556237, 'avg_n_err_tr': 0.19829330875842502, 'avg_logistic_err_te': 0.44133379048917987, 'avg_ridge_norm_te': 0.0008753473168474694, 'avg_total_loss_te': 0.44220913780602733, 'avg_n_err_te': 0.19860953000487883}
iteration 800 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 700, 'seed_cv': 0, 'avg_logistic_err_tr': 0.4390866341924806, 'avg_ridge_norm_tr': 0.0009157113616222203, 'avg_total_loss_tr': 0.4400023455541028, 'avg_n_err_tr': 0.19813519813519814, 'avg_logistic_err_te': 0.4411337745519255, 'avg_ridge_norm_te': 0.0009157113616222203, 'avg_total_loss_te': 0.4420494859135477, 'avg_n_err_te': 0.1988263674310186}
iteration 900 - {'bat

iteration 400 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 300, 'seed_cv': 0, 'avg_logistic_err_tr': 0.5483542041582139, 'avg_ridge_norm_tr': 0.00021817056391417315, 'avg_total_loss_tr': 0.548572374722128, 'avg_n_err_tr': 0.265363835131277, 'avg_logistic_err_te': 0.5484450562579817, 'avg_ridge_norm_te': 0.00021817056391417315, 'avg_total_loss_te': 0.5486632268218957, 'avg_n_err_te': 0.26585623678646936}
iteration 500 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 400, 'seed_cv': 0, 'avg_logistic_err_tr': 0.5369854009367527, 'avg_ridge_norm_tr': 0.00024880006316096954, 'avg_total_loss_tr': 0.5372342009999137, 'avg_n_err_tr': 0.2472127355848286, 'avg_logistic_err_te': 0.5371908949066602, 'avg_ridge_norm_te': 0.00024880006316096954, 'avg_total_loss_te': 0.5374396949698211, 'avg_n_err_te': 0.24699138071231097}
iteration 600 - {'ba

iteration 100 - {'batch_size': 2500, 'degree': 2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 0, 'seed_cv': 0, 'avg_logistic_err_tr': 0.5166976768196165, 'avg_ridge_norm_tr': 0.00037653713221146707, 'avg_total_loss_tr': 0.5170742139518281, 'avg_n_err_tr': 0.23208380766520303, 'avg_logistic_err_te': 0.5167270876190346, 'avg_ridge_norm_te': 0.00037653713221146707, 'avg_total_loss_te': 0.5171036247512462, 'avg_n_err_te': 0.23147395240418497}
iteration 200 - {'batch_size': 2500, 'degree': 2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 100, 'seed_cv': 0, 'avg_logistic_err_tr': 0.4860216228082623, 'avg_ridge_norm_tr': 0.0005405158452920089, 'avg_total_loss_tr': 0.4865621386535543, 'avg_n_err_tr': 0.21639923384109433, 'avg_logistic_err_te': 0.48595627034056743, 'avg_ridge_norm_te': 0.0005405158452920089, 'avg_total_loss_te': 0.48649678618585945, 'avg_n_err_te': 0.21598362877432647}
iteration 300 - {'batc

iteration 800 - {'batch_size': 2500, 'degree': 2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 700, 'seed_cv': 0, 'avg_logistic_err_tr': 0.5301895671442113, 'avg_ridge_norm_tr': 0.003187982866092577, 'avg_total_loss_tr': 0.5333775500103037, 'avg_n_err_tr': 0.24672485137601416, 'avg_logistic_err_te': 0.5302859193075433, 'avg_ridge_norm_te': 0.003187982866092577, 'avg_total_loss_te': 0.5334739021736359, 'avg_n_err_te': 0.246530601181764}
iteration 900 - {'batch_size': 2500, 'degree': 2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 800, 'seed_cv': 0, 'avg_logistic_err_tr': 0.5237784628110755, 'avg_ridge_norm_tr': 0.0034362663829595515, 'avg_total_loss_tr': 0.5272147291940351, 'avg_n_err_tr': 0.2398989898989899, 'avg_logistic_err_te': 0.5238882282567584, 'avg_ridge_norm_te': 0.0034362663829595515, 'avg_total_loss_te': 0.527324494639718, 'avg_n_err_te': 0.23983574564969914}
iteration 1000 - {'batch_size'

iteration 500 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 400, 'seed_cv': 0, 'avg_logistic_err_tr': 0.4641311305870716, 'avg_ridge_norm_tr': 0.007064360813961809, 'avg_total_loss_tr': 0.47119549140103345, 'avg_n_err_tr': 0.20387235503514572, 'avg_logistic_err_te': 0.46566982781071137, 'avg_ridge_norm_te': 0.007064360813961809, 'avg_total_loss_te': 0.47273418862467315, 'avg_n_err_te': 0.20370520951916302}
iteration 600 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 500, 'seed_cv': 0, 'avg_logistic_err_tr': 0.45967060354308487, 'avg_ridge_norm_tr': 0.007622830156098176, 'avg_total_loss_tr': 0.467293433699183, 'avg_n_err_tr': 0.20371876185829674, 'avg_logistic_err_te': 0.4606742675859047, 'avg_ridge_norm_te': 0.007622830156098176, 'avg_total_loss_te': 0.46829709774200284, 'avg_n_err_te': 0.2034612674147558}
iteration 700 - {'batch_siz

iteration 200 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 100, 'seed_cv': 0, 'avg_logistic_err_tr': 0.5811726665327275, 'avg_ridge_norm_tr': 0.0014302255168646394, 'avg_total_loss_tr': 0.5826028920495923, 'avg_n_err_tr': 0.24066695578323485, 'avg_logistic_err_te': 0.5806274749842533, 'avg_ridge_norm_te': 0.0014302255168646394, 'avg_total_loss_te': 0.5820577005011179, 'avg_n_err_te': 0.2403642868759148}
iteration 300 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 200, 'seed_cv': 0, 'avg_logistic_err_tr': 0.5584724622737427, 'avg_ridge_norm_tr': 0.0018622449481695035, 'avg_total_loss_tr': 0.5603347072219123, 'avg_n_err_tr': 0.23441480999620534, 'avg_logistic_err_te': 0.5580204887541647, 'avg_ridge_norm_te': 0.0018622449481695035, 'avg_total_loss_te': 0.5598827337023342, 'avg_n_err_te': 0.2340488968395945}
iteration 400 - {'batch_si

iteration 900 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 800, 'seed_cv': 0, 'avg_logistic_err_tr': 0.5126731313287469, 'avg_ridge_norm_tr': 0.0003397926201056944, 'avg_total_loss_tr': 0.5130129239488526, 'avg_n_err_tr': 0.22215897797293144, 'avg_logistic_err_te': 0.5128936144715188, 'avg_ridge_norm_te': 0.0003397926201056944, 'avg_total_loss_te': 0.5132334070916245, 'avg_n_err_te': 0.2219873150105708}
iteration 1000 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 900, 'seed_cv': 0, 'avg_logistic_err_tr': 0.5099118880259963, 'avg_ridge_norm_tr': 0.00035229703133225884, 'avg_total_loss_tr': 0.5102641850573285, 'avg_n_err_tr': 0.2231211940514266, 'avg_logistic_err_te': 0.5103025987447334, 'avg_ridge_norm_te': 0.00035229703133225884, 'avg_total_loss_te': 0.5106548957760655, 'avg_n_err_te': 0.2241421369328346}
iteration 100 - {'batc

iteration 600 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 500, 'seed_cv': 0, 'avg_logistic_err_tr': 0.4893789405550825, 'avg_ridge_norm_tr': 0.0008469389537562391, 'avg_total_loss_tr': 0.4902258795088388, 'avg_n_err_tr': 0.22093931599504618, 'avg_logistic_err_te': 0.4908823686734152, 'avg_ridge_norm_te': 0.0008469389537562391, 'avg_total_loss_te': 0.4917293076271715, 'avg_n_err_te': 0.22307802229208346}
iteration 700 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 600, 'seed_cv': 0, 'avg_logistic_err_tr': 0.4837317266710782, 'avg_ridge_norm_tr': 0.0009064168786495389, 'avg_total_loss_tr': 0.48463814354972773, 'avg_n_err_tr': 0.21854815661617605, 'avg_logistic_err_te': 0.48600143141646424, 'avg_ridge_norm_te': 0.0009064168786495389, 'avg_total_loss_te': 0.48690784829511374, 'avg_n_err_te': 0.21927693626750502}
iteration 800 - {'b

iteration 300 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 200, 'seed_cv': 0, 'avg_logistic_err_tr': 0.6032918763817133, 'avg_ridge_norm_tr': 0.00015148620038794676, 'avg_total_loss_tr': 0.6034433625821013, 'avg_n_err_tr': 0.35003810612555963, 'avg_logistic_err_te': 0.603684604580117, 'avg_ridge_norm_te': 0.00015148620038794676, 'avg_total_loss_te': 0.6038360907805049, 'avg_n_err_te': 0.34969991426121744}
iteration 400 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 300, 'seed_cv': 0, 'avg_logistic_err_tr': 0.5928439019046364, 'avg_ridge_norm_tr': 0.0001814821116821988, 'avg_total_loss_tr': 0.5930253840163187, 'avg_n_err_tr': 0.3394446032199676, 'avg_logistic_err_te': 0.5932075183911136, 'avg_ridge_norm_te': 0.0001814821116821988, 'avg_total_loss_te': 0.5933890005027957, 'avg_n_err_te': 0.3401686196056016}
iteration 500 - {'bat

iteration 200 - {'batch_size': 2500, 'degree': 1, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 100, 'seed_cv': 0, 'avg_logistic_err_tr': 0.6766475182530035, 'avg_ridge_norm_tr': 5.789965187952384e-05, 'avg_total_loss_tr': 0.6767054179048829, 'avg_n_err_tr': 0.3256978184243117, 'avg_logistic_err_te': 0.6766566440909527, 'avg_ridge_norm_te': 5.789965187952384e-05, 'avg_total_loss_te': 0.6767145437428321, 'avg_n_err_te': 0.3257216347527865}
iteration 300 - {'batch_size': 2500, 'degree': 1, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 200, 'seed_cv': 0, 'avg_logistic_err_tr': 0.6701692266039162, 'avg_ridge_norm_tr': 8.296495465867697e-05, 'avg_total_loss_tr': 0.670252191558575, 'avg_n_err_tr': 0.3244117366866724, 'avg_logistic_err_te': 0.6702064059357027, 'avg_ridge_norm_te': 8.296495465867697e-05, 'avg_total_loss_te': 0.6702893708903613, 'avg_n_err_te': 0.32446413260931695}
iteration 400 - {'batch_si

iteration 900 - {'batch_size': 2500, 'degree': 2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 800, 'seed_cv': 0, 'avg_logistic_err_tr': 0.4940306826091957, 'avg_ridge_norm_tr': 0.009544986485251223, 'avg_total_loss_tr': 0.5035756690944468, 'avg_n_err_tr': 0.22671715728303327, 'avg_logistic_err_te': 0.4954586693473948, 'avg_ridge_norm_te': 0.009544986485251223, 'avg_total_loss_te': 0.505003655832646, 'avg_n_err_te': 0.22667905115747358}
iteration 1000 - {'batch_size': 2500, 'degree': 2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 900, 'seed_cv': 0, 'avg_logistic_err_tr': 0.4918545852628742, 'avg_ridge_norm_tr': 0.009879826077266209, 'avg_total_loss_tr': 0.5017344113401404, 'avg_n_err_tr': 0.22494522244450793, 'avg_logistic_err_te': 0.49333590515807624, 'avg_ridge_norm_te': 0.009879826077266209, 'avg_total_loss_te': 0.5032157312353424, 'avg_n_err_te': 0.22525007144898546}
iteration 100 - {'batch_size'

iteration 600 - {'batch_size': 2500, 'degree': 2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 500, 'seed_cv': 0, 'avg_logistic_err_tr': 0.5926085429007577, 'avg_ridge_norm_tr': 0.0022203933416733573, 'avg_total_loss_tr': 0.594828936242431, 'avg_n_err_tr': 0.332161569972373, 'avg_logistic_err_te': 0.5928102589621921, 'avg_ridge_norm_te': 0.0022203933416733573, 'avg_total_loss_te': 0.5950306523038654, 'avg_n_err_te': 0.33263789654186915}
iteration 700 - {'batch_size': 2500, 'degree': 2, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 600, 'seed_cv': 0, 'avg_logistic_err_tr': 0.5866180331924548, 'avg_ridge_norm_tr': 0.002448115098676684, 'avg_total_loss_tr': 0.5890661482911315, 'avg_n_err_tr': 0.3247832714108793, 'avg_logistic_err_te': 0.5868952312394109, 'avg_ridge_norm_te': 0.002448115098676684, 'avg_total_loss_te': 0.5893433463380875, 'avg_n_err_te': 0.3244212632180623}
iteration 800 - {'batch_size': 

iteration 300 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 200, 'seed_cv': 0, 'avg_logistic_err_tr': 0.5311285064448088, 'avg_ridge_norm_tr': 0.005700871133760883, 'avg_total_loss_tr': 0.5368293775785697, 'avg_n_err_tr': 0.23952557873678196, 'avg_logistic_err_te': 0.5343838754428233, 'avg_ridge_norm_te': 0.005700871133760883, 'avg_total_loss_te': 0.5400847465765841, 'avg_n_err_te': 0.24015432980851673}
iteration 400 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 300, 'seed_cv': 0, 'avg_logistic_err_tr': 0.5204168435026291, 'avg_ridge_norm_tr': 0.006622566211262514, 'avg_total_loss_tr': 0.5270394097138915, 'avg_n_err_tr': 0.2370963132323521, 'avg_logistic_err_te': 0.5213843277000202, 'avg_ridge_norm_te': 0.006622566211262514, 'avg_total_loss_te': 0.5280068939112827, 'avg_n_err_te': 0.2373963989711346}
iteration 500 - {'batch_size': 2

iteration 1000 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 900, 'seed_cv': 0, 'avg_logistic_err_tr': 0.4837548874987473, 'avg_ridge_norm_tr': 0.0010613961626639952, 'avg_total_loss_tr': 0.4848162836614113, 'avg_n_err_tr': 0.2229017814613699, 'avg_logistic_err_te': 0.48551334690974135, 'avg_ridge_norm_te': 0.0010613961626639952, 'avg_total_loss_te': 0.48657474307240534, 'avg_n_err_te': 0.22394969991426122}
iteration 100 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.01, 'max_iters': 100, 'num_batches': 1, 'seed': 0, 'seed_cv': 0, 'avg_logistic_err_tr': 0.6403569193599665, 'avg_ridge_norm_tr': 0.0006621331979724392, 'avg_total_loss_tr': 0.641019052557939, 'avg_n_err_tr': 0.34637039154044014, 'avg_logistic_err_te': 0.64095109289312, 'avg_ridge_norm_te': 0.0006621331979724392, 'avg_total_loss_te': 0.6416132260910925, 'avg_n_err_te': 0.3467562160617319}
iteration 200 - {'batch_size

iteration 700 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 600, 'seed_cv': 0, 'avg_logistic_err_tr': 0.5793040293275751, 'avg_ridge_norm_tr': 0.0002410530146716084, 'avg_total_loss_tr': 0.5795450823422467, 'avg_n_err_tr': 0.2880823092312089, 'avg_logistic_err_te': 0.5802113183563751, 'avg_ridge_norm_te': 0.0002410530146716084, 'avg_total_loss_te': 0.5804523713710467, 'avg_n_err_te': 0.2887825092883681}
iteration 800 - {'batch_size': 2500, 'degree': 3, 'gamma': 0.001, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 700, 'seed_cv': 0, 'avg_logistic_err_tr': 0.5772493913677494, 'avg_ridge_norm_tr': 0.00026056599775348274, 'avg_total_loss_tr': 0.5775099573655029, 'avg_n_err_tr': 0.2803801086024579, 'avg_logistic_err_te': 0.5776901873571235, 'avg_ridge_norm_te': 0.00026056599775348274, 'avg_total_loss_te': 0.5779507533548769, 'avg_n_err_te': 0.2813661046013146}
iteration 900 - {'batch_

iteration 400 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 300, 'seed_cv': 0, 'avg_logistic_err_tr': 0.4594655637029497, 'avg_ridge_norm_tr': 0.0007548192688151838, 'avg_total_loss_tr': 0.46022038297176493, 'avg_n_err_tr': 0.19709429824561403, 'avg_logistic_err_te': 0.4632049226425771, 'avg_ridge_norm_te': 0.0007548192688151838, 'avg_total_loss_te': 0.46395974191139233, 'avg_n_err_te': 0.19846491228070176}
iteration 500 - {'batch_size': 2500, 'degree': -2, 'gamma': 0.01, 'k_fold': 4, 'lambda': 0.001, 'max_iters': 100, 'num_batches': 1, 'seed': 400, 'seed_cv': 0, 'avg_logistic_err_tr': 0.4512803024062271, 'avg_ridge_norm_tr': 0.0008304120547770689, 'avg_total_loss_tr': 0.45211071446100415, 'avg_n_err_tr': 0.19367830634278002, 'avg_logistic_err_te': 0.4558849618926861, 'avg_ridge_norm_te': 0.0008304120547770689, 'avg_total_loss_te': 0.4567153739474632, 'avg_n_err_te': 0.19627192982456138}
iteration 600 - {'b

In [None]:
# Hyper-parameters for the cross-validated analytical ridge regression.
# `degree` holds -2 followed by 4..15, and `lambda` holds five
# log-spaced values from 1e-8 to 1e-4.
# NOTE(review): how `evaluate` treats array-valued vs. scalar entries, and
# what a degree of -2 means to `build_poly`, is not visible here — confirm.
hs = {
    'degree': np.concatenate(([-2], np.arange(4, 16))),
    'lambda': np.logspace(-8, -4, num=5),
    'k_fold': 4,
    'seed': 0,
}

# Run the same evaluation on each of the five data splits, one at a time.
for i in range(5):

    # Every split gets its own cache, identified by the `split{i}` suffix.
    cache = Cache(
        CACHE_DIR
        + f'clean_standardize_expand_cross_validate_ridge_regression_analytical_mse_split{i}'
    )

    # Cross-validated ridge fit scored with `ridge_mse`, wrapped so that it
    # goes through the split-specific cache.
    ridge_cv_fit = fit_with_cache(
        cross_validate(ridge_regression_analytical, ridge_mse),
        cache,
    )

    result = evaluate(
        clean=clean_standardize_expand,
        fit=ridge_cv_fit,
        x=x_split[i],
        y=y_split[i],
        hs=hs,
    )

### Logistic Lasso

In [None]:
# Hyper-parameters for the cross-validated logistic lasso descent.
# `degree` holds -2 followed by 1..3; `gamma` and `lambda` each list two
# candidate values; the remaining entries are single fixed values.
# NOTE(review): how `evaluate` treats list/array-valued vs. scalar entries
# is not visible here — confirm.
hs = {
    'batch_size': 2500,
    'degree': np.concatenate(([-2], np.arange(1, 4))),
    'gamma': [1e-2, 1e-3],
    'lambda': [1e-2, 1e-3],
    'k_fold': 4,
    'max_iters': 1000,
    'num_batches': 1,
    'seed': 0,
    'seed_cv': 0,
}

# Run the same evaluation on each of the five data splits, one at a time.
for i in range(5):

    # Every split gets its own cache, identified by the `split{i}` suffix.
    cache = Cache(
        CACHE_DIR
        + f'clean_standardize_expand_logistic_lasso_regression_cross_validate_split{i}'
    )

    # Stochastic gradient descent on the lasso-penalised logistic gradient,
    # cross-validated and scored with `logistic_error_and_lasso`.
    lasso_cv_descent = cross_validate_descent(
        stochastic_gradient_descent_e(logistic_gradient_lasso),
        logistic_error_and_lasso,
    )

    # NOTE(review): `round_size=100` presumably makes the descent run (and
    # log/cache) in rounds of 100 iterations — confirm in `descent_with_cache`.
    _ = evaluate(
        clean=map_logistic(clean_standardize_expand),
        fit=descent_with_cache(
            descent=lasso_cv_descent,
            round_size=100,
            cache=cache,
            log=True,
        ),
        x=x_split[i],
        y=y_split[i],
        hs=hs,
    )