In [59]:
# Useful starting lines
%matplotlib inline
import numpy as np
'''ONLY FOR VISUALIZATION'''
import pandas as pd 
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load the training data into feature matrix, class labels, and event ids:

In [85]:
from proj1_helpers import *
#Modify DATA_PATH if needed
DATA_TRAIN_PATH = '../../data_project1/train.csv'
y, tX_old, ids = load_csv_data(DATA_TRAIN_PATH)

In [86]:
import csv

def load_csv_data(data_path, sub_sample=False):
    """Loads data and returns y (class labels), tX (features) and ids (event ids)"""
    y = np.genfromtxt(data_path, delimiter=",", skip_header=1, dtype=str, usecols=1)
    x = np.genfromtxt(data_path, delimiter=",", skip_header=1)
    ids = x[:, 0].astype(np.int)
    input_data = x[:, 2:]

    # convert class labels from strings to binary (-1,1)
    yb = np.ones(len(y))
    yb[np.where(y=='b')] = -1
    
    # sub-sample
    if sub_sample:
        yb = yb[::50]
        input_data = input_data[::50]
        ids = ids[::50]

    return yb, input_data, ids


def predict_labels(weights, data):
    """Generates class predictions given weights, and a test data matrix"""
    y_pred = np.dot(data, weights)
    y_pred[np.where(y_pred <= 0)] = -1
    y_pred[np.where(y_pred > 0)] = 1
    
    return y_pred


def create_csv_submission(ids, y_pred, name):
    """
    Creates an output file in csv format for submission to kaggle
    Arguments: ids (event ids associated with each prediction)
               y_pred (predicted class labels)
               name (string name of .csv output file to be created)
    """
    with open(name, 'w') as csvfile:
        fieldnames = ['Id', 'Prediction']
        writer = csv.DictWriter(csvfile, delimiter=",", fieldnames=fieldnames)
        writer.writeheader()
        for r1, r2 in zip(ids, y_pred):
            writer.writerow({'Id':int(r1),'Prediction':int(r2)})
            
def batch_iter(y, tx, batch_size, num_batches=1, shuffle=True):
    """
    Generate a minibatch iterator for a dataset.
    Takes as input two iterables (here the output desired values 'y' and the input data 'tx')
    Outputs an iterator which gives mini-batches of `batch_size` matching elements from `y` and `tx`.
    Data can be randomly shuffled to avoid ordering in the original data messing with the randomness of the minibatches.
    Example of use :
    for minibatch_y, minibatch_tx in batch_iter(y, tx, 32):
        <DO-SOMETHING>
    """
    data_size = len(y)

    if shuffle:
        shuffle_indices = np.random.permutation(np.arange(data_size))
        shuffled_y = y[shuffle_indices]
        shuffled_tx = tx[shuffle_indices]
    else:
        shuffled_y = y
        shuffled_tx = tx
    for batch_num in range(num_batches):
        start_index = batch_num * batch_size
        end_index = min((batch_num + 1) * batch_size, data_size)
        if start_index != end_index:
            yield shuffled_y[start_index:end_index], shuffled_tx[start_index:end_index]

def standardize(x):
    """Standardize the original data set."""
    mean_x = np.mean(x[x > -999], axis=0)
    x = x - mean_x
    std_x = np.std(x[x > -999], axis=0)
    x = x / std_x
    return x, mean_x, std_x

def log_standardize(x):
    """Standardize the original data set."""
    x = np.log(x[x > -999])
    mean_x = np.mean(x[x > -999], axis=0)
    x = x - mean_x
    std_x = np.std(x[x > -999], axis=0)
    x = x / std_x
    return x, mean_x, std_x



In [87]:
'''DATASET INTRINSICS AND SHAPE (TARGETS AND IDS INCLUDED)'''
def DataSetInfo(tX_old, y, ids):
    print("Training examples: ", tX_old, " & shape: ")
    print("Targets: ", y)
    print("Ids: ",ids)
    print("Shapes of tX, y & Ids: ", tX_old.shape, y.shape, ids.shape)
DataSetInfo(tX_old, y, ids)

Training examples:  [[ 138.47    51.655   97.827 ...    1.24    -2.475  113.497]
 [ 160.937   68.768  103.235 ... -999.    -999.      46.226]
 [-999.     162.172  125.953 ... -999.    -999.      44.251]
 ...
 [ 105.457   60.526   75.839 ... -999.    -999.      41.992]
 [  94.951   19.362   68.812 ... -999.    -999.       0.   ]
 [-999.      72.756   70.831 ... -999.    -999.       0.   ]]  & shape: 
Targets:  [ 1. -1. -1. ...  1. -1. -1.]
Ids:  [100000 100001 100002 ... 349997 349998 349999]
Shapes of tX, y & Ids:  (250000, 30) (250000,) (250000,)


In [88]:
'''INITIALIZE WEIGHTS'''
def InitWeights():
    ww = np.random.rand(tX.shape[1])
    #init_w = np.zeros(tX.shape[1])
    init_w = np.array(ww, dtype=np.float64)
    return init_w

In [89]:
'''PREPROCESS: FEATURE EXTRACTION AND STANDARDIZE'''
def PreProcess(tX_old):
    #Feature extraction
    tX = np.hstack((tX_old[:,1:3], tX_old[:,7:9]))
    tX = np.hstack((tX, tX_old[:,10:12]))
    tX = np.hstack((tX, tX_old[:,13:15])) 
    tX = np.hstack((tX, tX_old[:,16:18]))
    tX = np.hstack((tX, tX_old[:,19:20]))
    tX = np.hstack((tX, tX_old[:,21:23]))
    return tX

In [90]:
'''FEATURE CORRELATION MAP: ONLY FOR VISUALIZATION'''
'''CORRELATED FEATURES: CORR > THRESHOLD : USE FOR SYNTHESIS'''
def CorrMap(tX):
    df = pd.DataFrame(tX)
    corr = df.corr()
    return corr.style.background_gradient(cmap='coolwarm')

In [91]:
#tX = PreProcess(tX_old)
tX_pp = PreProcess(tX_old)
CorrMap(tX_pp)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,1.0,0.190109,0.0432514,0.0177575,0.349504,-0.419757,-0.145464,-0.00210917,0.310648,-0.00677707,0.183716,-0.167811,-0.210537
1,0.190109,1.0,0.579712,-0.000702134,0.0974898,-0.0908459,0.290011,0.0021266,0.405482,0.00219566,-0.0873304,0.0533004,-0.02686
2,0.0432514,0.579712,1.0,-0.148081,0.0470462,-0.205441,-0.202035,0.00363223,-0.0699567,0.000698829,-0.402345,-0.407002,-0.347904
3,0.0177575,-0.000702134,-0.148081,1.0,0.0391929,0.178448,0.0957537,0.0035959,0.109617,0.00798659,0.269739,0.448925,0.360409
4,0.349504,0.0974898,0.0470462,0.0391929,1.0,-0.0388027,-0.474633,0.000610942,0.701142,0.00235701,0.0533856,0.0499036,0.056312
5,-0.419757,-0.0908459,-0.205441,0.178448,-0.0388027,1.0,0.140885,0.00595349,0.0540471,0.0124227,0.180756,0.423363,0.490057
6,-0.145464,0.290011,-0.202035,0.0957537,-0.474633,0.140885,1.0,-0.00266116,0.104553,5.34862e-05,0.176665,0.448763,0.205604
7,-0.00210917,0.0021266,0.00363223,0.0035959,0.000610942,0.00595349,-0.00266116,1.0,-0.00333015,0.557086,0.000575642,0.00284137,0.0074675
8,0.310648,0.405482,-0.0699567,0.109617,0.701142,0.0540471,0.104553,-0.00333015,1.0,0.00033679,0.170545,0.358488,0.195738
9,-0.00677707,0.00219566,0.000698829,0.00798659,0.00235701,0.0124227,5.34862e-05,0.557086,0.00033679,1.0,0.000852694,0.0102312,0.0160537


In [100]:
'''FEATURE ENGINEERING'''
def FeatureSynthesis(tX_pp, tX_old):
    '''CORRELATED FEATURES WILL BE USED FOR NEW FEATURE ADDITION'''    
    '''MIN PART'''
    #tX = np.hstack((tX_pp, np.minimum((tX_old[:,15:16] - tX_old[:,18:19]),np.minimum((tX_old[:,15:16] - tX_old[:,20:21]),(tX_old[:,18:19] - tX_old[:,20:21])))))
    tX = np.hstack((tX_pp, np.minimum((tX_old[:,15:16] - tX_old[:,20:21]),(tX_old[:,18:19] - tX_old[:,20:21]))))
    tX = np.hstack((tX, np.minimum((tX_old[:,15:16] - tX_old[:,18:19]),(tX_old[:,15:16] - tX_old[:,20:21]))))
    tX = np.hstack((tX, (tX_old[:,18:19] - tX_old[:,20:21])))
    '''LN PART'''
    #tX = np.hstack((tX, np.log(np.abs(min((tX_old[:,13:14]*tX_old[:,14:15])))+(tX_old[:,13:14]*tX_old[:,14:15]))))
    #tX = np.hstack((tX, np.log(np.abs(min((tX_old[:,16:17]*tX_old[:,17:18])))+(tX_old[:,16:17]*tX_old[:,16:17]))))
    tX = np.hstack((tX, np.log(np.abs(min((tX_old[:,23:24]*tX_old[:,24:25])))+(tX_old[:,23:24]*tX_old[:,24:25]))))
    
   
    return tX
tX = FeatureSynthesis(tX_pp, tX_old)
tX.shape

(250000, 17)

In [101]:
CorrMap(tX)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,1.0,0.190109,0.0432514,0.0177575,0.349504,-0.419757,-0.145464,-0.00210917,0.310648,-0.00677707,0.183716,-0.167811,-0.210537,0.0179209,-0.16072,0.0116236,0.220595
1,0.190109,1.0,0.579712,-0.000702134,0.0974898,-0.0908459,0.290011,0.0021266,0.405482,0.00219566,-0.0873304,0.0533004,-0.02686,-0.0450701,-0.0174414,-0.000397049,0.0140255
2,0.0432514,0.579712,1.0,-0.148081,0.0470462,-0.205441,-0.202035,0.00363223,-0.0699567,0.000698829,-0.402345,-0.407002,-0.347904,-0.132653,-0.0511354,0.000566199,0.305039
3,0.0177575,-0.000702134,-0.148081,1.0,0.0391929,0.178448,0.0957537,0.0035959,0.109617,0.00798659,0.269739,0.448925,0.360409,0.0219618,0.0286189,-0.00484093,-0.187224
4,0.349504,0.0974898,0.0470462,0.0391929,1.0,-0.0388027,-0.474633,0.000610942,0.701142,0.00235701,0.0533856,0.0499036,0.056312,0.00132068,-0.0304761,-0.00277192,-0.0432551
5,-0.419757,-0.0908459,-0.205441,0.178448,-0.0388027,1.0,0.140885,0.00595349,0.0540471,0.0124227,0.180756,0.423363,0.490057,0.0131745,0.115743,-0.00687207,-0.534526
6,-0.145464,0.290011,-0.202035,0.0957537,-0.474633,0.140885,1.0,-0.00266116,0.104553,5.34862e-05,0.176665,0.448763,0.205604,0.0231443,0.0547132,-0.000169287,-0.171826
7,-0.00210917,0.0021266,0.00363223,0.0035959,0.000610942,0.00595349,-0.00266116,1.0,-0.00333015,0.557086,0.000575642,0.00284137,0.0074675,-0.000641511,-0.000896707,0.000805748,-0.00474825
8,0.310648,0.405482,-0.0699567,0.109617,0.701142,0.0540471,0.104553,-0.00333015,1.0,0.00033679,0.170545,0.358488,0.195738,0.0115115,0.00448439,-0.00240064,-0.15939
9,-0.00677707,0.00219566,0.000698829,0.00798659,0.00235701,0.0124227,5.34862e-05,0.557086,0.00033679,1.0,0.000852694,0.0102312,0.0160537,-0.00289858,-0.00163782,-0.000623748,-0.0160896


In [102]:
'''SPLIT INTO TRAIN AND VALIDATION'''
def DataSplit(tX, y, ids, split_size = 0.1):
    train_valid_split = int(tX.shape[0] *split_size)
    print("Validation data size: ", train_valid_split)
    tX_valid = tX[train_valid_split:,:]
    y_valid = y[train_valid_split:]
    id_valid = ids[train_valid_split:]

    tX = tX[:train_valid_split]
    y = y[:train_valid_split]
    ids = ids[:train_valid_split]

    print("Shapes of tX, y & Ids for Training: ", tX.shape, y.shape, ids.shape)
    print("Shapes of tX, y & Ids for Validation: ", tX_valid.shape, y_valid.shape, id_valid.shape)
    return (tX, y, ids), (tX_valid, y_valid, id_valid)

'''LATER PUT UNDER MAIN'''
train_df, valid_df = DataSplit(tX, y, ids)
tX = train_df[0]
y = train_df[1]
ids = train_df[2]

tX_valid = valid_df[0]
y_valid = valid_df[1]
id_valid = valid_df[2]



Validation data size:  25000
Shapes of tX, y & Ids for Training:  (25000, 17) (25000,) (25000,)
Shapes of tX, y & Ids for Validation:  (225000, 17) (225000,) (225000,)


## Do your thing crazy machine learning thing here :) ...

In [103]:
'''GRAD AND LOSS FUNCTIONS'''
def compute_loss(y, tx, w, typ):
    '''typ = <LOSS_TYPE(WITH CAPITAL LETTERS)>'''
    loss = 0
    N = y.shape[0]
    if typ == "MSE":
        loss = (1/(2*N))*np.sum(np.square(y - (tx@w)))        
    elif typ == "MAE":
        loss = (1/(2*N))*np.sum(np.abs(y - (tx@w)))
    return loss

def compute_gradient(y, tx, w):
    '''GRADIENT COMPUTATION'''
    N = y.shape[0]
    e = y - tx@w
    grad = (-1/N) * (tx.T@e)
    return grad

def compute_stoch_gradient(y, tx, w):
    '''STOCHASTIC GRADIENT DESCENT GRADIENT COMPUTATION''' 
    N = y.shape[0]
    e = y - tx@w
    grad = (-1/N)*(tx.T@e)
    return grad

def compute_ls_loss(y, tx, w):
    '''LEAST SQUARES WITH NORMAL EQUATIONS LOSS COMPUTATION'''
    loss = 0
    N = y.shape[0]
    loss = (1/(2*N))*(tx.T@(y - tx@w))
    
def compute_rdg_loss(y, tx, w, lambda_):
    '''RIDGE REGRESSION LOSS COMPUTATION'''
    loss = 0
    N = y.shape[0]
    loss = (1/(2*N))*np.sum(np.square(y - (tx@w))) + (lambda_*np.sum(w.T@w))
    return loss

def sigmoid(tx, w):
    '''SIGMOID CALCULATION'''
    z = 1 / (1 + np.exp(-1*(tx@w)))
    return z

def compute_log_loss(y, tx, w):
    '''LOGISTIC LOSS'''
    loss = 0;
    sigm = sigmoid(tx,w)
    N = y.shape[0]
    loss = (-1/N)*np.sum(y.T@np.log(sigm) + ((1-y).T@np.log(1 - sigm)))
    
    return loss
def compute_log_gradient(y, tx, w):
    '''GRADIENT COMPUTATION FOR LR'''
    N = y.shape[0]
    z = sigmoid(tx,w)
    grad = (1/N) * (tx.T@(z - y))
    return grad

def compute_reg_log_loss(y, tx, w, lambda_):
    '''LOGISTIC LOSS WITH REGULARIZATION'''
    loss = 0;
    sigm = sigmoid(tx,w)
    N = y.shape[0]
    loss = (-1/N)*(np.sum(y.T@np.log(sigm) + ((1-y).T@np.log(1 - sigm))) + ((lambda_/2)*np.sum(w.T@w)))
    
    return loss
def compute_reg_log_gradient(y, tx, w, lambda_):
    '''GRADIENT COMPUTATION FOR LR WITH REGULARIZATION'''
    N = y.shape[0]
    z = sigmoid(tx,w)
    grad = (1/N) * ((tx.T@(z - y)) + (lambda_*w))
    return grad

In [104]:
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    '''BATCH GRADIENT DESCENT'''
    w = initial_w
    for n_iter in range(max_iters):
        loss = compute_loss(y, tx, w, "MSE")
        grad = compute_gradient(y, tx, w)
        w = w - (gamma * grad)
        #print("Gradient Descent({bi}/{ti}): loss={l}".format(bi=n_iter, ti=max_iters - 1, l=loss))

    return (w, loss)

In [105]:
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    '''STOCHASTIC GRADIENT DESCENT'''
    w = initial_w 
    for n_iter in range(max_iters):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, 1):
            loss = compute_loss(minibatch_y, minibatch_tx, w, "MSE")
            grad = compute_gradient(minibatch_y, minibatch_tx, w)
            w = w - gamma * grad
            #print("Stochastic Gradient Descent({bi}/{ti}): loss={l}".format(bi=n_iter, ti=max_iters - 1, l=loss))
    return (w, loss)

In [106]:
def least_squares(y, tx):
    '''COMPUTE W_STAR: WEIGHT FOR NORMAL EQUATIONS BY LINEAR EQUATION SOLVER'''
    w_star = np.linalg.solve(tx.T@tx, tx.T@y)
    loss = compute_ls_loss(y, tx, w_star)
    return (w_star,loss)

In [107]:
def ridge_regression(y, tx, lambda_):
    '''RIDGE REGRESSION WITH LAMBDA PARAMETER AS REGULARIZATION PARAMETER'''
    N = y.shape[0]
    w_ridge = np.linalg.inv((tx.T@tx)+(lambda_/(2*N))*np.identity(tx.shape[1]))@tx.T@y
    loss = compute_rdg_loss(y, tx, w_ridge, lambda_)
    return (w_ridge, loss)
    

In [108]:
def logistic_regression(y, tx, initial_w, max_iters, gamma):
    '''FOR GRADIENT DESCENT'''
    w = initial_w
    for n_iter in range(max_iters):
        loss = compute_log_loss(y, tx, w)
        grad = compute_log_gradient(y, tx, w)
        w = w - (gamma * grad)
        #print("Logistic Regression Gradient Descent({bi}/{ti}): loss={l}".format(bi=n_iter, ti=max_iters - 1, l=loss))

    return (w, loss)
    '''FOR STOCHASTIC GRADIENT DESCENT'''
    '''
    w = initial_w 
    for n_iter in range(max_iters):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, 1):
            loss = compute_log_loss(minibatch_y, minibatch_tx, w)
            grad = compute_log_gradient(minibatch_y, minibatch_tx, w)
            w = w - gamma * grad
            #print("Stochastic Gradient Descent({bi}/{ti}): loss={l}".format(bi=n_iter, ti=max_iters - 1, l=loss))
    return (w, loss)
    '''

In [109]:
def reg_logistic_regression(y, tx, lambda_, initial_w, max_iters, gamma):
    '''FOR GRADIENT DESCENT WITH REGULARIZATION'''
    '''FOR GRADIENT DESCENT'''
    w = initial_w
    for n_iter in range(max_iters):
        loss = compute_reg_log_loss(y, tx, w, lambda_)
        grad = compute_reg_log_gradient(y, tx, w, lambda_)
        w = w - (gamma * grad)
        #print("Logistic Regression Gradient Descent({bi}/{ti}): loss={l}".format(bi=n_iter, ti=max_iters - 1, l=loss))

    return (w, loss)
    '''FOR STOCHASTIC GRADIENT DESCENT'''
    '''
    w = initial_w 
    for n_iter in range(max_iters):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, 1):
            loss = compute_reg_log_loss(minibatch_y, minibatch_tx, w, lambda_)
            grad = compute__reg_log_gradient(minibatch_y, minibatch_tx, w, lambda_)
            w = w - gamma * grad
            #print("Stochastic Gradient Descent({bi}/{ti}): loss={l}".format(bi=n_iter, ti=max_iters - 1, l=loss))
    return (w, loss)
    '''

## Generate predictions and save ouput in csv format for submission:

In [110]:
DATA_TEST_PATH = '../../data_project1/test.csv' # TODO: download train data and supply path here 
_, tX_test_old, ids_test = load_csv_data(DATA_TEST_PATH)

In [111]:
'''DATA PREPROCESSED AND STANDARDIZED'''
tX_test_pp = PreProcess(tX_test_old)
tX_test = FeatureSynthesis(tX_test_pp, tX_test_old)
tX, tX_mean, tX_std = standardize(tX)
tX_test, tX_t_mean, tX_t_std = standardize(tX_test)
tX[tX <= -999] = 0
tX_test[tX_test <= -999] = 0
CorrMap(tX)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,1.0,0.187057,0.0441282,0.0547502,0.35147,-0.416097,-0.15012,0.0117089,0.310749,-0.0044413,0.191797,-0.166144,-0.212023,0.0237979,-0.163294,0.022473,0.219345
1,0.187057,1.0,0.586873,-0.00583888,0.0969745,-0.0964243,0.288142,0.00435259,0.403539,-0.0110696,-0.0777539,0.0530509,-0.027047,-0.049762,-0.0184631,-0.00605001,0.0118478
2,0.0441282,0.586873,1.0,-0.122878,0.0396088,-0.204055,-0.198457,-0.000769951,-0.0705288,-0.0107163,-0.351309,-0.406646,-0.348392,-0.139308,-0.0579639,-0.00305371,0.304352
3,0.0547502,-0.00583888,-0.122878,1.0,0.030556,0.141944,0.0805554,0.00663931,0.0850146,0.00949873,0.484449,0.358434,0.282414,0.020259,0.0233787,-0.0053734,-0.144226
4,0.35147,0.0969745,0.0396088,0.030556,1.0,-0.0356787,-0.477465,0.00486578,0.707674,0.000467607,0.0431673,0.0500398,0.0584767,0.00858368,-0.0354145,0.00637269,-0.0408042
5,-0.416097,-0.0964243,-0.204055,0.141944,-0.0356787,1.0,0.137794,0.000536186,0.0513226,0.0118104,0.158807,0.424397,0.492527,0.0185331,0.111697,-0.000669002,-0.535995
6,-0.15012,0.288142,-0.198457,0.0805554,-0.477465,0.137794,1.0,-0.00743797,0.095063,-0.00479393,0.152149,0.445197,0.205779,0.0264275,0.0580833,0.00125962,-0.17715
7,0.0117089,0.00435259,-0.000769951,0.00663931,0.00486578,0.000536186,-0.00743797,1.0,-0.00354952,0.558894,0.0106231,0.00466345,0.00764116,0.00482427,0.00136008,0.00134411,-0.00925113
8,0.310749,0.403539,-0.0705288,0.0850146,0.707674,0.0513226,0.095063,-0.00354952,1.0,-0.00388633,0.15175,0.359082,0.197031,0.0189898,0.00152486,0.00547788,-0.15901
9,-0.0044413,-0.0110696,-0.0107163,0.00949873,0.000467607,0.0118104,-0.00479393,0.558894,-0.00388633,1.0,0.00745412,0.00788229,0.0135267,-0.00017637,-0.00293746,0.00308655,-0.0189783


In [112]:
init_w = InitWeights()
print(init_w.shape)
print(init_w)

(17,)
[0.06985265 0.97844462 0.71939958 0.95301803 0.11172363 0.11313413
 0.31713252 0.9532279  0.56993231 0.02600614 0.94400587 0.31586729
 0.86147554 0.62111209 0.25077319 0.86661924 0.08011757]


In [113]:
'''HYPER PARAMETERS FOR TUNING'''
max_iter = np.array([200, 300, 400, 500, 600, 700, 800, 1000])
gamma = np.array([1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9, 1e-10])
best_res = np.zeros((6,1))
best_gamma = np.zeros((6,1))
best_iter = np.zeros((6,1))
best_lambda = np.zeros((6,1))
best_grad = np.zeros((6,tX.shape[1]))

In [114]:
'''PREDICTIONS FOR MODELS'''
'''BATCH GD'''
count = 0
for n_iter in max_iter:
    for n_gamma in gamma:
        (w1,loss1) = least_squares_GD(y, tX, init_w, n_iter, n_gamma)
        gd_tr_pred = predict_labels(w1, tX_valid)
        res = (gd_tr_pred == y_valid).mean()
        if res > best_res[count]:
            best_grad[count] = w1
            best_res[count] = res
            best_iter[count] = n_iter
            best_gamma[count] = n_gamma
print("Parameters for best accuracy in BGD: ", best_res[count], " are gamma:= ",best_gamma[count], " & iteration number:=", best_iter[count])


Parameters for best accuracy in BGD:  [0.71826667]  are gamma:=  [0.1]  & iteration number:= [1000.]


In [115]:
'''SGD'''
count = 1
for n_iter in max_iter:
    for n_gamma in gamma:
        (w2,loss2) = least_squares_SGD(y, tX, init_w, n_iter, n_gamma)
        sgd_tr_pred = predict_labels(w2, tX_valid)
        res = (sgd_tr_pred == y_valid).mean()
        if res > best_res[count]:
            best_grad[count] = w2
            best_res[count] = res
            best_iter[count] = n_iter
            best_gamma[count] = n_gamma
print("Parameters for best accuracy in SGD: ", best_res[count], " are gamma:= ",best_gamma[count], " & iteration number:=", best_iter[count])


KeyboardInterrupt: 

In [116]:
'''LS WITH NORMAL EQ'''
count = 2
(w3,loss3) = least_squares(y, tX)
ls_tr_pred = predict_labels(w3, tX_valid)
print((ls_tr_pred == y_valid).mean())

0.6621955555555555


In [117]:
'''RIDGE REGRESSION'''
'''CHOOSE BEST LAMBDA'''
count = 3
lambda_ = np.logspace(-1, -10, 50)
min_loss = 1000000
ind = 0
for i in range(lambda_.shape[0]):
    (w4,loss4) = ridge_regression(y, tX, lambda_[i])
    if min_loss > loss4:
        best_lambda[count] = lambda_[i]
        min_loss = loss4
(w4,loss4) = ridge_regression(y, tX, best_lambda[count])       
rd_tr_pred = predict_labels(w4, tX_valid)
print((rd_tr_pred == y_valid).mean())

0.6621955555555555


In [None]:
'''LOGISTIC REGRESSION WITH (STOCHASTIC) GRADIENT DESCENT'''
count = 4
for n_iter in max_iter:
    for n_gamma in gamma:
        (w5,loss5) = logistic_regression(y, tX, init_w, n_iter, n_gamma)
        log_tr_pred = predict_labels(w5, tX_valid)
        res = (log_tr_pred == y_valid).mean()
        if res > best_res[count]:
            best_grad[count] = w5
            best_res[count] = res
            best_iter[count] = n_iter
            best_gamma[count] = n_gamma
print("Parameters for best accuracy in LR with no regularization: ", best_res[count], " are gamma:= ",\
      best_gamma[count], " & iteration number:=", best_iter[count])



In [None]:
'''LOGISTIC REGRESSION WITH (STOCHASTIC) GRADIENT DESCENT + REGULARIZATION'''
'''FOR OPTIMAL PARAMETERS: TAKES SOME TIME TO TRAIN COMPLETELY'''
'''TO ENABLE LAMBDA ITERATION: UNCOMMENT LAMBDA_2 LINES + CHANGE best_lambda[3] TO lambda_2[n_lambda]'''
'''OTHERWISE, LAMBDA CHOSEN FOR RIDGE REGRESSION WILL BE USED'''
count = 5
ind2 = 0
min_loss2 = 1000000
#lambda_2 = np.logspace(-1, -6, 30)
for n_iter in max_iter:
    for n_gamma in gamma:
        #for n_lambda in range(lambda_2.shape[0]):
        (w6,loss6) = reg_logistic_regression(y, tX, best_lambda[3], init_w, n_iter, n_gamma)
        log_tr_pred = predict_labels(w6, tX_valid)
        res = (log_tr_pred == y_valid).mean()
        if res > best_res[count]:
            best_grad[count] = w6
            best_res[count] = res
            best_iter[count] = n_iter
            best_gamma[count] = n_gamma
            #best_lambda[count] = lambda_2[n_lambda]
print("Parameters for best accuracy in LR with regularization: ", best_res[count], " are gamma:= ",\
      best_gamma[count], ", iteration number:=", best_iter[count], " & lambda:", best_lambda[count])


In [None]:
OUTPUT_PATH = '../../data_project1/lr.csv' # TODO: fill in desired name of output file for submission
y_pred = predict_labels(best_grad[0], tX_test)
create_csv_submission(ids_test, y_pred, OUTPUT_PATH)