In [2]:
# Useful starting lines
%matplotlib inline
import numpy as np
'''ONLY FOR VISUALIZATION'''
import pandas as pd 
import matplotlib.pyplot as plt
'''ONLY FOR VISUALIZATION'''
%load_ext autoreload
%autoreload 2

## Load the training data into feature matrix, class labels, and event ids:

In [3]:
from proj1_helpers import *
#Modify DATA_PATH if needed
DATA_TRAIN_PATH = '../../data_project1/train.csv'
y, tX_old, ids = load_csv_data(DATA_TRAIN_PATH)

In [5]:
'''DATASET INTRINSICS AND SHAPE (TARGETS AND IDS INCLUDED)'''
def DataSetInfo(tX_old, y, ids):
    print("Training examples: ", tX_old, " & shape: ")
    print("Targets: ", y)
    print("Ids: ",ids)
    print("Shapes of tX, y & Ids: ", tX_old.shape, y.shape, ids.shape)
DataSetInfo(tX_old, y, ids)

Training examples:  [[ 138.47    51.655   97.827 ...    1.24    -2.475  113.497]
 [ 160.937   68.768  103.235 ... -999.    -999.      46.226]
 [-999.     162.172  125.953 ... -999.    -999.      44.251]
 ...
 [ 105.457   60.526   75.839 ... -999.    -999.      41.992]
 [  94.951   19.362   68.812 ... -999.    -999.       0.   ]
 [-999.      72.756   70.831 ... -999.    -999.       0.   ]]  & shape: 
Targets:  [ 1. -1. -1. ...  1. -1. -1.]
Ids:  [100000 100001 100002 ... 349997 349998 349999]
Shapes of tX, y & Ids:  (250000, 30) (250000,) (250000,)


In [80]:

ind = np.ones((4,2))
ind[0,1] = 4
ind[1,1] = 3
a = [0, 1]
ind[a, :].shape

(2, 2)

In [82]:
'''INITIALIZE WEIGHTS'''
def InitWeights():
    ww = np.random.rand(tX.shape[1])
    #init_w = np.zeros(tX.shape[1])
    init_w = np.array(ww, dtype=np.float64)
    return init_w

'''TAKE LOG TRANSFORMATION OF FEATURES'''
def LogTransformData(tX, features):
    
    data = tX[:, features]
    indices = np.where(data > -999)
    data[indices] = np.log(1 + data[indices])

    return tX


'''IMPUTE DATA WITH MOST FREQUENT VALUES OR ZERO'''
def ImputeData(tX, typ="ZERO"):
    for i in range(tX.shape[1]):
        '''REPLACE ACCORDING TO NAN VALUES(-999)'''
        np.any(tX == -999)
        if np.any(tX[:, i] == -999):
            tX_nonzero = (tX[:, i] > -999)
            val, count = np.unique(tX[tX_nonzero, i], return_counts=True)
            if (len(val) >= 2) and typ == "MF":
                '''MOST FREQUENT VALUE'''
                tX[~tX_nonzero, i] = val[np.argmax(count)]
            elif typ == "ZERO":
                '''ZERO'''
                tX[~tX_nonzero, i] = 0
    return tX

'''PREPROCESS'''
def PreProcess(tX):
    '''FEATURES PICKED BY HAND'''
    feature_vec = np.array(([0, 2, 5, 9, 11, 13, 16, 19, 20, 21, 26, 29]))
    '''LUCKY FEATURE OF THE WEEK: 30 :)'''
    lucky_feature = np.array(([30]))
    tX = LogTransformData(tX, feature_vec)
    tX = ImputeData(tX, "MF")
    tX = Standardize(tX)
    return tX

'''DATASET SEPERATED IN TERMS OF CATEGORIES IN COLUMN 22'''
def Categorize(y, tX):
    '''CATEGORIES '''
    ind = [[] for j in range(4)]
    xx = [[] for j in range(4)]
    yy = [[] for j in range(4)]
    
    for i in range(4):
        ind[i] = np.nonzero(tX[:, 22] == 0)[i]
        xx[i] = tX[ind[i], :]
        yy[i] = y[ind[i]]
        
    return yy, xx, ind

'''PREDICTIONS INTO COMPARABLE FORM'''
def Decategorize(size, y_cat, ind):
    y = np.zeros((size, 1), dtype=np.float)
    for i in range(len(y)):
        y[ind[i]] = y_cat[i]    
    return y

'''STANDARDIZE'''
def Standardize(x):
    mean_x = np.mean(x, axis=0)
    x = x - mean_x
    std_x = np.std(x, axis=0)
    x[:, std_x > 0] = x[:, std_x > 0] / std_x[std_x > 0]
    return x, mean_x, std_x

'''FEATURE CORRELATION MAP: ONLY FOR VISUALIZATION'''
'''CORRELATED FEATURES: CORR > THRESHOLD : USE FOR SYNTHESIS'''
def CorrMap(tX):
    df = pd.DataFrame(tX)
    corr = df.corr()
    return corr.style.background_gradient(cmap='coolwarm')

'''FEATURE ENGINEERING'''
def FeatureSynthesis(tX_pp, tX_old):
    '''CORRELATED FEATURES WILL BE USED FOR NEW FEATURE ADDITION'''
    '''MIN PART'''
    #tX = np.hstack((tX_pp, np.minimum((tX_old[:,15:16] - tX_old[:,18:19]),np.minimum((tX_old[:,15:16] - tX_old[:,20:21]),(tX_old[:,18:19] - tX_old[:,20:21])))))                                           
    tX = np.hstack((tX_pp, np.minimum((tX_old[:,15:16] - tX_old[:,20:21]),(tX_old[:,18:19] - tX_old[:,20:21]))))
    tX = np.hstack((tX, np.minimum((tX_old[:,15:16] - tX_old[:,18:19]),(tX_old[:,15:16] - tX_old[:,20:21]))))
    tX = np.hstack((tX, (tX_old[:,18:19] - tX_old[:,20:21])))
    '''LN PART'''
    #tX = np.hstack((tX, np.log(np.abs(min((tX_old[:,13:14]*tX_old[:,14:15])))+(tX_old[:,13:14]*tX_old[:,14:15]))))                                                                                         
    #tX = np.hstack((tX, np.log(np.abs(min((tX_old[:,16:17]*tX_old[:,17:18])))+(tX_old[:,16:17]*tX_old[:,16:17]))))                                                                                         
    #tX = np.hstack((tX, np.log(np.abs(min((tX_old[:,23:24]*tX_old[:,24:25])))+(tX_old[:,23:24]*tX_old[:,24:25]))))                                                                                         
    return tX

'''SPLIT INTO TRAIN AND VALIDATION: HARDCODED'''
def DataSplit(tX, y, ids, tX_cat, split_size = 0.1):
    train_valid_split = int(tX.shape[0] *split_size)
    print("Validation data size: ", train_valid_split)
    tX = tX[train_valid_split:,:]
    tX_cat = tX_cat[train_valid_split:]
    y = y[train_valid_split:]
    ids = ids[train_valid_split:]

    tX_valid = tX[:train_valid_split,:]
    tX_cat_val = tX_cat[:train_valid_split]
    y_valid = y[:train_valid_split]
    ids_valid = ids[:train_valid_split]

    print("Shapes of tX, y & Ids for Training: ", tX.shape, y.shape, ids.shape)
    print("Shapes of tX, y & Ids for Validation: ", tX_valid.shape, y_valid.shape, ids_valid.shape)
    return (tX, y, ids,tX_cat), (tX_valid, y_valid, ids_valid, tX_cat_val)

'''RANDOM DATA SPLIT'''
def RandomizedDataSplit(tX, y, split_size = 0.1, my_seed=1):
    '''SET SEED FOR RANDOMNESS'''
    np.random.seed(my_seed)
    '''RANDOM INDEXES'''
    size = y.shape[0]
    ind = np.random.permutation(size)
    split = int(np.floor(split_size * size))
    
    ind_train = ind[:split]
    ind_valid = ind[split:]  
    
    '''SPLIT DATA ACCORDING TO RANDOM INDICES'''
    tX_train = tX[ind_train,:]
    tX_valid = tX[ind_valid,:]
    y_train = y[ind_train,:]
    y_valid = y[ind_valid,:]
    
    print("Shapes of tX, y & Ids for Training: ", tX.shape, y.shape, ids.shape)
    print("Shapes of tX, y & Ids for Validation: ", tX_valid.shape, y_valid.shape, ids_valid.shape)
    return (tX_train, y_train),(tX_valid, y_valid)

In [9]:
#tX = PreProcess(tX_old)
tX_pp = PreProcess(tX_old)
tX = FeatureSynthesis(tX_pp, tX_old)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,1.0,0.190109,0.0432514,0.0177575,0.349504,-0.419757,-0.145464,-0.00210917,0.310648,-0.00677707,0.183716,-0.167811,-0.210009
1,0.190109,1.0,0.579712,-0.000702134,0.0974898,-0.0908459,0.290011,0.0021266,0.405482,0.00219566,-0.0873304,0.0533004,-0.0529024
2,0.0432514,0.579712,1.0,-0.148081,0.0470462,-0.205441,-0.202035,0.00363223,-0.0699567,0.000698829,-0.402345,-0.407002,-0.448737
3,0.0177575,-0.000702134,-0.148081,1.0,0.0391929,0.178448,0.0957537,0.0035959,0.109617,0.00798659,0.269739,0.448925,0.403382
4,0.349504,0.0974898,0.0470462,0.0391929,1.0,-0.0388027,-0.474633,0.000610942,0.701142,0.00235701,0.0533856,0.0499036,0.0791164
5,-0.419757,-0.0908459,-0.205441,0.178448,-0.0388027,1.0,0.140885,0.00595349,0.0540471,0.0124227,0.180756,0.423363,0.452224
6,-0.145464,0.290011,-0.202035,0.0957537,-0.474633,0.140885,1.0,-0.00266116,0.104553,5.34862e-05,0.176665,0.448763,0.321341
7,-0.00210917,0.0021266,0.00363223,0.0035959,0.000610942,0.00595349,-0.00266116,1.0,-0.00333015,0.557086,0.000575642,0.00284137,0.00376299
8,0.310648,0.405482,-0.0699567,0.109617,0.701142,0.0540471,0.104553,-0.00333015,1.0,0.00033679,0.170545,0.358488,0.295108
9,-0.00677707,0.00219566,0.000698829,0.00798659,0.00235701,0.0124227,5.34862e-05,0.557086,0.00033679,1.0,0.000852694,0.0102312,0.0102777


In [10]:
CorrMap(tX_pp)

(250000, 16)

In [11]:
CorrMap(tX)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,1.0,0.190109,0.0432514,0.0177575,0.349504,-0.419757,-0.145464,-0.00210917,0.310648,-0.00677707,0.183716,-0.167811,-0.210009,0.0179209,-0.16072,0.0116236
1,0.190109,1.0,0.579712,-0.000702134,0.0974898,-0.0908459,0.290011,0.0021266,0.405482,0.00219566,-0.0873304,0.0533004,-0.0529024,-0.0450701,-0.0174414,-0.000397049
2,0.0432514,0.579712,1.0,-0.148081,0.0470462,-0.205441,-0.202035,0.00363223,-0.0699567,0.000698829,-0.402345,-0.407002,-0.448737,-0.132653,-0.0511354,0.000566199
3,0.0177575,-0.000702134,-0.148081,1.0,0.0391929,0.178448,0.0957537,0.0035959,0.109617,0.00798659,0.269739,0.448925,0.403382,0.0219618,0.0286189,-0.00484093
4,0.349504,0.0974898,0.0470462,0.0391929,1.0,-0.0388027,-0.474633,0.000610942,0.701142,0.00235701,0.0533856,0.0499036,0.0791164,0.00132068,-0.0304761,-0.00277192
5,-0.419757,-0.0908459,-0.205441,0.178448,-0.0388027,1.0,0.140885,0.00595349,0.0540471,0.0124227,0.180756,0.423363,0.452224,0.0131745,0.115743,-0.00687207
6,-0.145464,0.290011,-0.202035,0.0957537,-0.474633,0.140885,1.0,-0.00266116,0.104553,5.34862e-05,0.176665,0.448763,0.321341,0.0231443,0.0547132,-0.000169287
7,-0.00210917,0.0021266,0.00363223,0.0035959,0.000610942,0.00595349,-0.00266116,1.0,-0.00333015,0.557086,0.000575642,0.00284137,0.00376299,-0.000641511,-0.000896707,0.000805748
8,0.310648,0.405482,-0.0699567,0.109617,0.701142,0.0540471,0.104553,-0.00333015,1.0,0.00033679,0.170545,0.358488,0.295108,0.0115115,0.00448439,-0.00240064
9,-0.00677707,0.00219566,0.000698829,0.00798659,0.00235701,0.0124227,5.34862e-05,0.557086,0.00033679,1.0,0.000852694,0.0102312,0.0102777,-0.00289858,-0.00163782,-0.000623748


In [12]:
'''LATER PUT UNDER MAIN'''
train_df, valid_df = DataSplit(tX, y, ids, tX_old[:,22:23])
tX = train_df[0]
y = train_df[1]
y = y.reshape(y.shape[0],1)
ids = train_df[2]
ids = ids.reshape(ids.shape[0],1)
tX_cat = train_df[3]

tX_valid = valid_df[0]
y_valid = valid_df[1]
y_valid = y_valid.reshape(y_valid.shape[0],1)
id_valid = valid_df[2]
id_valid = id_valid.reshape(id_valid.shape[0],1)
tX_cat_val = valid_df[3]



Validation data size:  25000
Shapes of tX, y & Ids for Training:  (225000, 16) (225000,) (225000,)
Shapes of tX, y & Ids for Validation:  (25000, 16) (25000,) (25000,)


## Do your thing crazy machine learning thing here :) ...

In [13]:
'''GRAD AND LOSS FUNCTIONS'''
def compute_loss(y, tx, w, typ):
    '''typ = <LOSS_TYPE(WITH CAPITAL LETTERS)>'''
    loss = 0
    N = y.shape[0]
    if typ == "MSE":
        loss = (1/(2*N))*np.sum(np.square(y - (tx@w)))        
    elif typ == "MAE":
        loss = (1/(2*N))*np.sum(np.abs(y - (tx@w)))
    return loss

def compute_gradient(y, tx, w):
    '''GRADIENT COMPUTATION'''
    N = y.shape[0]
    e = y - tx@w
    grad = (-1/N) * (tx.T@e)
    return grad

def compute_stoch_gradient(y, tx, w):
    '''STOCHASTIC GRADIENT DESCENT GRADIENT COMPUTATION''' 
    N = y.shape[0]
    e = y - tx@w
    grad = (-1/N)*(tx.T@e)
    return grad

def compute_ls_loss(y, tx, w):
    '''LEAST SQUARES WITH NORMAL EQUATIONS LOSS COMPUTATION'''
    loss = 0
    N = y.shape[0]
    loss = (1/(2*N))*(tx.T@(y - tx@w))
    
def compute_rdg_loss(y, tx, w, lambda_):
    '''RIDGE REGRESSION LOSS COMPUTATION'''
    loss = 0
    N = y.shape[0]
    loss = (1/(2*N))*np.sum(np.square(y - (tx@w))) + (lambda_*np.sum(w.T@w))
    return loss

def sigmoid(tx, w):
    '''SIGMOID CALCULATION'''
    z = 1 / (1 + np.exp(-1*(tx@w)))
    return z

def compute_log_loss(y, tx, w):
    '''LOGISTIC LOSS'''
    loss = 0;
    sigm = sigmoid(tx,w)
    N = y.shape[0]
    loss = (-1/N)*np.sum(y.T@np.log(sigm) + ((1-y).T@np.log(1 - sigm)))
    
    return loss
def compute_log_gradient(y, tx, w):
    '''GRADIENT COMPUTATION FOR LR'''
    N = y.shape[0]
    z = sigmoid(tx,w)
    grad = (1/N) * (tx.T@(z - y))
    return grad

def compute_reg_log_loss(y, tx, w, lambda_):
    '''LOGISTIC LOSS WITH REGULARIZATION'''
    loss = 0;
    sigm = sigmoid(tx,w)
    N = y.shape[0]
    loss = (-1/N)*(np.sum(y.T@np.log(sigm) + ((1-y).T@np.log(1 - sigm))) + ((lambda_/2)*np.sum(w.T@w)))
    
    return loss
def compute_reg_log_gradient(y, tx, w, lambda_):
    '''GRADIENT COMPUTATION FOR LR WITH REGULARIZATION'''
    N = y.shape[0]
    z = sigmoid(tx,w)
    grad = (1/N) * ((tx.T@(z - y)) + (lambda_*w))
    return grad

In [14]:
def least_squares_GD(y, tx, initial_w, max_iters, gamma):
    '''BATCH GRADIENT DESCENT'''
    w = initial_w
    for n_iter in range(max_iters):
        loss = compute_loss(y, tx, w, "MSE")
        grad = compute_gradient(y, tx, w)
        w = w - (gamma * grad)
        #print("Gradient Descent({bi}/{ti}): loss={l}".format(bi=n_iter, ti=max_iters - 1, l=loss))

    return (w, loss)

In [15]:
def least_squares_SGD(y, tx, initial_w, max_iters, gamma):
    '''STOCHASTIC GRADIENT DESCENT'''
    w = initial_w 
    for n_iter in range(max_iters):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, 1):
            loss = compute_loss(minibatch_y, minibatch_tx, w, "MSE")
            grad = compute_gradient(minibatch_y, minibatch_tx, w)
            w = w - gamma * grad
            #print("Stochastic Gradient Descent({bi}/{ti}): loss={l}".format(bi=n_iter, ti=max_iters - 1, l=loss))
    return (w, loss)

In [16]:
def least_squares(y, tx):
    '''COMPUTE W_STAR: WEIGHT FOR NORMAL EQUATIONS BY LINEAR EQUATION SOLVER'''
    w_star = np.linalg.solve(tx.T@tx, tx.T@y)
    loss = compute_ls_loss(y, tx, w_star)
    return (w_star,loss)

In [17]:
def ridge_regression(y, tx, lambda_):
    '''RIDGE REGRESSION WITH LAMBDA PARAMETER AS REGULARIZATION PARAMETER'''
    N = y.shape[0]
    w_ridge = np.linalg.inv((tx.T@tx)+(lambda_/(2*N))*np.identity(tx.shape[1]))@tx.T@y
    loss = compute_rdg_loss(y, tx, w_ridge, lambda_)
    return (w_ridge, loss)
    

In [18]:
def logistic_regression(y, tx, initial_w, max_iters, gamma):
    '''FOR GRADIENT DESCENT'''
    w = initial_w
    for n_iter in range(max_iters):
        loss = compute_log_loss(y, tx, w)
        grad = compute_log_gradient(y, tx, w)
        w = w - (gamma * grad)
        #print("Logistic Regression Gradient Descent({bi}/{ti}): loss={l}".format(bi=n_iter, ti=max_iters - 1, l=loss))

    return (w, loss)
    '''FOR STOCHASTIC GRADIENT DESCENT'''
    '''
    w = initial_w 
    for n_iter in range(max_iters):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, 1):
            loss = compute_log_loss(minibatch_y, minibatch_tx, w)
            grad = compute_log_gradient(minibatch_y, minibatch_tx, w)
            w = w - gamma * grad
            #print("Stochastic Gradient Descent({bi}/{ti}): loss={l}".format(bi=n_iter, ti=max_iters - 1, l=loss))
    return (w, loss)
    '''

In [19]:
def reg_logistic_regression(y, tx, lambda_, initial_w, max_iters, gamma):
    '''FOR GRADIENT DESCENT WITH REGULARIZATION'''
    '''FOR GRADIENT DESCENT'''
    w = initial_w
    for n_iter in range(max_iters):
        loss = compute_reg_log_loss(y, tx, w, lambda_)
        grad = compute_reg_log_gradient(y, tx, w, lambda_)
        w = w - (gamma * grad)
        #print("Logistic Regression Gradient Descent({bi}/{ti}): loss={l}".format(bi=n_iter, ti=max_iters - 1, l=loss))

    return (w, loss)
    '''FOR STOCHASTIC GRADIENT DESCENT'''
    '''
    w = initial_w 
    for n_iter in range(max_iters):
        for minibatch_y, minibatch_tx in batch_iter(y, tx, 1):
            loss = compute_reg_log_loss(minibatch_y, minibatch_tx, w, lambda_)
            grad = compute__reg_log_gradient(minibatch_y, minibatch_tx, w, lambda_)
            w = w - gamma * grad
            #print("Stochastic Gradient Descent({bi}/{ti}): loss={l}".format(bi=n_iter, ti=max_iters - 1, l=loss))
    return (w, loss)
    '''

## Generate predictions and save ouput in csv format for submission:

In [20]:
DATA_TEST_PATH = '../../data_project1/test.csv' # TODO: download train data and supply path here 
_, tX_test_old, ids_test = load_csv_data(DATA_TEST_PATH)

In [21]:
'''DATA PREPROCESSED AND STANDARTIZATION'''
tX_test_pp = PreProcess(tX_test_old)
tX_test = FeatureSynthesis(tX_test_pp, tX_test_old)

tX, tX_mean, tX_std = standardize(tX)
tX_final = np.array([tX[np.where(tX_cat == 0),:][0], tX[np.where(tX_cat == 1),:][0], tX[np.where(tX_cat == 2),:][0], tX[np.where(tX_cat == 3),:][0]]
y_final = [y[np.where(tX_cat == 0),:][0],y[np.where(tX_cat == 1),:][0], y[np.where(tX_cat == 2),:][0], y[np.where(tX_cat == 3),:][0]]
ids_final = [ids[np.where(tX_cat == 0),:][0],ids[np.where(tX_cat == 1),:][0], ids[np.where(tX_cat == 2),:][0], ids[np.where(tX_cat == 3),:][0]]

tX_valid, tX_mean_val, tX_std_val = standardize(tX_valid)
tX_val_final = [tX_valid[np.where(tX_cat_val == 0),:][0], tX_valid[np.where(tX_cat_val == 1),:][0], tX_valid[np.where(tX_cat_val == 2),:][0], tX_valid[np.where(tX_cat_val == 3),:][0]]
y_val_final = [y_valid[np.where(tX_cat_val == 0),:][0],y_valid[np.where(tX_cat_val == 1),:][0], y_valid[np.where(tX_cat_val == 2),:][0], y_valid[np.where(tX_cat_val == 3),:][0]]
ids_val_final = [id_valid[np.where(tX_cat_val == 0),:][0],id_valid[np.where(tX_cat_val == 1),:][0], id_valid[np.where(tX_cat_val == 2),:][0], id_valid[np.where(tX_cat_val == 3),:][0]]

tX_cat_test = tX_test_old[:,22:23]
ids_test = ids_test.reshape(ids_test.shape[0],1)
tX_test, tX_t_mean, tX_t_std = standardize(tX_test)
tX_test_final = [tX_test[np.where(tX_cat_test == 0),:][0], tX_test[np.where(tX_cat_test == 1),:][0], tX_test[np.where(tX_cat_test == 2),:][0], tX_test[np.where(tX_cat_test == 3),:][0]]
ids_test_final = [ids_test[np.where(tX_cat_test == 0),:][0],ids_test[np.where(tX_cat_test == 1),:][0], ids_test[np.where(tX_cat_test== 2),:][0], ids_test[np.where(tX_cat_test == 3),:][0]]

for i in range(4):
    tX_final[i][np.where(tX_final[i] <= -999)] = 0
    tX_val_final[i][np.where(tX_val_final[i] <= -999)] = 0
    tX_test_final[i][np.where(tX_test_final[i] <= -999)] = 0

In [22]:
'''HYPER PARAMETERS FOR TUNING'''
max_iter = np.array([200, 300, 400, 500, 600, 700, 800, 1000])
gamma = np.array([1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9, 1e-10])
best_res = np.zeros((6,1))
best_gamma = np.zeros((6,1))
best_iter = np.zeros((6,1))
best_lambda = np.zeros((6,1))
best_grad = np.zeros((6,1))

In [23]:
init_w = np.array((InitWeights(),InitWeights(),InitWeights(),InitWeights()))
print(init_w.shape)
print(init_w)

(4, 16)
[[0.47752817 0.53237564 0.614171   0.40338695 0.4637636  0.64416938
  0.62139253 0.26214141 0.57306712 0.61463914 0.40939511 0.36947413
  0.59873518 0.14783013 0.77775938 0.80120927]
 [0.68800538 0.64373082 0.33953821 0.54799493 0.35130658 0.96164668
  0.5968562  0.51642995 0.91842214 0.74127546 0.17422433 0.57602151
  0.66033403 0.63888455 0.81576852 0.77760735]
 [0.43260563 0.7366731  0.38866435 0.51113149 0.14940554 0.08395296
  0.43442385 0.00697077 0.56693497 0.99265572 0.19162477 0.27167145
  0.82826809 0.18852534 0.93029302 0.45987876]
 [0.96037117 0.12330554 0.36276116 0.68132134 0.02006043 0.26148079
  0.17547043 0.90954437 0.97759811 0.41406438 0.94430178 0.16762658
  0.31519373 0.12138497 0.9689438  0.08747229]]


In [1]:
'''PREDICTIONS FOR MODELS'''
'''BATCH GD'''
count = 0
w1 = np.zeros((init_w.shape[0],init_w.shape[1]))
mod1 = [0, 1, 2 ,3]
gd_tr_pred = [np.zeros((y_val_final[0].shape)), np.zeros((y_val_final[1].shape)), np.zeros((y_val_final[2].shape)), np.zeros((y_val_final[3].shape))]
for n_iter in max_iter:
    for n_gamma in gamma:
        for mod in mod1:
            (w1[mod],loss1) = least_squares_GD(y_final[mod], tX_final[mod], init_w[mod], n_iter, n_gamma)
            gd_tr_pred[mod] = predict_labels(w1[mod], tX_val_final[mod])
        #gd_tr_pred = np.flatten(gd_tr_pred)
        #gd_tr_pred = np.sort(gd_tr_pred,'mergesort')
        for acc in range(len(mod1)):
            
        res = (gd_tr_pred == y_val_final).mean()
        if res > best_res[count]:
            best_grad[count] = np.flatten(w1)
            best_res[count] = res
            best_iter[count] = n_iter
            best_gamma[count] = n_gamma
print("Parameters for best accuracy in BGD: ", best_res[count], " are gamma:= ",best_gamma[count], " & iteration number:=", best_iter[count])


NameError: name 'np' is not defined

In [None]:
init_w = np.array((InitWeights(tX_final[0]),InitWeights(tX_final[1]),InitWeights(tX_final[2]),InitWeights(tX_final[3])))
print(init_w.shape)
print(init_w)

In [None]:
'''SGD'''
count = 1
for n_iter in max_iter:
    for n_gamma in gamma:
        (w2,loss2) = least_squares_SGD(y, tX, init_w, n_iter, n_gamma)
        sgd_tr_pred = predict_labels(w2, tX_valid)
        res = (sgd_tr_pred == y_valid).mean()
        if res > best_res[count]:
            best_grad[count] = w2
            best_res[count] = res
            best_iter[count] = n_iter
            best_gamma[count] = n_gamma
print("Parameters for best accuracy in SGD: ", best_res[count], " are gamma:= ",best_gamma[count], " & iteration number:=", best_iter[count])


In [None]:
'''LS WITH NORMAL EQ'''
count = 2
(w3,loss3) = least_squares(y, tX)
ls_tr_pred = predict_labels(w3, tX_valid)
print((ls_tr_pred == y_valid).mean())

In [None]:
'''RIDGE REGRESSION'''
'''CHOOSE BEST LAMBDA'''
count = 3
lambda_ = np.logspace(-1, -10, 50)
min_loss = 1000000
ind = 0
for i in range(lambda_.shape[0]):
    (w4,loss4) = ridge_regression(y, tX, lambda_[i])
    if min_loss > loss4:
        best_lambda[count] = lambda_[i]
        min_loss = loss4
(w4,loss4) = ridge_regression(y, tX, best_lambda[count])       
rd_tr_pred = predict_labels(w4, tX_valid)
print((rd_tr_pred == y_valid).mean())

In [None]:
init_w = np.array((InitWeights(),InitWeights(),InitWeights(),InitWeights()))
print(init_w.shape)
print(init_w)

In [None]:
'''LOGISTIC REGRESSION WITH (STOCHASTIC) GRADIENT DESCENT'''
count = 4
for n_iter in max_iter:
    for n_gamma in gamma:
        (w5,loss5) = logistic_regression(y, tX, init_w, n_iter, n_gamma)
        log_tr_pred = predict_labels(w5, tX_valid)
        res = (log_tr_pred == y_valid).mean()
        if res > best_res[count]:
            best_grad[count] = w5
            best_res[count] = res
            best_iter[count] = n_iter
            best_gamma[count] = n_gamma
print("Parameters for best accuracy in LR with no regularization: ", best_res[count], " are gamma:= ",\
      best_gamma[count], " & iteration number:=", best_iter[count])



In [None]:
init_w = np.array((InitWeights(),InitWeights(),InitWeights(),InitWeights()))
print(init_w.shape)
print(init_w)

In [None]:
'''LOGISTIC REGRESSION WITH (STOCHASTIC) GRADIENT DESCENT + REGULARIZATION'''
'''FOR OPTIMAL PARAMETERS: TAKES SOME TIME TO TRAIN COMPLETELY'''
'''TO ENABLE LAMBDA ITERATION: UNCOMMENT LAMBDA_2 LINES + CHANGE best_lambda[3] TO lambda_2[n_lambda]'''
'''OTHERWISE, LAMBDA CHOSEN FOR RIDGE REGRESSION WILL BE USED'''
count = 5
ind2 = 0
min_loss2 = 1000000
#lambda_2 = np.logspace(-1, -6, 30)
for n_iter in max_iter:
    for n_gamma in gamma:
        #for n_lambda in range(lambda_2.shape[0]):
        (w6,loss6) = reg_logistic_regression(y, tX, best_lambda[3], init_w, n_iter, n_gamma)
        log_tr_pred = predict_labels(w6, tX_valid)
        res = (log_tr_pred == y_valid).mean()
        if res > best_res[count]:
            best_grad[count] = w6
            best_res[count] = res
            best_iter[count] = n_iter
            best_gamma[count] = n_gamma
            #best_lambda[count] = lambda_2[n_lambda]
print("Parameters for best accuracy in LR with regularization: ", best_res[count], " are gamma:= ",\
      best_gamma[count], ", iteration number:=", best_iter[count], " & lambda:", best_lambda[count])


In [None]:
OUTPUT_PATH = '../../data_project1/lr.csv' # TODO: fill in desired name of output file for submission
y_pred = predict_labels(best_grad[0], tX_test)
create_csv_submission(ids_test, y_pred, OUTPUT_PATH)