In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2
import seaborn as sns

# Seed NumPy's global (legacy) random generator so np.random draws repeat across runs.
SEED = 2
np.random.seed(SEED)

## Load the training and test data into feature matrices, class labels, and event ids, then separate and clean the datasets

In [2]:
from implementations import *

In [4]:
# Training set: class labels `y`, feature matrix `tX`, and event ids `ids`.
DATA_TRAIN_PATH = '../data/train.csv'
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

In [5]:
# Test set: the first value returned by the loader is discarded here; only the
# feature matrix `tX_test` and the event ids `ids_test` are kept.
DATA_TEST_PATH = '../data/test.csv'
_, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)

In [6]:
# Split the training and test sets into per-part sequences; the cells below
# index them with parts 0-5. When labels are passed, separate_dataset returns
# three values; without labels it returns two.
tX_list, ids_list, y_list = separate_dataset(tX, ids, y)
tX_test_list, ids_test_list = separate_dataset(tX_test, ids_test)

In [7]:
# Run clean_data over the per-part training features, test features, and training
# labels (the cleaning rules themselves live in `implementations`).
# NOTE: the results are bound back to the input names, so re-running this cell
# on its own passes already-cleaned data to clean_data again. To redo this
# step, re-run from the separate_dataset cell.
tX_list, tX_test_list, y_list = clean_data(tX_list, tX_test_list, y_list)

## GRID SEARCH

In [8]:
# Model selector (also read by the training cell):
#   1 = least squares GD
#   2 = least squares SGD
#   3 = least squares
#   4 = ridge regression
#   5 = logistic regression
#   6 = reg logistic regression
function = 3

# Candidate hyper-parameters handed to grid_search.
# NOTE: np.logspace with num=1 returns only its start point, so each of the two
# grids below holds a single candidate (1e-10 for lambda, 1e-4 for gamma).
# NOTE(review): the output saved with this notebook reports gamma values of
# 0.1 / 0.01, which these grids cannot produce -- it looks stale; re-run the
# cell to refresh it.
degrees = range(1, 10)
lambdas = np.logspace(-10, 0, num=1)
gammas = np.logspace(-4, 0, num=1)

# Best hyper-parameters per dataset part, stored in part order.
degree_vec, lambda_vec, gamma_vec = [], [], []

for part in range(6):
    print('Dataset part {}:'.format(part))
    # The positional 4 is forwarded unchanged -- presumably the number of
    # cross-validation folds; TODO confirm against grid_search.
    rmse_te, best_degree, best_lambda, best_gamma = grid_search(
        y_list[part], tX_list[part], function, 4, degrees, lambdas, gammas, dataset=part
    )
    degree_vec.append(best_degree)
    lambda_vec.append(best_lambda)
    gamma_vec.append(best_gamma)
    print('     Best degree: {}'.format(best_degree))
    print('     Best lambda: {}'.format(best_lambda))
    print('     Best gamma: {}'.format(best_gamma))
    print('     Loss: {}'.format(rmse_te))

# Summary: one labelled line followed by the per-part values.
for label, values in (
    ("Best degree vector:", degree_vec),
    ("Best lambda vector:", lambda_vec),
    ("Best gamma vector:", gamma_vec),
):
    print(label)
    print(values)

Dataset part 0:
     Best degree: 2
     Best lambda: 1e-10
     Best gamma: 0.1
     Loss: 6141.75
Dataset part 1:
     Best degree: 3
     Best lambda: 1e-10
     Best gamma: 0.1
     Loss: 14702.75
Dataset part 2:
     Best degree: 2
     Best lambda: 1e-10
     Best gamma: 0.1
     Loss: 1709.75
Dataset part 3:
     Best degree: 3
     Best lambda: 1e-10
     Best gamma: 0.1
     Loss: 13373.0
Dataset part 4:
     Best degree: 3
     Best lambda: 1e-10
     Best gamma: 0.01
     Loss: 991.25
Dataset part 5:
     Best degree: 3
     Best lambda: 1e-10
     Best gamma: 0.1
     Loss: 13462.75
[2, 3, 2, 3, 3, 3]
[1e-10, 1e-10, 1e-10, 1e-10, 1e-10, 1e-10]
[0.1, 0.1, 0.1, 0.1, 0.01, 0.1]


## TRAINING

In [11]:
# Train one model per dataset part (parts 0-5) using the hyper-parameters
# selected by the grid search. Fills three lists, in part order:
#   weights_list     -- weights returned by the selected trainer
#   loss_list        -- loss returned alongside the weights
#   mat_tX_test_list -- expanded test features, kept for the inference cell
weights_list = []
loss_list = []
mat_tX_test_list = []

# Reject an unknown selector before any training happens. Previously the loop
# only printed a message and then fell through to the appends below, which
# either raised NameError (fresh kernel) or silently re-appended the weights
# and test matrix left over from an earlier run.
if function not in (1, 2, 3, 4, 5, 6):
    raise ValueError(
        'error function name: {f!r} (expected an integer from 1 to 6)'.format(f=function)
    )

for i in range(6):
    # Every model trains on the same feature expansion, so build it once per
    # part instead of repeating the call in each branch.
    mat_tX, mat_tX_test = build_poly_log(tX_list[i], degree_vec[i], tX_test_list[i], i)

    if function == 1:
        # Least squares, gradient descent: 1-D zero initial weights.
        max_iters = 300
        initial_w = np.zeros(mat_tX.shape[1])
        w, loss = least_squares_GD(y_list[i], mat_tX, initial_w, max_iters, gamma_vec[i])

    elif function == 2:
        # Least squares, stochastic gradient descent: 1-D zero initial weights.
        max_iters = 1000
        initial_w = np.zeros(mat_tX.shape[1])
        w, loss = least_squares_SGD(y_list[i], mat_tX, initial_w, max_iters, gamma_vec[i])

    elif function == 3:
        # Least squares: no hyper-parameters besides the degree.
        w, loss = least_squares(y_list[i], mat_tX)

    elif function == 4:
        # Ridge regression with the selected lambda.
        w, loss = ridge_regression(y_list[i], mat_tX, lambda_vec[i])

    elif function == 5:
        # Logistic regression: initial weights are a zero column vector.
        max_iters = 7000
        initial_w = np.zeros((mat_tX.shape[1], 1))
        w, loss = logistic_regression(y_list[i], mat_tX, initial_w, max_iters, gamma_vec[i])

    else:
        # function == 6 (validated above): regularized logistic regression,
        # initial weights are a zero column vector.
        max_iters = 7000
        initial_w = np.zeros((mat_tX.shape[1], 1))
        w, loss = reg_logistic_regression(
            y_list[i], mat_tX, initial_w, max_iters, lambda_vec[i], gamma_vec[i]
        )

    weights_list.append(w)
    loss_list.append(loss)
    mat_tX_test_list.append(mat_tX_test)


 

## INFERENCE

In [None]:
# Eval: predict each dataset part with its own weights.
y_pred_list = separated_eval(weights_list, mat_tX_test_list)

# Stitch predictions and event ids back together in the same part order (0-5)
# so that every prediction stays aligned with its id.
y_pred = np.concatenate([y_pred_list[k] for k in range(6)])
ids_test_sub = np.concatenate([ids_test_list[k] for k in range(6)])

# Hand the ids, the predictions, and the output path to create_csv_submission.
OUTPUT_PATH = 'result.csv'
create_csv_submission(ids_test_sub, y_pred, OUTPUT_PATH)