# Project 1 - Team BAK

## Step 1 - Load and Preprocess Data

In [2]:
#Import some libraries
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import datetime
%load_ext autoreload
%autoreload 2

from pre_process import *
from cross_validation import *
from implementations import *

In [3]:
# Load the data
# load_csv_data yields three values per file (presumably labels, features and
# sample ids — confirm in its definition). For the test file the first value is
# discarded by binding it to `_`.
y, x, ids = load_csv_data('../data/train.csv')
_, x_test, ids_test = load_csv_data('../data/test.csv')
# Pre-process the train and test features in a single call. `x_test` is rebound
# to its pre-processed version here, while the raw training features stay in `x`
# and the processed ones go to `x_train`.
x_train, x_test = pre_process_data(x, x_test)

In [4]:
# Sanity check: display the shapes of the labels, the pre-processed feature
# matrices and the id vectors for both the train and the test set.
y.shape, x_train.shape, ids.shape, x_test.shape, ids_test.shape

((250000,), (250000, 24), (250000,), (568238, 24), (568238,))

### HYPERPARAMETER SELECTION

In [4]:
# CONSTANTS
# SEED and K are forwarded to select_parameters_jet by every selection cell.
SEED = 7
# presumably the number of cross-validation folds — TODO confirm in select_parameters_jet
K = 3
# presumably the column index of PRI_jet_num in the feature matrix; it is not
# referenced by the cells visible here — verify against its users
JET_COLUMN = 16

# candidate parameters
# Candidate values handed to select_parameters_jet for the degree and alpha parameters.
degrees_candidates = [3, 6, 7]
alphas_candidates=[3, 4, 7, 9]

# SELECT METHOD
# The selection cells pick a model by its position in this list (methods[0],
# methods[1], ...), so the order of the entries matters.
methods = [mean_squared_error_gd, mean_squared_error_sgd, least_squares, ridge_regression, 
           logistic_regression_gradient_descent, logistic_regression_regularized_gradient_descent]
# Filled by the selection cells: each appends one
# [opt_alpha, opt_degree, opt_lambda, accuracy] entry.
params_per_method = []

The function `select_parameters_jet` returns the optimal parameter values found for each `PRI_jet_num` value. If a model does not use the lambda parameter, its optimal lambda is reported as -1.

In [5]:
# MSE GRADIENT DESCENT
# Select the per-jet-class parameters for the first entry of `methods`.
method = methods[0]
# No lambda candidates are supplied for this model.
lambdas_candidates = None

opt_degree, opt_lambda, opt_alpha, accuracy = select_parameters_jet(
    y, x_train, method, degrees_candidates, alphas_candidates, K, SEED, lambdas_candidates
)

# Report the selected values, one line per quantity.
for _caption, _found in (
    ('Optimal alphas per jet_class:', opt_alpha),
    ('Optimal degrees per jet_class:', opt_degree),
    ('Optimal lambdas per jet_class:', opt_lambda),
    ('Maximum accuracy predicted per jet_class:', accuracy),
):
    print(_caption, _found)

# Stored order is alpha, degree, lambda, accuracy, which differs from the order
# in which select_parameters_jet returns them.
params_per_method.append([opt_alpha, opt_degree, opt_lambda, accuracy])

  e = y - tx@w
  w = w - gamma * gradient


Optimal alphas per jet_class: [9.0, 7.0, 4.0, 9.0]
Optimal degrees per jet_class: [3.0, 3.0, 3.0, 3.0]
Optimal lambdas per jet_class: [-1.0, -1.0, -1.0, -1.0]
Maximum accuracy predicted per jet_class: [0.8261670269837458, 0.7848060455999174, 0.7882054030449196, 0.7723335138061721]


In [6]:
# MSE STOCHASTIC GRADIENT DESCENT
# Select the per-jet-class parameters for the second entry of `methods`.
method = methods[1]
# No lambda candidates are supplied for this model.
lambdas_candidates = None

opt_degree, opt_lambda, opt_alpha, accuracy = select_parameters_jet(
    y, x_train, method, degrees_candidates, alphas_candidates, K, SEED, lambdas_candidates
)

# Report the selected values, one line per quantity.
for _caption, _found in (
    ('Optimal alphas per jet_class:', opt_alpha),
    ('Optimal degrees per jet_class:', opt_degree),
    ('Optimal lambdas per jet_class:', opt_lambda),
    ('Maximum accuracy predicted per jet_class:', accuracy),
):
    print(_caption, _found)

# Stored order is alpha, degree, lambda, accuracy, which differs from the order
# in which select_parameters_jet returns them.
params_per_method.append([opt_alpha, opt_degree, opt_lambda, accuracy])

  w = w - gamma * gradient


Optimal alphas per jet_class: [9.0, 7.0, 4.0, 9.0]
Optimal degrees per jet_class: [3.0, 3.0, 3.0, 3.0]
Optimal lambdas per jet_class: [-1.0, -1.0, -1.0, -1.0]
Maximum accuracy predicted per jet_class: [0.8257666746737128, 0.7836841019292273, 0.7884237479902341, 0.7733712326294894]


In [7]:
# LEAST SQUARES
# Select the per-jet-class parameters for the third entry of `methods`.
method = methods[2]
# No lambda candidates are supplied for this model.
lambdas_candidates = None

opt_degree, opt_lambda, opt_alpha, accuracy = select_parameters_jet(
    y, x_train, method, degrees_candidates, alphas_candidates, K, SEED, lambdas_candidates
)

# Report the selected values, one line per quantity.
for _caption, _found in (
    ('Optimal alphas per jet_class:', opt_alpha),
    ('Optimal degrees per jet_class:', opt_degree),
    ('Optimal lambdas per jet_class:', opt_lambda),
    ('Maximum accuracy predicted per jet_class:', accuracy),
):
    print(_caption, _found)

# Stored order is alpha, degree, lambda, accuracy, which differs from the order
# in which select_parameters_jet returns them.
params_per_method.append([opt_alpha, opt_degree, opt_lambda, accuracy])

Optimal alphas per jet_class: [3.0, 3.0, 3.0, 4.0]
Optimal degrees per jet_class: [7.0, 7.0, 7.0, 6.0]
Optimal lambdas per jet_class: [-1.0, -1.0, -1.0, -1.0]
Maximum accuracy predicted per jet_class: [0.843121947313636, 0.8075544207159805, 0.8191309871176481, 0.8323858509294352]


In [8]:
# RIDGE REGRESSION
# Select the per-jet-class parameters for the fourth entry of `methods`.
method = methods[3]
# This model is searched over a lambda grid as well.
lambdas_candidates = [2.5e-05, 1e-06, 1e-07]

opt_degree, opt_lambda, opt_alpha, accuracy = select_parameters_jet(
    y, x_train, method, degrees_candidates, alphas_candidates, K, SEED, lambdas_candidates
)

# Report the selected values, one line per quantity.
for _caption, _found in (
    ('Optimal alphas per jet_class:', opt_alpha),
    ('Optimal degrees per jet_class:', opt_degree),
    ('Optimal lambdas per jet_class:', opt_lambda),
    ('Maximum accuracy predicted per jet_class:', accuracy),
):
    print(_caption, _found)

# Stored order is alpha, degree, lambda, accuracy, which differs from the order
# in which select_parameters_jet returns them.
params_per_method.append([opt_alpha, opt_degree, opt_lambda, accuracy])

Optimal alphas per jet_class: [3.0, 3.0, 3.0, 4.0]
Optimal degrees per jet_class: [7.0, 7.0, 7.0, 6.0]
Optimal lambdas per jet_class: [2.5e-05, 1e-06, 2.5e-05, 2.5e-05]
Maximum accuracy predicted per jet_class: [0.8431419649291376, 0.8076317961415453, 0.8192103852795807, 0.8323858509294352]


In [9]:
# LOGISTIC REGRESSION
# Select the per-jet-class parameters for the fifth entry of `methods`.
method = methods[4]
# No lambda candidates are supplied for this model.
lambdas_candidates = None
# Use a dedicated name for this model's degree grid. Rebinding the shared
# `degrees_candidates` here would silently shrink the grid to [1] for any
# earlier selection cell that is re-run after this one.
log_degrees_candidates = [1]
# Re-encode the labels as 0/1: entries equal to 1 stay 1, all others become 0.
y_log = np.where(y == 1, 1, 0)

opt_degree, opt_lambda, opt_alpha, accuracy = select_parameters_jet(
    y_log, x_train, method, log_degrees_candidates, alphas_candidates, K, SEED,
    lambdas_candidates, log=True
)
print('Optimal alphas per jet_class:', opt_alpha)
print('Optimal degrees per jet_class:', opt_degree)
print('Optimal lambdas per jet_class:', opt_lambda)
print('Maximum accuracy predicted per jet_class:', accuracy)
# Stored order is alpha, degree, lambda, accuracy, which differs from the order
# in which select_parameters_jet returns them.
params_per_method.append([opt_alpha, opt_degree, opt_lambda, accuracy])

Optimal alphas per jet_class: [9.0, 9.0, 9.0, 9.0]
Optimal degrees per jet_class: [1.0, 1.0, 1.0, 1.0]
Optimal lambdas per jet_class: [-1.0, -1.0, -1.0, -1.0]
Maximum accuracy predicted per jet_class: [0.8117042997838099, 0.7000928505106777, 0.6981678874134064, 0.7082656560187691]


In [10]:
# REGULARIZED LOGISTIC REGRESSION
# Select the per-jet-class parameters for the sixth entry of `methods`.
method = methods[5]
# This model is searched over a lambda grid as well.
lambdas_candidates = [2.5e-05, 1e-06, 1e-07]
# Use a dedicated name for this model's degree grid. Rebinding the shared
# `degrees_candidates` here would silently shrink the grid to [1] for any
# earlier selection cell that is re-run after this one.
log_degrees_candidates = [1]
# Re-encode the labels as 0/1: entries equal to 1 stay 1, all others become 0.
# Recomputed here so the cell does not depend on the previous one having run.
y_log = np.where(y == 1, 1, 0)

opt_degree, opt_lambda, opt_alpha, accuracy = select_parameters_jet(
    y_log, x_train, method, log_degrees_candidates, alphas_candidates, K, SEED,
    lambdas_candidates, log=True
)
print('Optimal alphas per jet_class:', opt_alpha)
print('Optimal degrees per jet_class:', opt_degree)
print('Optimal lambdas per jet_class:', opt_lambda)
print('Maximum accuracy predicted per jet_class:', accuracy)
# Stored order is alpha, degree, lambda, accuracy, which differs from the order
# in which select_parameters_jet returns them.
params_per_method.append([opt_alpha, opt_degree, opt_lambda, accuracy])

Optimal alphas per jet_class: [9.0, 9.0, 9.0, 9.0]
Optimal degrees per jet_class: [1.0, 1.0, 1.0, 1.0]
Optimal lambdas per jet_class: [2.5e-05, 2.5e-05, 2.5e-05, 2.5e-05]
Maximum accuracy predicted per jet_class: [0.8117042997838099, 0.7000928505106777, 0.6981678874134064, 0.7082656560187691]
