# PROJECT 1: Higgs Boson

In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

# Library only used for observing the data in a more visual way.
import pandas as pd

In [2]:
# Imports our methods
import methods as md

from helpers import *

In [3]:
from proj1_helpers import *
DATA_TRAIN_PATH = '../data/train.csv'
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

In [6]:
tX_cleaned, medians_training = md.clean_data(tX)
tX_cleaned.shape

(250000, 30)

In [35]:
# normalized data set
tX_stand, mean_training, std_training = standardize(tX_cleaned)
tX_stand.shape

(250000, 31)

# Prediction

In [7]:
def split_data(y, x, seed=1):
    """split the dataset based on the split ratio."""
    # set seed
    np.random.seed(seed)
    
    # set mask
    ratio = 0.7
    msk = np.random.rand(len(y)) < ratio
    
    # training data set
    x_tr = x[msk]
    y_tr = y[msk]
    
    # test data set
    x_test = x[~msk]
    y_test = y[~msk]
    
    return x_tr, x_test, y_tr, y_test

In [8]:
def cross_validation(y, tX, gamma, lambda_, max_iters, method):
    # split data
    x_tr, x_test, y_tr, y_test = split_data(y, tX)
    
    # training
    loss = 0
    weights = []
    if method == 1:
        loss, weights = md.least_squares_GD(y_tr, x_tr, gamma, max_iters)
    elif method == 2:
        loss, weights = md.least_squares_SGD(y_tr, x_tr, gamma, max_iters)
    elif method == 3:
        loss, weights = md.least_squares(y_tr, x_tr)
    elif method == 4:
        loss, weights = md.ridge_regression(y_tr, x_tr, lambda_)
    elif method == 5:
        loss, weights = md.logistic_regression(y_tr, x_tr, gamma, max_iters)
    else:
        loss, weights = md.reg_logistic_regression(y_tr, x_tr, lambda_, gamma, max_iters)
        
    # compute prediction
    y_pred = predict_labels(weights, x_test)    
    
    # accuracy of the prediction
    N = y_test.shape[0]
    pred = np.sum(y_pred == y_test)/N
        
    return pred

# Training and testing in local

In [10]:
N = y.shape[0]

y1 = y[:N/2].copy()
y2 = y[N/2:].copy()

tX1 = tX_stand[:N/2, :].copy()
tX2 = tX_stand[N/2:, :].copy()

ids1 = ids[:N/2].copy()
ids2 = ids[N/2:].copy()

  app.launch_new_instance()


### Linear regression - gradient descent

In [11]:
max_iters_GD_test = 2000
gamma_GD_test = 1.0e-8
method = 1

loss_GD_test, weights_GD_test = md.least_squares_GD(y1, tX1, gamma_GD_test, max_iters_GD_test)
pred_GD_test = prediction(y1, tX1, gamma_GD_test, 0, max_iters_GD_test, method)

print("\nweights_GD:\n", weights_GD_test,"\n")
print("pred_GD = ", pred_GD_test)


weights_GD:
 [ -6.34201555e-06   4.63147996e-07  -6.70261634e-06  -3.27068464e-07
   3.67348572e-06   3.79811110e-06   4.00409550e-06  -3.44840608e-06
   1.75768474e-07  -2.54605629e-07   2.94478373e-06  -3.74736341e-06
   5.18586995e-06   3.29776178e-06   4.50395291e-06  -5.27806052e-08
  -6.95413922e-08  -6.22203163e-07   5.92352522e-08   4.65440484e-08
   4.30992542e-07   1.45799184e-07   2.61351359e-06   2.53523167e-06
   2.23962043e-06  -5.51348327e-08   6.45409265e-09   4.83417431e-07
   2.25987779e-08  -8.64344460e-08   2.58647892e-06] 

pred_GD =  0.69626081171


In [12]:
y_pred_GD_test = predict_labels(weights_GD_test, tX2)
n_correct_GD = np.count_nonzero((y_pred_GD_test - y2) == 0)
perc_correct_GD = n_correct_GD/(N/2) * 100
perc_correct_GD

69.5264

### Linear regression - stochastic gradient descent

In [13]:
max_iters_SGD_test = 500
gamma_SGD_test = 1.0e-8
method = 2

loss_SGD_test, weights_SGD_test = md.least_squares_SGD(y1, tX1, gamma_SGD_test, max_iters_SGD_test)
print("\nweights_SGD:\n", weights_SGD_test,"\n")

pred_SGD_test = prediction(y1, tX1, gamma_SGD_test, 0, max_iters_SGD_test, method)
print("pred_SGD = ", pred_SGD_test)


weights_SGD:
 [ -1.61772300e-04   1.18117996e-05  -1.70914219e-04  -8.33369212e-06
   9.35964518e-05   9.68181373e-05   1.02045998e-04  -8.78940266e-05
   4.55053020e-06  -6.53449611e-06   7.50036370e-05  -9.55582248e-05
   1.32203211e-04   8.40763252e-05   1.14821961e-04  -1.34808065e-06
  -1.77371751e-06  -1.58793165e-05   1.50834034e-06   1.18840745e-06
   1.09337929e-05   3.71777015e-06   6.65580926e-05   6.45738495e-05
   5.70321977e-05  -1.40777014e-06   1.63962388e-07   1.22653064e-05
   5.75975108e-07  -2.20532438e-06   6.58604698e-05] 

pred_SGD =  0.69628742515


In [14]:
y_pred_SGD_test = predict_labels(weights_SGD_test, tX2)
n_correct_SGD = np.count_nonzero((y_pred_SGD_test - y2) == 0)
perc_correct_SGD = n_correct_SGD/(N/2) * 100
perc_correct_SGD

69.53200000000001

### Least squares

In [15]:
method = 3

loss_LeastS_test, weights_LeastS_test = md.least_squares(y1, tX1)
print("\nweights_LeastS:\n", weights_LeastS_test,"\n")

pred_LeastS_test = prediction(y1, tX1, 0, 0, 0, method)
print("pred_LeastS = ", pred_LeastS_test)


weights_LeastS:
 [ -3.14910327e-01   7.62737682e-03  -2.52323185e-01  -2.59519952e-01
   7.57394818e-03   1.57894698e-02   1.02355109e-01   5.27204438e-03
   2.77845134e-01  -2.67068079e-02  -4.88895802e+02  -1.86884374e-01
   1.22185338e-01   7.57227103e-02   9.48871392e+01  -3.97835288e-03
  -2.96661612e-03   9.35204438e+01   2.06881217e-03   4.91517942e-04
   1.00187795e-01  -6.91976557e-04  -4.78042520e-02   4.60743288e-02
  -3.94640067e-02   1.59092057e-03  -2.84035309e-03  -2.20283190e-02
  -3.69745737e-04  -3.08936596e-03   4.14117624e+02] 

pred_LeastS =  0.747624750499


In [16]:
y_pred_LS_test = predict_labels(weights_LeastS_test, tX2)
n_correct_LS = np.count_nonzero((y_pred_LS_test - y2) == 0)
perc_correct_LS = n_correct_LS/(N/2) * 100
perc_correct_LS

74.4256

### Ridge regression

In [17]:
degree_RR_test = 6

tX1_poly = md.build_poly(tX1, degree_RR_test)
tX2_poly = md.build_poly(tX2, degree_RR_test)

lambda_RR_test = 1
method = 4

loss_RR_test, weights_RR_test = md.ridge_regression(y1, tX1_poly, lambda_RR_test)
print("\nweights_RR:\n", weights_RR_test,"\n")

pred_RR_test = prediction(y1, tX1_poly, 0, lambda_RR_test, 0, method)
print("pred_RR for degree ", str(degree_RR_test), " = ", pred_RR_test)


weights_RR:
 [ -5.85962811e-03  -5.85962609e-03  -5.85962609e-03  -5.85962609e-03
  -5.85962609e-03  -5.85962609e-03  -5.85962609e-03  -5.85962609e-03
  -5.85962609e-03  -5.85962609e-03  -5.85962609e-03  -5.85962609e-03
  -5.85962609e-03  -5.85962609e-03  -5.85962609e-03  -5.85962609e-03
  -5.85962609e-03  -5.85962609e-03  -5.85962609e-03  -5.85962609e-03
  -5.85962609e-03  -5.85962609e-03  -5.85962609e-03  -5.85962609e-03
  -5.85962597e-03  -5.85962597e-03  -5.85962597e-03  -5.85962597e-03
  -5.85962597e-03  -5.85962597e-03  -5.85962597e-03  -5.85962597e-03
   2.49953324e-02  -5.59050938e-02   1.31477675e-02   1.37304984e-02
   2.72241269e-03   3.38051817e-03  -4.38090762e-03   1.80824623e-02
  -5.03409352e-03   1.41155686e-02  -3.33809738e-02   1.90516274e-02
   5.03380674e-03   4.14616799e-02  -2.94487294e-04  -5.33443193e-04
   3.07302255e-04   7.85677711e-04   1.94683102e-04  -5.96970525e-03
   4.91920337e-04   9.50432638e-03   1.15743411e-03   1.26043622e-02
  -6.58298034e-04   

In [18]:
y_pred_RR_test = predict_labels(weights_RR_test, tX2_poly)
n_correct_RR = np.count_nonzero((y_pred_RR_test - y2) == 0)
perc_correct_RR = n_correct_RR/(N/2) * 100
perc_correct_RR

75.4368

### Logistic regression

In [19]:
max_iters_LogR_test = 2000
gamma_LogR_test = 1.0e-10
method = 5

loss_LogR_test, weights_LogR_test = md.logistic_regression(y1, tX1, gamma_LogR_test, max_iters_LogR_test)
print("\nweights_LogR:\n", weights_LogR_test,"\n")

pred_LogR_test = prediction(y1, tX1, gamma_LogR_test, 0, max_iters_LogR_test, method)
print("pred_LogR = ", pred_LogR_test)


weights_LogR:
 [ -2.03635186e-02   5.63757510e-04  -8.37827539e-03  -4.30020688e-04
   4.54063123e-03   4.71836965e-03   4.95212904e-03  -4.29015511e-03
   2.27608782e-04  -3.49101097e-04   3.61728771e-03  -4.64732208e-03
   6.44505736e-03   4.09146094e-03   5.59748243e-03  -5.49977535e-05
  -7.45166784e-05  -7.61494860e-04   9.07708095e-05   5.06966567e-05
   5.01695930e-04   1.90156976e-04   3.22003540e-03   3.10025844e-03
   2.76262061e-03  -5.21554312e-05   1.66056710e-05   5.50035604e-04
   2.29911616e-05  -7.99036941e-05   3.16167309e-03] 

pred_LogR =  0.697857618097


In [20]:
y_pred_LogR_test = predict_labels(weights_LogR_test, tX2)
n_correct_LogR = np.count_nonzero((y_pred_LogR_test - y2) == 0)
perc_correct_LogR = n_correct_LogR/(N/2) * 100
perc_correct_LogR

69.3352

### Regularized logistic regression

In [21]:
max_iters_RLogR_test = 1000
lambda_RLogR_test = 1.0e-8
gamma_RLogR_test = 2.0e-8
method = 5

loss_RLogR_test, weights_RLogR_test = md.reg_logistic_regression(y1, tX1, gamma_RLogR_test, lambda_RLogR_test, max_iters_RLogR_test)
print("\nweights_RLogR:\n", weights_RLogR_test,"\n")

pred_RLogR_test = prediction(y1, tX1, gamma_RLogR_test, lambda_RLogR_test, max_iters_RLogR_test, 6)
print("pred_RLogR = ", pred_RLogR_test)


weights_RLogR:
 [ -1.62046862e+00   3.51383433e-02  -5.86015913e-01  -2.64797315e-02
   1.93956381e-01   2.32837575e-01   2.29611750e-01  -1.98127802e-01
   1.41611305e-01  -8.62186426e-02   1.08153609e-01  -3.16539297e-01
   3.83674154e-01   2.28095363e-01   3.40292042e-01  -6.00004313e-03
  -7.19278047e-03  -5.68262549e-02   4.56611039e-03   4.19330193e-03
  -5.42266885e-02   1.07124829e-02   8.14839989e-02   8.37697784e-02
   6.66957048e-02  -1.96636552e-03  -9.11400802e-04  -5.57491272e-02
   8.31075590e-04  -3.70952943e-03   6.26558331e-02] 

pred_RLogR =  0.705123087159


In [22]:
y_pred_RLogR_test = predict_labels(weights_RLogR_test, tX2)
n_correct_RLogR = np.count_nonzero((y_pred_RLogR_test - y2) == 0)
perc_correct_RLogR = n_correct_RLogR/(N/2) * 100
perc_correct_RLogR

70.46480000000001

# Training on the entire data set, separating data by JET values

### Dividing by JET

In [66]:
tX_jet0, indexes_jet0, tX_jet1, indexes_jet1, tX_jet2, indexes_jet2, tX_jet3, indexes_jet3 = md.separating_by_jet(tX)
y_jet0 = y[indexes_jet0]
y_jet1 = y[indexes_jet1]
y_jet2 = y[indexes_jet2]
y_jet3 = y[indexes_jet3]
tX_cleaned_jet0, medians_jet0 = md.clean_data(tX_jet0)
tX_cleaned_jet1, medians_jet1  = md.clean_data(tX_jet1)
tX_cleaned_jet2, medians_jet2 = md.clean_data(tX_jet2)
tX_cleaned_jet3, medians_jet3 = md.clean_data(tX_jet3)

tX_reformed_jet0, means_jet0, stds_jet0 = md.standardize(tX_cleaned_jet0)
tX_reformed_jet1, means_jet1, stds_jet1 = md.standardize(tX_cleaned_jet1)
tX_reformed_jet2, means_jet2, stds_jet2 = md.standardize(tX_cleaned_jet2)
tX_reformed_jet3, means_jet3, stds_jet3 = md.standardize(tX_cleaned_jet3)



In [67]:
tX_reformed_jet0.shape

(99913, 21)

In [68]:
tX_reformed_jet1.shape

(77544, 24)

In [69]:
tX_reformed_jet2.shape

(50379, 31)

In [70]:
tX_reformed_jet3.shape

(22164, 31)

### Training

In [103]:
lambdas = [1e-10, 2e-10, 3e-10, 4e-10, 5e-10, 6e-10, 7e-10, 8e-10, 9e-10, 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10]
degrees = range(0, 11)

### JET == 0.0

In [102]:
pred_jet0_VEC = np.zeros((len(lambdas)*len(degrees), 1))
i = 0

method = 4

tX_reformed_jet0 = tX_cleaned_jet0.copy()
w0 = np.ones([1,tX_reformed_jet0.shape[0]])
tX_reformed_jet0 = np.insert(tX_reformed_jet0, 0, w0, axis=1)

for deg in degrees:
    for lam in lambdas:

        tX_poly_jet0 = md.build_poly(tX_reformed_jet0, deg)

        loss_jet0, weights_jet0 = md.ridge_regression(y_jet0, tX_poly_jet0, lam)
        # print("\nweights_RR:\n", weights_jet0,"\n")

        pred_jet0 = prediction(y_jet0, tX_poly_jet0, 0, lam, 0, method)
        pred_jet0_VEC[i] = pred_jet0
        i = i + 1
        if pred_jet0 > 0.82 :
            print("prediction jet0 = ", pred_jet0, " with lambda: ", str(lam), " degree: ", str(deg))

prediction jet0 =  0.825135333823  with lambda:  1e-10  degree:  2
prediction jet0 =  0.825135333823  with lambda:  2e-10  degree:  2
prediction jet0 =  0.825135333823  with lambda:  3e-10  degree:  2
prediction jet0 =  0.825135333823  with lambda:  4e-10  degree:  2
prediction jet0 =  0.825135333823  with lambda:  5e-10  degree:  2
prediction jet0 =  0.825135333823  with lambda:  6e-10  degree:  2
prediction jet0 =  0.825135333823  with lambda:  7e-10  degree:  2
prediction jet0 =  0.825135333823  with lambda:  8e-10  degree:  2
prediction jet0 =  0.825135333823  with lambda:  9e-10  degree:  2
prediction jet0 =  0.825135333823  with lambda:  1e-09  degree:  2
prediction jet0 =  0.825135333823  with lambda:  1e-08  degree:  2
prediction jet0 =  0.825168749582  with lambda:  1e-07  degree:  2
prediction jet0 =  0.825135333823  with lambda:  1e-06  degree:  2
prediction jet0 =  0.825135333823  with lambda:  1e-05  degree:  2
prediction jet0 =  0.825035086547  with lambda:  0.0001  degre

LinAlgError: Singular matrix

In [104]:
lambda_jet0 = 3e-10
method = 4

degree_jet0 = 3

tX_reformed_jet0 = tX_cleaned_jet0.copy()
w0 = np.ones([1,tX_reformed_jet0.shape[0]])
tX_reformed_jet0 = np.insert(tX_reformed_jet0, 0, w0, axis=1)

tX_poly_jet0 = md.build_poly(tX_reformed_jet0, degree_jet0)

loss_jet0, weights_jet0 = md.ridge_regression(y_jet0, tX_poly_jet0, lambda_jet0)
# print("\nweights_RR:\n", weights_jet0,"\n")

pred_jet0 = prediction(y_jet0, tX_poly_jet0, 0, lambda_jet0, 0, method)
print("pred_RR = ", pred_jet0)

pred_RR =  0.83028136069


### JET == 1.0

In [79]:
pred_jet1_VEC = np.zeros((len(lambdas)*len(degrees), 1))
i = 0

method = 4

tX_reformed_jet0 = tX_cleaned_jet0.copy()
w0 = np.ones([1,tX_reformed_jet0.shape[0]])
tX_reformed_jet0 = np.insert(tX_reformed_jet0, 0, w0, axis=1)

for deg in degrees:
    for lam in lambdas:

        tX_poly_jet1 = md.build_poly(tX_reformed_jet1, deg)

        loss_jet1, weights_jet1 = md.ridge_regression(y_jet1, tX_poly_jet1, lam)
        # print("\nweights_RR:\n", weights_jet0,"\n")

        pred_jet1 = prediction(y_jet1, tX_poly_jet1, 0, lam, 0, method)
        pred_jet1_VEC[i] = pred_jet1
        i = i + 1
        if pred_jet1 > 0.8 :
            print("prediction jet1 = ", pred_jet1, " with lambda: ", str(lam), " degree: ", str(deg))

prediction jet1 =  0.790634558222  with lambda:  1e-10  degree:  7
prediction jet1 =  0.790548399604  with lambda:  2e-10  degree:  7
prediction jet1 =  0.790548399604  with lambda:  3e-10  degree:  7
prediction jet1 =  0.790548399604  with lambda:  4e-10  degree:  7
prediction jet1 =  0.790505320295  with lambda:  5e-10  degree:  7
prediction jet1 =  0.790505320295  with lambda:  6e-10  degree:  7
prediction jet1 =  0.790462240986  with lambda:  7e-10  degree:  7
prediction jet1 =  0.790505320295  with lambda:  8e-10  degree:  7
prediction jet1 =  0.790505320295  with lambda:  9e-10  degree:  7
prediction jet1 =  0.790505320295  with lambda:  1e-09  degree:  7
prediction jet1 =  0.790462240986  with lambda:  1e-08  degree:  7
prediction jet1 =  0.790462240986  with lambda:  1e-07  degree:  7
prediction jet1 =  0.790462240986  with lambda:  1e-06  degree:  7
prediction jet1 =  0.790548399604  with lambda:  1e-05  degree:  7
prediction jet1 =  0.790634558222  with lambda:  0.001  degree

In [112]:
lambda_jet1 = 3e-10
method = 4

degree_jet1 = 9

tX_reformed_jet1 = tX_cleaned_jet1.copy()
w0 = np.ones([1,tX_reformed_jet1.shape[0]])
tX_reformed_jet1 = np.insert(tX_reformed_jet1, 0, w0, axis=1)

tX_poly_jet1 = md.build_poly(tX_reformed_jet1, degree_jet1)

loss_jet1, weights_jet1 = md.ridge_regression(y_jet1, tX_poly_jet1, lambda_jet1)
# print("\nweights_RR:\n", weights_jet2,"\n")

pred_jet2 = prediction(y_jet1, tX_poly_jet1, 0, lambda_jet1, 0, method)
print("pred_RR = ", pred_jet1)

pred_RR =  0.802826002671


### JET == 2.0

In [81]:
pred_jet2_VEC = np.zeros((len(lambdas)*len(degrees), 1))
i = 0

method = 4

#tX_reformed_jet0 = tX_cleaned_jet0.copy()
#w0 = np.ones([1,tX_reformed_jet0.shape[0]])
#tX_reformed_jet0 = np.insert(tX_reformed_jet0, 0, w0, axis=1)

for deg in degrees:
    for lam in lambdas:

        tX_poly_jet2 = md.build_poly(tX_reformed_jet2, deg)

        loss_jet2, weights_jet2 = md.ridge_regression(y_jet2, tX_poly_jet2, lam)
        # print("\nweights_RR:\n", weights_jet0,"\n")

        pred_jet2 = prediction(y_jet2, tX_poly_jet2, 0, lam, 0, method)
        pred_jet2_VEC[i] = pred_jet2
        i = i + 1
        if pred_jet2 > 0.83 :
            print("prediction jet1 = ", pred_jet2, " with lambda: ", str(lam), " degree: ", str(deg))

prediction jet1 =  0.805863235392  with lambda:  1e-10  degree:  4
prediction jet1 =  0.805863235392  with lambda:  2e-10  degree:  4
prediction jet1 =  0.805796909199  with lambda:  3e-10  degree:  4
prediction jet1 =  0.805730583007  with lambda:  4e-10  degree:  4
prediction jet1 =  0.805730583007  with lambda:  5e-10  degree:  4
prediction jet1 =  0.805730583007  with lambda:  6e-10  degree:  4
prediction jet1 =  0.805730583007  with lambda:  7e-10  degree:  4
prediction jet1 =  0.805796909199  with lambda:  8e-10  degree:  4
prediction jet1 =  0.805796909199  with lambda:  9e-10  degree:  4
prediction jet1 =  0.805796909199  with lambda:  1e-09  degree:  4
prediction jet1 =  0.805531604431  with lambda:  1e-08  degree:  4
prediction jet1 =  0.805664256815  with lambda:  1e-07  degree:  4
prediction jet1 =  0.805531604431  with lambda:  1e-06  degree:  4
prediction jet1 =  0.805597930623  with lambda:  1e-05  degree:  4
prediction jet1 =  0.80672547589  with lambda:  0.001  degree:

In [111]:
#lambda_jet2 = 1e-10
lambda_jet2 = 8e-10
method = 4

#degree_jet2 = 9
degree_jet2 = 9

tX_reformed_jet2 = tX_cleaned_jet2.copy()
w0 = np.ones([1,tX_reformed_jet2.shape[0]])
tX_reformed_jet2 = np.insert(tX_reformed_jet2, 0, w0, axis=1)

tX_poly_jet2 = md.build_poly(tX_reformed_jet2, degree_jet2)

loss_jet2, weights_jet2 = md.ridge_regression(y_jet2, tX_poly_jet2, lambda_jet2)
# print("\nweights_RR:\n", weights_jet2,"\n")

pred_jet2 = prediction(y_jet2, tX_poly_jet2, 0, lambda_jet2, 0, method)

print("pred_RR = ", pred_jet2)

pred_RR =  0.830735557472


### JET == 3.0

In [84]:
pred_jet3_VEC = np.zeros((len(lambdas)*len(degrees), 1))
i = 0

method = 4

#tX_reformed_jet0 = tX_cleaned_jet0.copy()
#w0 = np.ones([1,tX_reformed_jet0.shape[0]])
#tX_reformed_jet0 = np.insert(tX_reformed_jet0, 0, w0, axis=1)

for deg in degrees:
    for lam in lambdas:

        tX_poly_jet3 = md.build_poly(tX_reformed_jet3, deg)

        loss_jet3, weights_jet3 = md.ridge_regression(y_jet3, tX_poly_jet3, lam)
        # print("\nweights_RR:\n", weights_jet0,"\n")

        pred_jet3 = prediction(y_jet3, tX_poly_jet3, 0, lam, 0, method)
        pred_jet3_VEC[i] = pred_jet3
        i = i + 1
        if pred_jet2 > 0.79 :
            print("prediction jet1 = ", pred_jet2, " with lambda: ", str(lam), " degree: ", str(deg))

prediction jet1 =  0.823854306613  with lambda:  1e-10  degree:  8
prediction jet1 =  0.823705030602  with lambda:  2e-10  degree:  8
prediction jet1 =  0.823705030602  with lambda:  3e-10  degree:  8
prediction jet1 =  0.823705030602  with lambda:  4e-10  degree:  8
prediction jet1 =  0.823705030602  with lambda:  5e-10  degree:  8
prediction jet1 =  0.823705030602  with lambda:  6e-10  degree:  8
prediction jet1 =  0.823705030602  with lambda:  7e-10  degree:  8
prediction jet1 =  0.823705030602  with lambda:  8e-10  degree:  8
prediction jet1 =  0.823705030602  with lambda:  9e-10  degree:  8
prediction jet1 =  0.823705030602  with lambda:  1e-09  degree:  8
prediction jet1 =  0.824152858636  with lambda:  1e-08  degree:  8
prediction jet1 =  0.824152858636  with lambda:  1e-07  degree:  8
prediction jet1 =  0.824451410658  with lambda:  1e-06  degree:  8
prediction jet1 =  0.82460068667  with lambda:  1e-05  degree:  8
prediction jet1 =  0.825347066726  with lambda:  0.0001  degree

In [110]:
#lambda_jet3 = 1e-7
lambda_jet3 = 1e-7
method = 4

#degree_jet3 = 10
degree_jet3 = 10

tX_reformed_jet3 = tX_cleaned_jet3.copy()
w0 = np.ones([1,tX_reformed_jet3.shape[0]])
tX_reformed_jet3 = np.insert(tX_reformed_jet3, 0, w0, axis=1)

tX_poly_jet3 = md.build_poly(tX_reformed_jet3, degree_jet3)

loss_jet3, weights_jet3 = md.ridge_regression(y_jet3, tX_poly_jet3, lambda_jet3)
# print("\nweights_RR:\n", weights_jet3,"\n")

pred_jet3 = prediction(y_jet3, tX_poly_jet3, 0, lambda_jet3, 0, method)
print("pred_RR = ", pred_jet3)

pred_RR =  0.830571727123


## Making the submission file

In [86]:
DATA_TEST_PATH = '../data/test.csv' 
_, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)
tX_test.shape

(568238, 30)

In [87]:
tX_test_jet0, indexes_test_jet0, tX_test_jet1, indexes_test_jet1, tX_test_jet2, indexes_test_jet2, tX_test_jet3, indexes_test_jet3 = md.separating_by_jet(tX_test)

tX_test_cleaned_jet0, _ = md.clean_data(tX_test_jet0, medians_jet0)
tX_test_cleaned_jet1, _ = md.clean_data(tX_test_jet1, medians_jet1)
tX_test_cleaned_jet2, _ = md.clean_data(tX_test_jet2, medians_jet2)
tX_test_cleaned_jet3, _ = md.clean_data(tX_test_jet3, medians_jet3)

In [88]:
tX_test_reformed_jet0, _, _ = md.standardize(tX_test_cleaned_jet0, means_jet0, stds_jet0)

tX_test_reformed_jet1, _, _ = md.standardize(tX_test_cleaned_jet1, means_jet1, stds_jet1)

tX_test_reformed_jet2, _, _ = md.standardize(tX_test_cleaned_jet2, means_jet2, stds_jet2)

tX_test_reformed_jet3, _, _ = md.standardize(tX_test_cleaned_jet3, means_jet3, stds_jet3)

In [89]:
tX_test_poly_jet0 = md.build_poly(tX_test_reformed_jet0, degree_jet0)
tX_test_poly_jet1 = md.build_poly(tX_test_reformed_jet1, degree_jet1)
tX_test_poly_jet2 = md.build_poly(tX_test_reformed_jet2, degree_jet2)
tX_test_poly_jet3 = md.build_poly(tX_test_reformed_jet3, degree_jet3)

In [90]:
y_pred_jet0 = predict_labels(weights_jet0, tX_test_poly_jet0)
y_pred_jet1 = predict_labels(weights_jet1, tX_test_poly_jet1)
y_pred_jet2 = predict_labels(weights_jet2, tX_test_poly_jet2)
y_pred_jet3 = predict_labels(weights_jet3, tX_test_poly_jet3)

In [91]:
y_pred_final = np.ones((tX_test.shape[0], 1))
a = 0
b = 0
c = 0
d = 0
for i in range(0, y_pred_final.shape[0]):
    if indexes_test_jet0[i] == True:
        y_pred_final[i] = y_pred_jet0[a]
        a = a + 1
    if indexes_test_jet1[i] == True:
        y_pred_final[i] = y_pred_jet1[b]
        b = b + 1
    if indexes_test_jet2[i] == True:
        y_pred_final[i] = y_pred_jet2[c]
        c = c + 1
    if indexes_test_jet3[i] == True:
        y_pred_final[i] = y_pred_jet3[d]
        d = d + 1

In [92]:
y_pred_final.shape

(568238, 1)

In [93]:
OUTPUT_PATH = '../data/dataSubmission_JET_RR.csv' 
create_csv_submission(ids_test, y_pred_final, OUTPUT_PATH)

In [99]:
import run
run

<module 'run' from '/Users/davidrivollet/Programmation/PCML_project1/run.py'>