# Classifying Higgs Boson Data

In [3]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import costs
import datetime
from helpers import *
from optimization import *
from proj1_helpers import *
from crossvalidation import *
from to_create_pred import *
%load_ext autoreload
%autoreload 2

#### Load the training data into feature matrix, class labels, and event ids:

In [4]:
DATA_TRAIN_PATH = '../../Data/train.csv' 
y, x, ids = load_csv_data(DATA_TRAIN_PATH)
tx, mean_x, std_x = standardize(x)
# We standardize the data because gradient descent is very sensitive to different speed of convergence
DATA_TEST_PATH = '../../Data/test.csv' 
_, tX_test, ids_test = load_csv_data(DATA_TEST_PATH)
tx_test, mean_x, std_x = standardize(tX_test)

#### 1) We try our models without any improvement

###### Least Squares with gradient descent

I tried for lambdas np.logspace(-5, 0, 10) and it seems like the optimal for degree 1 polynomial basis is in [0.1;0.17[

In [8]:
optimal_lambda,optimal_loss_tr,optimal_loss_te = get_best_parameters_GD(y,tx)
print("The optimal lambda for GD is : ",optimal_lambda," ; loss_tr = ",optimal_loss_tr," ; loss_te = ",optimal_loss_te)

for lamb =  0.11  loss_tr =  0.824866355769  loss_te =  0.824715937735
for lamb =  0.11291549665  loss_tr =  0.824859207669  loss_te =  0.824709105286
for lamb =  0.116681005372  loss_tr =  0.824851503289  loss_te =  0.824701783923
for lamb =  0.1215443469  loss_tr =  0.824843609509  loss_te =  0.824694346624
for lamb =  0.127825594022  loss_tr =  0.824836021446  loss_te =  0.824687290248
for lamb =  0.135938136638  loss_tr =  0.824829282935  loss_te =  0.824681153227
for lamb =  0.146415888336  loss_tr =  0.824823846746  loss_te =  0.824676373042
for lamb =  0.159948425032  loss_tr =  0.824819920804  loss_te =  0.82467312327
for lamb =  0.177426368268  loss_tr =  8.02072229935e+20  loss_te =  8.01272084026e+20
Higher values of lambda will result in high divergence yielding infinite loss
The optimal lambda for GD is :  0.159948425032  ; loss_tr =  0.824819920804  ; loss_te =  0.82467312327


In [11]:
weights = least_squares_GD(y,tx,optimal_lambda,300)
generate_outputs_for_weigths(weights,tx_test,ids_test,'LeastSquaresGD')

True

###### Least squares with stochastic gradient descent

I tried for lambdas np.logspace(-5, 0, 10) and it seems like the optimal for degree 1 polynomial basis is in [0.001;0.01[

In [12]:
optimal_lambda,optimal_loss_tr,optimal_loss_te = get_best_parameters_SGD(y,tx)
print("The optimal lambda for GD is : ",optimal_lambda," ; loss_tr = ",optimal_loss_tr," ; loss_te = ",optimal_loss_te)

for lamb =  0.0021  loss_tr =  0.874438662643  loss_te =  0.874610956965
for lamb =  0.0021291549665  loss_tr =  0.879946104725  loss_te =  0.879932031232
for lamb =  0.00216681005372  loss_tr =  0.874014019209  loss_te =  0.873806336215
for lamb =  0.002215443469  loss_tr =  0.872443546047  loss_te =  0.872736720464
for lamb =  0.00227825594022  loss_tr =  0.874879572911  loss_te =  0.874799806263
for lamb =  0.00235938136638  loss_tr =  0.880989074319  loss_te =  0.881151025915
for lamb =  0.00246415888336  loss_tr =  0.873610518604  loss_te =  0.873196191409
for lamb =  0.00259948425032  loss_tr =  0.874955238278  loss_te =  0.87570775559
for lamb =  0.00277426368268  loss_tr =  0.878585853626  loss_te =  0.878800302355
for lamb =  0.003  loss_tr =  0.876667728946  loss_te =  0.87670104004
The optimal lambda for GD is :  0.002215443469  ; loss_tr =  0.872443546047  ; loss_te =  0.872736720464


In [13]:
weights = least_squares_SGD(y,tx,optimal_lambda,500)
generate_outputs_for_weigths(weights,tx_test,ids_test,'LeastSquaresSGD')

True

###### Least Squares

The idea is to simply get the same unit of comparison with gradient descent methods, we see that Least Squares yields a much better loss for the test sample on cross validation

In [14]:
cross_validate_Least_Squares(y,tx)

 loss_tr =  0.824104577101  loss_te =  0.823974545767


In [15]:
weights = least_squares(y,tx)
generate_outputs_for_weigths(weights,tx_test,ids_test,'LeastSquares')

True

###### Ridge Regression

Should improve least squares to get less complex models. Tried on np.logspace(-5,2,10) and it seems like the optimal value is in [0.1;0.11]

In [16]:
optimal_lambda,optimal_loss_tr,optimal_loss_te = get_best_parameters_Ridge(y,x)
print("The optimal lambda for GD is : ",optimal_lambda," ; loss_tr = ",optimal_loss_tr," ; loss_te = ",optimal_loss_te)

The optimal lambda for GD is :  0.19  ; loss_tr =  0.824399393203  ; loss_te =  0.824266959013


In [17]:
weights = ridge_regression(y,tx,optimal_lambda)
generate_outputs_for_weigths(weights,tx_test,ids_test,'RidgeRegression')

True

###### Logistic Regression

Problem with my ridge regression as it seems that the cost function goes into the negative value, and it should not ? 
I checked the gradient and cost computation for small values of y, tx and w

In [21]:
print(y.shape)

(250000,)


In [24]:
w = logistic_regression(y[:100000],tx[:100000],0.00002,10000)

Current iteration=0, the loss=69314.71805599453
Current iteration=500, the loss=84915.74781887252
Current iteration=1000, the loss=84857.51961439503
Current iteration=1500, the loss=84858.08767789285
Current iteration=2000, the loss=84858.29400979228
Current iteration=2500, the loss=84858.38375005157
Current iteration=3000, the loss=84858.45024057636
Current iteration=3500, the loss=84858.50672900704
Current iteration=4000, the loss=84858.55528440129
Current iteration=4500, the loss=84858.59647292414
Current iteration=5000, the loss=84858.63064902411
Current iteration=5500, the loss=84858.65812747812
Current iteration=6000, the loss=84858.67920671559
Current iteration=6500, the loss=84858.69417237196
Current iteration=7000, the loss=84858.70329820046
Current iteration=7500, the loss=84858.70684661104
Current iteration=8000, the loss=84858.70506914471
Current iteration=8500, the loss=84858.69820692108
Current iteration=9000, the loss=84858.68649106866
Current iteration=9500, the loss=84

In [10]:
optimal_lambda,optimal_loss_tr,optimal_loss_te = get_best_parameters_Logistic(y,x)
print("The optimal lambda for GD is : ",optimal_lambda," ; loss_tr = ",optimal_loss_tr," ; loss_te = ",optimal_loss_te)

Current iteration=0, the loss=144403.3521260534
Current iteration=1, the loss=inf
Current iteration=2, the loss=nan
Current iteration=3, the loss=nan
Current iteration=4, the loss=nan
Current iteration=5, the loss=nan
Current iteration=6, the loss=nan
Current iteration=7, the loss=nan
Current iteration=8, the loss=nan
Current iteration=9, the loss=nan
Current iteration=10, the loss=nan
Current iteration=11, the loss=nan
Current iteration=12, the loss=nan
Current iteration=13, the loss=nan
Current iteration=14, the loss=nan
Current iteration=15, the loss=nan
Current iteration=16, the loss=nan
Current iteration=17, the loss=nan
Current iteration=18, the loss=nan
Current iteration=19, the loss=nan
Current iteration=20, the loss=nan
Current iteration=21, the loss=nan
Current iteration=22, the loss=nan
Current iteration=23, the loss=nan


  diff = np.log(1+np.exp(tmp)) - y*tmp
  return np.exp(t) / (1 + np.exp(t))
  return np.exp(t) / (1 + np.exp(t))


Current iteration=24, the loss=nan
Current iteration=25, the loss=nan
Current iteration=26, the loss=nan
Current iteration=27, the loss=nan
Current iteration=28, the loss=nan
Current iteration=29, the loss=nan
Current iteration=30, the loss=nan
Current iteration=31, the loss=nan
Current iteration=32, the loss=nan
Current iteration=33, the loss=nan
Current iteration=34, the loss=nan
Current iteration=35, the loss=nan
Current iteration=36, the loss=nan
Current iteration=37, the loss=nan
Current iteration=38, the loss=nan
Current iteration=39, the loss=nan
Current iteration=40, the loss=nan
Current iteration=41, the loss=nan
Current iteration=42, the loss=nan
Current iteration=43, the loss=nan
Current iteration=44, the loss=nan
Current iteration=45, the loss=nan
Current iteration=46, the loss=nan
Current iteration=47, the loss=nan
Current iteration=48, the loss=nan
Current iteration=49, the loss=nan
Current iteration=50, the loss=nan
Current iteration=51, the loss=nan
Current iteration=52

KeyboardInterrupt: 