# Hyperparameter tuning

## Load data

In [3]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

In [7]:
from proj1_helpers import *
# Location of the training CSV, relative to the notebook's working directory.
DATA_TRAIN_PATH = 'data/train.csv'
# load_csv_data is provided by the proj1_helpers star import above. Judging by
# the target names it yields the labels, the feature matrix and the sample ids,
# in that order — confirm against proj1_helpers.
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

## Data preprocessing

#### Split data into train and test set

In [8]:
from data_preprocessing import *
# Hold-out split of the loaded data with ratio 0.8 and seed=20.
# NOTE(review): 0.8 is presumably the training fraction — confirm in split_data.
y_tr, x_tr, y_te, x_te = split_data(tX, y, 0.8, seed=20)

# Column index handed to get_jets as the jet-number feature.
PRI_jet_num = 22

# One list of feature-column indices per jet group (four groups). Per the
# section heading below, these are the features treated as undefined for the
# corresponding group.
undefined_features = [
    [4, 5, 6, 12, 22, 23, 24, 25, 26, 27, 28, 29],
    [4, 5, 6, 12, 22, 26, 27, 28],
    [22],
    [22],
]

#### Split train and test set into jets according to above undefined features

In [9]:
# Partition the training split into per-jet subsets, driven by the jet-number
# column and the per-jet undefined-feature lists. The call yields the per-jet
# feature sets, the matching labels and an index structure (presumably the row
# positions of each jet in x_tr — confirm in get_jets).
jet_train, y_jet_train, index = get_jets(x_tr, y_tr, PRI_jet_num, undefined_features, list_ = True)

In [10]:
# Same per-jet partitioning for the held-out split. index_te is kept because it
# is later passed to combine_jets when the per-jet predictions are merged.
jet_test, y_jet_test, index_te = get_jets(x_te, y_te, PRI_jet_num, undefined_features, list_ = True)

#### Data processing : standardization, normalisation, removal of correlated features

See data_preprocessing.py for further documentation

In [13]:
from data_preprocessing import preprocessing_data
# Run every jet through preprocessing_data. All three optional steps
# (normalization, standardization, correl) are switched off in this
# configuration, for the training and the test jets alike.
jet_pr_tr = [
    preprocessing_data(train_jet, normalization=False, standardization=False, correl=False)
    for train_jet in jet_train
]

jet_pr_te = [
    preprocessing_data(test_jet, normalization=False, standardization=False, correl=False)
    for test_jet in jet_test
]


## Hyperparameter tuning without regularisation

We can either find the optimal degree across lambdas or the optimal lambda across degrees. We have chosen to find the optimal degree across lambdas.


**Methods :**
- **1** : Least squares
- **2** : Least squares gradient descent (least squares GD)
- **3** : Least squares stochastic gradient descent (least squares SGD)
- **5** : Logistic regression

**Screening range and input parameters** :
- *Least squares (normal equations):* degrees 1 to 20, standardized data
  - accuracy = 0.77858
  - best degrees for each jet: [4,4,9,9]
- *Least squares gradient descent:* optimal degree was 1, with standardized data, 15000 iterations, gamma = 0.000001. No need for optimisation
  - accuracy = 0.74346
  - best degree for all jets : 1
- *Least squares stochastic gradient descent:* standardized data, degrees 1 to 3, gamma = 0.000000001
  - accuracy = 0.71126
  - best parameters for all jets : 
- *Logistic regression:* normalized (only) data, degrees = np.arange(5,10,1), lambdas = np.logspace(-20,-15,1), gamma = 0.000001
  - accuracy on test set = 0.70682
  - best parameters for all jets : best degree 9 with lambda 1e-20 

**Tuning and predictions**

In [15]:
from cross_validation import choose_your_methods
from run import select_best_parameter, best_w
from utils import *

# Best polynomial degree selected for each jet
best_degs = []

# Fitted weights and held-out predictions for each jet
w_list = []
y_pred_list = []

# enumerate() numbers the jets without hard-coding how many there are. The
# counter is called jet_id so that it does not overwrite the `index` value
# returned by get_jets for the training split.
for jet_id, (jet, y_jet) in enumerate(zip(jet_pr_tr, y_jet_train)):
    print("Jet n°", jet_id)

    # Cross-validate the polynomial degree for this jet: method 3, 5 folds,
    # selection by accuracy, lambda fixed to 0.
    # NOTE(review): np.arange(1, 3, 1) only covers degrees 1 and 2.
    best_degree, acc_te, _, _ = select_best_parameter(
        y_jet, jet, 3, 'degree', by_accuracy=True, seed=1, k_fold=5,
        degrees=np.arange(1, 3, 1), lambdas=np.array([0]), gamma=0.000000001)
    best_degs.append(best_degree)

    # Fit the final weights. best_w is given the raw jet plus the degree, so no
    # polynomial expansion is built here (the previously computed, never-used
    # expansion was dropped).
    # NOTE(review): gamma is passed to select_best_parameter but not to best_w —
    # confirm best_w trains with the intended step size.
    w = best_w(y_jet, jet, 3, 0, best_degree)
    w_list.append(w)

# Predict on the held-out jets, each with its own selected degree and weights
for (jet, w, deg) in zip(jet_pr_te, w_list, best_degs):
    x_augm_te = build_poly(jet, deg)
    y_pred = x_augm_te.dot(w)
    y_pred_list.append(y_pred)
    

Jet n° 0
For method n°:1
Best degree =1, loss for k-folds cross validation=0.7229373087279484, best lambda=0
Jet n° 1
For method n°:1
Best degree =3, loss for k-folds cross validation=0.8123825812887364, best lambda=0
Jet n° 2
For method n°:1
Best degree =4, loss for k-folds cross validation=0.7800326056094004, best lambda=0
Jet n° 3
For method n°:1
Best degree =4, loss for k-folds cross validation=0.780800002924211, best lambda=0


**Compute accuracy**

In [16]:
from utils import accuracy
# Merge the per-jet predictions into a single vector; combine_jets is given
# index_te from the test-set get_jets call (presumably to restore the original
# row order — confirm). The merged vector is then scored against y_te.
y_predict = combine_jets(y_pred_list, index_te)
print("accuracy on test set: ", accuracy(y_te, y_predict))

(50000,)
accuracy on test set:  0.79948


## Hyperparameter tuning with regularisation

**Methods, screening range and parameters :**
- **4**: *Ridge regression:* standardized (only) data, degrees = np.array([1,5,6,7,8,10,12]), lambdas = [0.009999, 0.1, 0.001]
  - accuracy on test set = 0.82446, optimal degrees = [10,12,] with lambdas = 0.009999
- **6**: *Regularized logistic regression:* normalized (only) data, degrees = 5 to 9, lambdas = np.logspace(-20,-15,1), gamma = 0.000001
  - accuracy on test set = 0.69024

In [5]:
from run import select_best_parameter, best_w
from utils import build_poly


# Per-jet results of the regularised search: selected degree, selected lambda,
# fitted weights, and the accuracy value returned by the search.
best_degs = []
best_lbds = []
w_list = []
accuracy_jet = []

# enumerate() numbers the jets without hard-coding how many there are. The
# counter is called jet_id so that it does not overwrite the `index` value
# returned by get_jets for the training split.
for jet_id, (jet, y_jet) in enumerate(zip(jet_pr_tr, y_jet_train)):
    print("Jet n°:", jet_id)

    # Cross-validate degree and lambda for this jet with method 4 (ridge
    # regression in the list above), using 5 folds.
    best_degree, rmse_te, best_lambda, accuracy_deg = select_best_parameter(
        y_jet, jet, 4, 'degree', seed=200, k_fold=5,
        degrees=np.arange(2, 7, 1), lambdas=[0.0099, 0.00001, 0.01])
    best_degs.append(best_degree)
    best_lbds.append(best_lambda)

    accuracy_jet.append(accuracy_deg)

    # Fit the final weights. best_w is given the raw jet plus the degree, so no
    # polynomial expansion is built here (the previously computed, never-used
    # expansion was dropped).
    w = best_w(y_jet, jet, 4, best_lambda, best_degree)
    w_list.append(w)

NameError: name 'jet_process_tr' is not defined

In [None]:
# Held-out predictions per jet: expand each test jet to its selected degree and
# multiply by that jet's fitted weights.
y_pred_list = [
    build_poly(test_jet, degree).dot(weights)
    for test_jet, weights, degree in zip(jet_pr_te, w_list, best_degs)
]

In [None]:
# Merge the per-jet predictions into a single vector; combine_jets is given
# index_te from the test-set get_jets call (presumably to restore the original
# row order — confirm). The merged vector is then scored against y_te.
# `accuracy` must already be in scope from an earlier cell's utils import.
y_predict = combine_jets(y_pred_list, index_te)
print("Accuracy on test set:", accuracy(y_te, y_predict))

## Cross validation plot example

#### Ridge, jet 1 (processed)

In [2]:
from cross_validation import select_best_degree
# Cross-validation example on jet 1. All arguments are positional: labels,
# features, then 4, False, 200, 5, a degree grid (1..6) and a lambda grid
# (three log-spaced values from 1e-7 to 1e-2).
# NOTE(review): 4 is presumably the method id (ridge) and 200 / 5 the seed and
# fold count, mirroring the select_best_parameter calls earlier in the
# notebook; the meaning of False is not visible here — confirm against
# cross_validation.select_best_degree.
# This cell needs y_jet_train and jet_pr_tr, so the jet-splitting and
# preprocessing cells must be run first.
best_degree, rmse_te, best_lambda, accuracy_deg = select_best_degree(y_jet_train[1], jet_pr_tr[1], 4, False, 200, 5, np.arange(1,7,1), np.logspace(-7,-2,3))  

NameError: name 'y_jet_train' is not defined