# Hyper parameter tuning

## Load data

In [1]:
# Useful starting lines
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
%load_ext autoreload
%autoreload 2

In [2]:
# The star import is what brings load_csv_data into scope here.
from proj1_helpers import *
# Location of the training CSV, relative to the notebook's working directory.
DATA_TRAIN_PATH = 'data/train.csv'
# load_csv_data returns three values; presumably labels (y), the feature
# matrix (tX) and the sample ids — TODO confirm against proj1_helpers.
y, tX, ids = load_csv_data(DATA_TRAIN_PATH)

## Data preprocessing

#### Split data into train and test set

In [3]:
from data_preprocessing import *

# Index of the feature column handed to get_jets to partition the samples.
PRI_jet_num = 22

# One list of column indices per jet (four jets in total). These are the
# columns treated as undefined for that jet when the data is split by jet.
# NOTE(review): presumably get_jets drops these columns — confirm in
# data_preprocessing.py.
undefined_features = [
    [4, 5, 6, 12, 22, 23, 24, 25, 26, 27, 28, 29],
    [4, 5, 6, 12, 22, 26, 27, 28],
    [22],
    [22],
]

# Fixed seed so the train/test partition is reproducible across runs.
# NOTE(review): 0.8 is presumably the training fraction — confirm against split_data.
y_tr, x_tr, y_te, x_te = split_data(tX, y, 0.8, seed=20)

#### Split train and test set into jets according to above undefined features

In [4]:
# Partition the training split by the PRI_jet_num column. get_jets returns
# three values: the per-jet feature sets, the per-jet labels and `index`
# (presumably the row positions of each jet in x_tr — TODO confirm).
jet_train, y_jet_train, index = get_jets(x_tr, y_tr, PRI_jet_num, undefined_features, list_ = True)

In [5]:
# Same partitioning for the held-out split. index_te is kept because it is
# later passed to combine_jets together with the per-jet predictions.
jet_test, y_jet_test, index_te = get_jets(x_te, y_te, PRI_jet_num, undefined_features, list_ = True)

#### Data processing : standardization, normalisation, removal of correlated features

See data_preprocessing.py for further documentation

**Preprocessing flags per method** (`normalization`; `standardization`; `correl`, i.e. the keyword arguments of `preprocessing_data`):
- Least squares (normal equations): False; True; False (needs standardization)
- Least squares gradient descent: False; True; False
- Least squares stochastic gradient descent: False; True; False
- Ridge regression: False; True; False
- Logistic regression: True; False; False
- Regularized logistic regression: True; False; False

In [6]:
from data_preprocessing import preprocessing_data

# Apply the same three preprocessing switches to every jet of both splits.
# NOTE(review): each train and test jet is preprocessed on its own; confirm
# that preprocessing_data does not need training-set statistics to transform
# the test jets consistently.
jet_pr_tr = [
    preprocessing_data(train_jet, normalization = True, standardization = True, correl = True)
    for train_jet in jet_train
]
jet_pr_te = [
    preprocessing_data(test_jet, normalization = True, standardization = True, correl = True)
    for test_jet in jet_test
]


## Hyperparameter tuning without regularisation

We can either find the optimal degree across lambdas or the optimal lambda across degrees. We have chosen to find the optimal degree across lambdas.


**Methods :**
- **1** : Least squares
- **2** : Least squares gradient descent (least squares GD)
- **3** : Least squares stochastic gradient descent (least squares SGD)
- **5** : Logistic regression

**Screening range and input parameters** :
- *Least squares (normal equations):* degrees 1 to 20, standardized data
  - accuracy = 0.77858
  - best degrees for each jet: [4,4,9,9]
- *Least squares gradient descent:* optimal degree was 1, with standardized data, 15000 iterations, gamma = 0.000001. No need for optimisation
  - accuracy = 0.74346
  - best degree for all jets : 1
- *Least squares stochastic gradient descent:* standardized data, degrees 1 to 3, gamma = 0.000000001
  - accuracy = 0.74274
  - best parameters for all jets : [1,1,2,4]
- *Logistic regression:* normalized (only) data, degrees = np.arange(5,10,1), lambdas = np.logspace(-20,-15,1), gamma = 0.000001
  - accuracy on test set = 0.70682
  - best parameters for all jets : best degree 9 with lambda 1e-20, 2500 iterations

**Tuning and predictions**

In [None]:
from cross_validation import choose_your_methods
from proj1_helpers import select_best_parameter, best_w
from optimized_utils import *

# Best polynomial degree found for each jet, in jet order.
best_degs = []

# Fitted weights and test-set predictions for each jet, in jet order.
w_list = []
y_pred_list = []

# --- Tuning: choose the degree by 5-fold cross-validation, then fit the weights ---
# enumerate() numbers the jets without hard-coding their count and without
# reusing the name `index`, which already holds a result of get_jets.
for jet_no, (jet, y_jet) in enumerate(zip(jet_pr_tr, y_jet_train)):
    print("Jet n°", jet_no)

    # Method 1 (least squares) has no regularisation, hence the single lambda 0.
    # np.arange(1, 20, 1) screens degrees 1..19 (the stop value is excluded).
    best_degree, _, _, _ = select_best_parameter(
        y_jet, jet, 1, 'degree', by_accuracy = True, seed = 1, k_fold = 5,
        degrees = np.arange(1, 20, 1), lambdas = np.array([0]))
    best_degs.append(best_degree)

    # best_w receives the un-expanded jet together with the degree, so no
    # polynomial expansion is built here.
    w = best_w(y_jet, jet, 1, 0, best_degree)
    w_list.append(w)

# --- Prediction: expand each test jet to its jet's best degree and apply the weights ---
for jet, w, deg in zip(jet_pr_te, w_list, best_degs):
    x_augm_te = build_poly(jet, deg)
    y_pred_list.append(x_augm_te.dot(w))
    

**Compute accuracy**

In [None]:
from optimized_utils import accuracy
# Merge the per-jet predictions into one vector using the test-set jet
# indices returned by get_jets (presumably restoring the original row order
# of y_te — TODO confirm against combine_jets).
y_predict = combine_jets(y_pred_list, index_te)
# NOTE(review): y_predict holds raw x.dot(w) scores; presumably accuracy()
# thresholds them into class labels — confirm.
print("accuracy on test set: ", accuracy(y_te, y_predict))

## Hyperparameter tuning with regularisation

**Methods, screening range and parameters :**
- **4**: *Ridge regression:* standardized (only) data, degrees = np.array([1,5,6,7,8,10,12]), lambdas = [0.009999, 0.1, 0.001]
  - accuracy on test set = 0.82258, optimal degrees = [12] with lambdas = [0.001, 0.009999, 0.001, 0.009999]
- **6**: *Regularized logistic regression:* normalized (only) data, degrees = 5 to 9, lambdas = np.logspace(-20,-15,1), gamma = 0.000001
  - accuracy on test set = 0.65798

In [None]:
from proj1_helpers import select_best_parameter, best_w
from optimized_utils import build_poly

# Best degree, best lambda and fitted weights for each jet, in jet order.
best_degs = []
best_lbds = []
w_list = []

# enumerate() numbers the jets without hard-coding their count and without
# reusing the name `index`, which already holds a result of get_jets.
for jet_no, (jet, y_jet) in enumerate(zip(jet_pr_tr, y_jet_train)):
    print("Jet n°:", jet_no)

    # Method 6 = regularized logistic regression (see the method list above).
    # Grids actually screened by this call:
    #   np.arange(5, 9, 1)         -> degrees 5, 6, 7, 8 (stop value excluded)
    #   np.logspace(-25, -20, 1)   -> the single lambda 1e-25 (num=1 keeps only the start)
    # NOTE(review): the markdown above quotes degrees 5 to 9 and
    # logspace(-20, -15, 1); confirm which setting produced the reported accuracy.
    best_degree, _, best_lambda, _ = select_best_parameter(
        y_jet, jet, 6, 'degree', by_accuracy=False, seed = 1, k_fold = 5,
        degrees = np.arange(5, 9, 1), lambdas = np.logspace(-25, -20, 1),
        gamma = 0.000001)
    best_lbds.append(best_lambda)
    best_degs.append(best_degree)

    # best_w receives the un-expanded jet together with the degree, so no
    # polynomial expansion is built here.
    w = best_w(y_jet, jet, 6, best_lambda, best_degree)
    w_list.append(w)

In [None]:
# One prediction vector per test jet: expand the jet to that jet's best
# degree, then apply the weights fitted on the matching training jet.
y_pred_list = [
    build_poly(test_jet, degree).dot(weights)
    for test_jet, weights, degree in zip(jet_pr_te, w_list, best_degs)
]

In [None]:
# Imported here so this section does not depend on the cells of the
# "without regularisation" section having been run first.
from optimized_utils import accuracy

# Merge the per-jet predictions into one vector using the test-set jet
# indices returned by get_jets, then score it against the held-out labels.
y_predict = combine_jets(y_pred_list, index_te)
print("Accuracy on test set:", accuracy(y_te, y_predict))

## Cross validation plot example

#### Ridge, jet 1 (processed)

In [None]:
from cross_validation import select_best_degree
# Runs the degree search on training jet 1 over degrees 1..6
# (np.arange(1,7,1)) and three lambdas from 1e-7 to 1e-2 (np.logspace(-7,-2,3)).
# NOTE(review): by analogy with the select_best_parameter calls, the
# positional arguments 4, False, 200, 5 are presumably method=4 (ridge, per
# the heading), by_accuracy=False, seed=200 and k_fold=5 — confirm against
# select_best_degree's signature. The plot is presumably drawn as a side
# effect of this call.
best_degree, rmse_te, best_lambda,_ = select_best_degree(y_jet_train[1], jet_pr_tr[1], 4, False, 200, 5, np.arange(1,7,1), np.logspace(-7,-2,3))  

#### Accuracy plot

Accuracy for different degrees, using the best parameters found by the parameter optimization methods above.

In [None]:
from plots import accuracy_visualization

# Recorded accuracies, 14 values per method, copied from earlier runs
# (they are not recomputed by this cell).

# Least squares
acc_LS = [
    0.70876, 0.74764, 0.74754, 0.74128, 0.71198, 0.68274, 0.69782,
    0.54796, 0.4796, 0.62668, 0.46492, 0.56832, 0.47284, 0.57026,
]

# Ridge regression
acc_R = [
    0.7635, 0.78948, 0.7994, 0.8055, 0.80728, 0.80928, 0.81468,
    0.82088, 0.823, 0.82204, 0.8232, 0.82524, 0.7631, 0.8148,
]

# Logistic regression
acc_Logi = [
    0.66536, 0.66536, 0.57554, 0.66536, 0.57554, 0.66536, 0.57554,
    0.66536, 0.57554, 0.66536, 0.57554, 0.66536, 0.57554, 0.66536,
]

accuracy_visualization(acc_LS, acc_R, acc_Logi)