# Redes Neuronales - Trabajo Práctico N° 2 - Ejercicio 1 - Regresión Logística
# Notebook #4: K Folding + HyperOpt en regresión logística
En esta notebook se emplea validación cruzada K-Fold para optimizar con HyperOpt los hiperparámetros de un modelo de regresión logística.
### Integrantes del grupo
* Kammann, Lucas Agustín
* Gaytan, Joaquín Oscar

In [1]:
import numpy as np

In [2]:
import pandas as pd
pd.options.mode.chained_assignment = None  # silence pandas' chained-assignment (SettingWithCopy) warning; default='warn'

In [3]:
# Load the diabetes dataset from its comma-separated file
df = pd.read_csv('../../databases/diabetes.csv', sep=',')

# Preview the first five rows
df.head(5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
from src.rl_helper import run_model_with_kfold
from src.helper import  print_metrics

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


In [5]:
# Baseline run with hand-picked hyperparameters; the helper returns one
# metrics dict per split (train, validation, test).
train_dict, valid_dict, test_dict = run_model_with_kfold(
    # data and splitting
    df=df, test_size=0.2, folds=5, random_state=15,
    # model hyperparameters
    degree=2, l1=0, l2=1e-2,
    # optimizer settings
    optimizer='adam', learning_rate=0.1, beta_1=0.9, beta_2=0.9,
    # training loop
    batch_size=32, epochs=500,
    # run bookkeeping
    tag='best3', tensorboard_on=False, checkpoints_on=True, summary_on=False,
);

In [6]:
# Report the metrics of the baseline run for the train, validation and test splits
print_metrics(train_dict, valid_dict, test_dict)

------------------- Main metric -------------------
[AUC] Train: 0.8380 - Valid: 0.8613 - Test: 0.8025
---------------- Secondary metrics ----------------
[PPV] Train: 0.5517 - Valid: 0.5718 - Test: 0.4590
[NPV] Train: 0.9118 - Valid: 0.9528 - Test: 0.9013
[SEN] Train: 0.8972 - Valid: 0.9474 - Test: 0.8522
[SPE] Train: 0.5761 - Valid: 0.5908 - Test: 0.5574


# Using HyperOpt

In [17]:
import hyperopt

In [18]:
from hyperopt.pyll import scope

In [19]:
def objective(kwargs):
    """Objective function for hyperopt: the negated validation AUC of one trial.

    Args:
        kwargs: dict with the hyperparameters sampled for this trial; it is
            forwarded as keyword arguments to run_model_with_kfold. The dict
            itself is left unmodified.

    Returns:
        The 'auc' entry of the validation metrics returned by
        run_model_with_kfold, multiplied by -1 (hyperopt minimizes the
        objective, so a larger AUC must map to a smaller value).
    """
    # Settings held constant across all trials. They are merged into a NEW
    # dict (so the caller's dict is not mutated) and are listed last so that
    # they take precedence over any same-named entry in kwargs.
    params = {
        **kwargs,
        'tensorboard_on': False,
        'summary_on': False,
        'epochs': 500,
        'batch_size': 32,
        'optimizer': 'adam',
        'beta_1': 0.9,
        'beta_2': 0.9,
    }

    # Run the model with the given hyperparameters and keep only the
    # validation metrics (second element of the returned triple).
    # NOTE (from the original authors): the same validation set is used for
    # both the "valid" and "test" outputs here; this was needed to reuse the
    # helper and is stated not to affect the result -- TODO confirm against
    # src.rl_helper.
    _, eval_valid, _ = run_model_with_kfold(df=df, folds=5, test_size=0.2, random_state=15, **params)
    return -1 * eval_valid['auc']

In [20]:
# Search space handed to hyperopt. The three hp.choice dimensions pick one
# value from a fixed list; 'degree' is a quantized uniform draw between
# 1 and 6 (step 1) cast to int.
space = dict(
    learning_rate=hyperopt.hp.choice('learning_rate', [1.0, 0.5, 0.1, 0.01]),
    l1=hyperopt.hp.choice('l1', [0, 1e-2, 1e-3, 1e-4]),
    l2=hyperopt.hp.choice('l2', [0, 1e-2, 1e-3, 1e-4]),
    degree=scope.int(hyperopt.hp.quniform('degree', 1, 6, 1)),
)

In [21]:
# Search strategy passed to fmin: hyperopt's Tree-structured Parzen Estimator (TPE)
algorithm = hyperopt.tpe.suggest

In [22]:
# Container that records every evaluation performed by the search
trials = hyperopt.Trials()

# Minimize `objective` over `space` for 100 evaluations.
# No `rstate` is passed, so the search itself is not seeded here.
best = hyperopt.fmin(
    fn=objective,
    space=space,
    algo=algorithm,
    max_evals=100,
    trials=trials,
)

100%|█████████████████████████████████████████████| 100/100 [33:48<00:00, 20.29s/trial, best loss: -0.8732455733500564]


In [24]:
# NOTE: for hp.choice dimensions fmin reports the *index* of the selected option,
# not its value (e.g. 'l1': 2 is the third entry of the l1 list, i.e. 1e-3).
# hyperopt.space_eval(space, best) turns the indices back into actual values.
best

{'degree': 2.0, 'l1': 2, 'l2': 3, 'learning_rate': 2}

In [25]:
# Decode the winning trial into actual hyperparameter values. fmin reports
# hp.choice dimensions as list indices, so the values are recovered with
# space_eval instead of being transcribed by hand; this keeps the cell in
# sync with the search result when the notebook is re-run.
# For the recorded result this yields learning_rate=0.1, l1=1e-3, l2=1e-4, degree=2.
best_params = hyperopt.space_eval(space, best)

# Re-run the model with the selected hyperparameters; the remaining settings
# match the ones held fixed during the search.
train_dict, valid_dict, test_dict = run_model_with_kfold(
    # data and splitting
    df=df, test_size=0.2, folds=5, random_state=15,
    # optimizer settings held fixed during the search
    optimizer='adam', beta_1=0.9, beta_2=0.9,
    # training loop
    batch_size=32, epochs=500,
    # run bookkeeping
    tag='best7', tensorboard_on=False, checkpoints_on=True, summary_on=False,
    # searched hyperparameters: learning_rate, l1, l2, degree
    **best_params
);

In [26]:
# Report the metrics of the run with the HyperOpt-selected hyperparameters
print_metrics(train_dict, valid_dict, test_dict)

------------------- Main metric -------------------
[AUC] Train: 0.8615 - Valid: 0.8644 - Test: 0.7907
---------------- Secondary metrics ----------------
[PPV] Train: 0.5639 - Valid: 0.5927 - Test: 0.4648
[NPV] Train: 0.9257 - Valid: 0.9487 - Test: 0.8942
[SEN] Train: 0.9102 - Valid: 0.9428 - Test: 0.8304
[SPE] Train: 0.5958 - Valid: 0.6186 - Test: 0.5833
