# RENT hyperparameter search with the Bayesian information criterion (BIC)

This notebook illustrates how BIC can be used to determine the elastic net parameters as well as the cutoff parameters. 

In [1]:
import numpy as np
import pandas as pd
# Relax pandas' display limits so result tables are not truncated in the output.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 2000)

import sys
# Add ../src to the module search path before importing RENT from it.
sys.path.append('../src')
from RENT import RENT, stability

import warnings
# NOTE(review): this silences every warning for the whole session, which can
# also hide relevant solver/library warnings — confirm that this is intended.
warnings.filterwarnings("ignore")

Load the dataset.

In [2]:
# Load the Wisconsin train/test splits. The first CSV column is dropped from
# every file (presumably a saved row index — verify against the data files).
train_data = pd.read_csv("data/wisconsin_train.csv").iloc[:,1:]
train_labels = pd.read_csv("data/wisconsin_train_labels.csv").iloc[:,1].values
test_data = pd.read_csv("data/wisconsin_test.csv").iloc[:,1:]
# Select the label column with a scalar index, exactly as for train_labels, so
# the result is a 1-D array rather than an (n, 1) column matrix.
test_labels = pd.read_csv("data/wisconsin_test_labels.csv").iloc[:,1].values

The parameter `BIC` is set to `True` in the RENT model initialization.

In [3]:
# Regularisation strengths C to be evaluated for the elastic net
# (at least one value is required).
my_C_params = [0.1, 1, 10]

# l1-ratios to be evaluated for the elastic net
# (at least one value is required).
my_l1_ratios = [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1]

# Set up the RENT classification model. BIC=True enables the BIC-based
# selection of the elastic net parameters from the candidates listed above.
model = RENT.RENT_Classification(
    data=train_data,
    target=train_labels,
    feat_names=train_data.columns,
    C=my_C_params,
    l1_ratios=my_l1_ratios,
    autoEnetParSel=True,
    BIC=True,
    poly='OFF',
    testsize_range=(0.25,0.25),
    scoring='mcc',
    classifier='logreg',
    K=100,  # number of models in the ensemble
    random_state=0,
    verbose=1,
)

data dimension: (399, 30)  data type: <class 'pandas.core.frame.DataFrame'>
target dimension: (399,)
regularization parameters C: [0.1, 1, 10]
elastic net l1_ratios: [0, 0.1, 0.25, 0.5, 0.75, 0.9, 1]
poly: OFF
number of models in ensemble: 100
random state: 0
verbose: 1


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 8 concurrent workers.


classifier: logreg
scoring: mcc


[Parallel(n_jobs=-1)]: Done   7 out of   7 | elapsed:    0.4s finished


The hyperparameter combination with the minimal BIC value is selected.

In [4]:
# Train the RENT model that was configured in the previous cell.
model.train()

In [5]:
# Show the BIC value of every candidate combination; in the displayed table
# the rows correspond to the l1-ratios and the columns to the C values.
model.get_BIC_matrix()

Unnamed: 0,0.1,1.0,10.0
0.0,260.603784,237.937293,235.038652
0.1,251.850404,238.18584,235.054014
0.25,221.010979,226.638275,235.077773
0.5,192.422296,215.515464,235.119253
0.75,193.187215,216.504365,235.162998
0.9,173.269467,211.131411,235.190304
1.0,169.750094,205.529354,235.208948


In [6]:
# Show the selected elastic net parameters. Judging from the BIC matrix, the
# returned tuple appears to be (C, l1_ratio) — verify against the RENT docs.
model.get_enet_params()

(0.1, 1.0)

Once the RENT model is trained, we select the cutoff hyperparameters, again with BIC.

In [7]:
# Candidate values for the three cutoff parameters.
# t1 and t2 run from 0.2 to 1.0 in steps of 0.05 (17 values). np.arange with a
# fractional step yields values that drift from the intended two-decimal grid,
# so the grid is built with np.linspace and rounded: the thresholds evaluated
# by the search are then exactly the ones reported and used afterwards.
cutoff_grid = np.round(np.linspace(0.2, 1.0, 17), 2)
cutoff_parameters = {'t1': cutoff_grid,
                     't2': cutoff_grid.copy(),
                     't3': [0.9, 0.95, 0.975, 0.99]}

# BIC value for every (t1, t2, t3) combination of the grid above.
BIC = model.BIC_cutoff_search(cutoff_parameters)

In [8]:
# Find every (t1, t2, t3) index triple that attains the minimal BIC.
# Ties are resolved towards the largest t1 index, then the largest t2 index,
# then the largest t3 index, i.e. the lexicographically largest index triple.
best_indices = np.argwhere(BIC == np.min(BIC))
idx1, idx2, idx3 = max(map(tuple, best_indices))

# t1 and t2 are taken from a floating-point grid; rounding to two decimals
# gives the intended threshold values.
tau1 = np.round(cutoff_parameters['t1'][idx1], 2)
tau2 = np.round(cutoff_parameters['t2'][idx2], 2)
# t3 candidates are given explicitly and may carry three decimals (0.975);
# rounding to two decimals would alter such a value, so it is used as is.
tau3 = cutoff_parameters['t3'][idx3]

print("Tau 1:", tau1, "; Tau 2:", tau2, "; Tau 3:", tau3)

Tau 1: 1.0 ; Tau 2: 1.0 ; Tau 3: 0.99


The features are then selected using the cutoff parameters found by the BIC cutoff hyperparameter search.

In [9]:
# Select the features using the three cutoff values determined above.
selected_features = model.select_features(tau_1_cutoff=tau1, tau_2_cutoff=tau2, tau_3_cutoff=tau3)

In [10]:
# Display the selection result (an array of positional column indices).
selected_features

array([ 7, 20, 21, 27])

In [11]:
# Translate the selected indices into the corresponding column names.
train_data.columns[selected_features]

Index(['F8', 'F21', 'F22', 'F28'], dtype='object')