# Experiment #1-2: Derive HBRL rule set

Run the following steps.

1. Fit the model, i.e. iterate the L1 rule derivation.

2. Predict probabilities by running the rule lists.

#### Note

Tensor variable names follow the rules below.

* The lowercase letters at the right of a name denote rule vectors: s, u, r, ...

* Capital letters denote tensors: X,...

* Lowercase letters followed by an underscore "\_" denote rule vectors in the original primitive condition space: s_, u_, r_

* Lowercase letters without a trailing "\_" denote rule vectors in the reduced space: s, u, r

* Lowercase letters preceded by "L1_" denote layer-1 rule vectors: L1_s, L1_u, L1_r

* "s" denotes rule vectors whose elements take values in [-inf, inf]

* "u" denotes rule vectors that are converted from "s", whose elements take values in [0, 1]

* "r" denotes the binarized rule vectors that are converted from "u", whose elements take values in {0, 1}, i.e. each element is either 0 or 1.



In [None]:
# coding: utf-8

import os,sys,pathlib
ROOT_PATH = pathlib.Path.cwd().parent.resolve().as_posix()
sys.path.insert(0,ROOT_PATH)

import numpy as np

from hbrl_hmc import reset_gpu
from ijcai import HBRL
from utils import pickle_store, pickle_load
from utils import isnotebook

import getopt
import timeit
import toml

In [None]:
# Directory handed to pickle_load / pickle_store for the input data and the result files.
DATA_DIR = '/media/data/ijcai_2021/'
# Directory of the TOML experiment configs; the trailing slash matters because
# config file names are appended by plain string concatenation.
CONFIG_PATH = '{}/config/ijcai_2021/'.format(ROOT_PATH)

In [None]:
def Exp_HBRL_sampling(config_file='test_HBRL.toml',debug=100,verbose=False,n_rounds=1):
    """
    Train HBRL on cross-validation folds, predict the test-label probabilities
    and store the collected results with pickle_store.

    Parameters
    ----------
    config_file : str
        File name of the TOML configuration, appended to CONFIG_PATH. It must
        provide the tables 'HyperParameters' and 'DataPath'.
    debug : int
        Debug level handed to the HBRL constructor.
    verbose : bool
        Handed to the HBRL constructor; if true, the hyper parameters are
        also printed before each round.
    n_rounds : int or None
        Number of cross-validation folds to run, starting at fold 0. The
        default of 1 runs only the first fold; None runs every fold found in
        the data. Values above the number of folds are clamped.

    Returns
    -------
    None
        The results are written via pickle_store rather than returned.

    Raises
    ------
    ValueError
        If n_rounds is smaller than 1 or the loaded data holds no fold.
    """
    # reject a bad round count before any expensive work is started
    if n_rounds is not None and n_rounds < 1:
        raise ValueError("n_rounds must be >= 1 or None, got {}".format(n_rounds))

    # reset GPU
    reset_gpu()

    # initialize the dict that collects one entry per round
    results = {'classifier':[],'y_prob':[],'y_test':[],'time(sec)':[]}

    # load hyper parameters from config file
    config_file_path = CONFIG_PATH + config_file
    config = toml.load(config_file_path)
    dic    = config['HyperParameters']
    dpath  = config['DataPath']

    # load data for k-fold cross validation;
    # an explicit file name takes precedence over the prefix
    file_name = dpath['data_file_name']
    prefix    = dpath['data_prefix']
    if file_name:
        data = pickle_load(directory=DATA_DIR,file_name=file_name)
        results['data_file_name'] = file_name
    else:
        data = pickle_load(directory=DATA_DIR,prefix=prefix)
        results['data_prefix'] = prefix

    n_splits = len(data['train']['X'])
    if n_splits < 1:
        raise ValueError("the loaded data does not contain any cross-validation fold")
    n_rounds = n_splits if n_rounds is None else min(n_rounds, n_splits)

    # hyper parameters for L1 Likelihood distribution
    dic['_eta_'] = np.asarray(dic['_eta_'])
    dic['_rho_'] = 2.0 * np.sum(dic['_eta_'])

    # learn the models and derive the label probabilities for the test data
    for i in range(n_rounds):
        print("running {}-th round of training and validation round.".format(i))
        # load data
        X_train = data['train']['X'][i]
        y_train = data['train']['y'][i]
        X_test  = data['test']['X'][i]
        y_test  = data['test']['y'][i]

        # set the dimension parameters
        (K_train,N_train)   = X_train.shape
        (N_train_y,L_train) = y_train.shape
        (K_test,N_test)     = X_test.shape
        (N_test_y,L_test)   = y_test.shape
        dic['K'] = K = K_train
        dic['L'] = L_train
        assert K_train == K_test, "train/test X differ in their first dimension"
        assert L_train == L_test, "train/test y differ in their second dimension"
        assert N_train == N_train_y, "X_train and y_train disagree on the sample count"
        assert N_test == N_test_y, "X_test and y_test disagree on the sample count"

        # hyper parameters for L1 Prior distribution
        # (re-initialized every round because dic is shared between rounds)
        dic['_mu_']    = 0.10 * dic['_zeta_'] / K  # we have to manually adjust the scale factor
        dic['L0_pos']  = 0            # first set the L0 layer rule position to 0
        dic['s_prev_'] = None         # No s_prev as L0_pos=0

        if verbose:
            for key, value in dic.items():
                print("dic[\'{}\']\t={}".format(key,value))

        # set the start time
        starttime = timeit.default_timer()

        # run model training and test
        model  = HBRL(dic,debug=debug,verbose=verbose)
        model  = model.fit(X_train,y_train)
        y_prob = model.predict_proba(X_test)

        # store the results; the time covers construction, fit and prediction
        results['classifier'].append(model)
        results['y_prob'].append(y_prob)
        results['y_test'].append(y_test)
        results['time(sec)'].append(timeit.default_timer() - starttime)

    # set parameters (taken from the last fold that was run)
    N_total     = X_train.shape[1] + X_test.shape[1]  # the sizes are the same for all cross-validation folds
    K_vecsize   = dic['K_target']    # # of primitive rules to search for at each L1 rules
    R_rulesize  = dic['max_rules']   # maximum # of L1 rules to stop the search
    U_minrules  = dic['min_rules']   # minimum # of rules left to stop the search
    M_minsample = dic['min_samples'] # minimum # of samples to continue searching for another L1 rule
    A_Alternate = dic['alternate_eta']  # if true, then use alternate _eta_ to capture both positives and negatives
    S_nSamples  = dic['nSamples']    # # of samples at MCMC
    T_nTune     = dic['nTune']       # # of tune steps at MCMC

    # store the results; the file prefix encodes the experiment settings
    store_prefix = 'HBRL_N={}_K={}_R={}_U={}_M={}_A={}_S={}_T={}'.format(
        N_total,K_vecsize,R_rulesize,U_minrules,M_minsample,A_Alternate,S_nSamples,T_nTune)
    if pickle_store(results,directory=DATA_DIR,module_name='ijcai_2021',prefix=store_prefix):
        print('experiment results stored successfully')

In [None]:
def print_usage(script_name=None):

    """
    Print the command-line usage of this module.

    Parameters
    ----------
    script_name : str or None
        Name shown in the usage line. A ".py" suffix is appended only when
        the name does not already end with one, so passing sys.argv[0]
        (e.g. "exp.py") no longer prints "exp.py.py". None is shown as the
        placeholder "<script>.py".

    Returns
    -------
    None

    """

    name = '<script>' if script_name is None else str(script_name)
    if not name.endswith('.py'):
        name += '.py'

    # the option lines keep their four-space indentation in the output
    usage = (
        'usage: {} \n'
        '    --config_file <config_file name>\n'
        '    --debug <debug level>\n'
        '    --verbose <True/False>'
    ).format(name)
    print(usage)



In [None]:
def main(args=('','')):
    '''
    Parse the arguments and run the HBRL sampling experiment.

    Parameters
    ----------
    args : sequence of str
        args[0] is the script name and the remaining items are getopt-style
        options: -h, --config_file <name>, --debug <int>, --verbose <True/False>.
        If args[0] contains "Jupyter", the options are ignored and the
        experiment is run with the fixed notebook settings.

    Returns
    -------
    None

    Notes
    -----
    Exits with status 2 on an unknown option or a non-integer --debug value,
    and with status 0 after printing the usage for -h.
    '''
    # get this script file name
    script_name = args[0]
    if "Jupyter" in script_name:
        print('this is from a jupyter nootbook')
        Exp_HBRL_sampling(config_file='test_HBRL.toml',debug=100,verbose=False)
        return

    args = args[1:]

    # read the parameters
    # NOTE(review): these command-line defaults ("test.toml", debug=0) differ from the
    # notebook branch above ('test_HBRL.toml', debug=100) -- confirm this is intended.
    params = {'config_file':"test.toml",'debug':0,'verbose':False}
    try:
        opts, _args = getopt.getopt(args,"h",['config_file=','debug=','verbose='])
    except getopt.GetoptError:
        print('args={}'.format(args))
        print_usage(script_name=script_name)
        sys.exit(2)

    for opt, arg in opts:
        # compare with == : `opt in ("--name")` is a substring test on a string,
        # not a membership test on a tuple
        if opt == '-h':
            print_usage(script_name=script_name)
            sys.exit()
        elif opt == '--config_file':
            params['config_file'] = arg
        elif opt == '--debug':
            try:
                params['debug'] = int(arg)
            except ValueError:
                # report a malformed level like any other bad option instead of a traceback
                print('invalid debug level: {}'.format(arg))
                print_usage(script_name=script_name)
                sys.exit(2)
        elif opt == '--verbose':
            # case-insensitive so that "true" / "TRUE" are honoured as well as "True"
            params['verbose'] = arg.strip().lower() in ('true', '1', 'yes')

    print("{}(config_file={},debug={},verbose={})".format(script_name,params['config_file'],params['debug'],params['verbose']))

    # execute the sampling
    Exp_HBRL_sampling(config_file=params['config_file'],debug=params['debug'],verbose=params['verbose'])

    return

In [None]:
if __name__ == "__main__":
    # main() takes its notebook branch when the script name contains "Jupyter",
    # so a marker name is passed in a notebook; otherwise the real command line is forwarded.
    entry_args = ("Jupyter notebook", "") if isnotebook() else sys.argv
    main(args=entry_args)
