## Binary-class Threshold-based Rule Discovery （BTRD）

### 载入必需的库

In [7]:
# Basic models
import traceback
import numpy as np
import pandas as pd
import scipy.stats as scs

# Data operation models
from sklearn.model_selection import StratifiedKFold
from sklearn import cross_validation
from sklearn.model_selection import ParameterGrid
from sklearn.preprocessing import LabelEncoder

# Evaluation models
import sklearn.metrics as sm

### run_Config()
#### 配置参数的方法
#### 输入：None
#### 输出：
#####        data_name: string，存放数据集的文件的名称，文件格式默认为excel，与代码放在同一个文件夹内
#####        k_holdout: int，外层k折交叉验证的折数
#####        k_cv：int，内层k折交叉验证的折数
#####        pars: list，存放超参数组合的列表，结构为pars = [par1, par2,..., parN], 其中 pari =  {'hpar1':  value1, 'hpar2': value2, ...,'hparN': valueN}

In [19]:
# Configurations for the model
def run_Config():
    data_name = 'Thoracic_test.xlsx'
    k_holdout = 5
    k_cv = 5
    origin_pars = {
        'grid_indicator':['GM'], # 'F1(Macro)' and 'Acc_Macro' are also acceptable
        'indicator':['F1(Macro)','GM','Acc_Macro'],
        'chi_name': ['p','chi2'],
        'chi_value':[0.05]
    }
    pars = list(ParameterGrid(origin_pars))
    return data_name, k_holdout, k_cv, pars

### run_load_data()
#### 载入数据集的方法（注意载入数据集的特征必需是非数值或离散数值型的，如果是连续数值特征，需先经其他处理转换成符合要求的特征）
#### 输入：
#####        string，存放数据集的文件的名称，文件格式默认为excel，与代码放在同一个文件夹内
#### 输出：
#####        data: pandas.DataFrame，不带类别标记(label)的数据集，一行一个样本，一列对应一个特征，带有特征名（header_name）
#####        label: pandas.DataFrame，data数据集中每个样本对应的类别标记，一行一个类别标记，只有一列

In [9]:
# Load data
def run_load_data(data_name):
    all = pd.read_excel(data_name)
    label = all.iloc[:, -1]    # get labels
    data = all.iloc[:, :-1] # get data
    # some preprocessing...
    return data, label

### run_doubleCV()
#### 通过内外两层交叉验证选出最优模型后进行测试并返回结果的主方法，只与数据的分配方式有关，与其具体使用何种分类算法无关
#### 输入：
#####        all_fea: pandas.DataFrame，不带类别标记(label)的数据集，一行一个样本，一列对应一个特征，带有特征名（header_name）
#####        all_label: pandas.DataFrame，data数据集中每个样本对应的类别标记，一行一个类别标记，只有一列
#####        k_holdout:int，外层k折交叉验证的折数
#####        k_cv：int，内层k折交叉验证的折数
#####        pars:list，存放超参数组合的列表，结构为pars = [par1, par2,..., parN], 其中 pari =  {'hpar1':  value1, 'hpar2': value2, ...,'hparN': valueN}
#### 输出：
#####        res_list: list，存放每一次测试得到的各项度量指标值，res_list = [res1, res2, ..., resk]，其中resi = {'indicator1':value1, 'indicator2':value2, ... 'indicatorM':valueM}
#####        opt_pars_list: list，存放对应res_list结果的最优参数组合， opt_pars_list = [optpar1, optpar2, ..., optparM]， 其中optpari =  {'hpar1':  value1, 'hpar2': value2, ...,'hparN_bar': valueN_bar}，注意optpari里面的超参数可能比最初输入的多，因为一些参数也会输出，例如Rule Discovery中自适应生成的threshold的值。

In [10]:
# Run both HoldoutCV and GridSearchCV
def run_doubleCV(all_fea, all_label, k_holdout, k_cv, pars):
    # HoldoutCV
    i_t = 0
    res_list = []
    opt_pars_list = []
    all_label_num = get_SimpleNumCode(all_label)
    all_fea_num = np.ones(all_label_num.shape)
    
    holdoutcv = StratifiedKFold(n_splits = k_holdout, shuffle = True)
    for i_learn, i_test in holdoutcv.split(all_fea_num, all_label_num):
        # Times
        i_t = i_t + 1
        print 'Round ', str(i_t), ' Holdout CV----------------------'
        
        # Obtain current learning and heldout data
        learn_fea, test_fea = all_fea.iloc[i_learn,:], all_fea.iloc[i_test,:]
        learn_label, test_label = all_label.iloc[i_learn], all_label.iloc[i_test]
        
        # GridSearchCV
        j_t = 0
        optdata = {'score':0}
        for i_pars in pars:
            # times
            j_t = j_t + 1
            print 'round ', str(j_t), ' gridsearch cv----------------'
            pars_score = []
            learn_label_num = get_SimpleNumCode(learn_label)
            learn_fea_num = np.ones(learn_label_num.shape)
            
            gridcv = StratifiedKFold(n_splits = k_cv, shuffle = True)
            for i_train, i_valid in gridcv.split(learn_fea_num, learn_label_num):
                # obtain current training and validation data
                train_fea, valid_fea = learn_fea.iloc[i_train,:], learn_fea.iloc[i_valid,:]
                train_label, valid_label = learn_label.iloc[i_train], learn_label.iloc[i_valid]
                
                # learn the model
                # i_pars = {'par_name1':par1,'par_name2':par2,...,'par_nameN':parN}
                valid_pre, pars_new = run_RuleDiscovery(i_pars, train_fea, train_label, valid_fea)
                eval_dict = get_binary_eval(valid_pre, valid_label)
                grid_score = eval_dict[i_pars['grid_indicator']]
                pars_score.extend([grid_score])
            if np.mean(pars_score) > optdata['score']:
                optdata['pars'] = pars_new
                optdata['score'] = np.mean(pars_score)
        
        # Holdout testing
        # best_pars is a dict too
        best_pars = optdata['pars']
        test_pre, _ = run_RuleDiscovery(best_pars, learn_fea, learn_label, test_fea)
        
        # Evaluate the prediction
        res_now = get_binary_eval(test_pre, test_label)
        
        # Save results
        res_list.append(res_now)
        opt_pars_list.append(best_pars)
        
    return res_list, opt_pars_list

### get_SimpleNumCode()
#### 将非数值特征简单数值化的方法，例如输入为['R','G','B']，输出为[2, 1, 0]
#### 输入：
#####        y: pandas.DataFrame或np.array或list均可， 一列非数值特征，y = [fea1, fea2, ..., feaD]
#### 输出：
#####        y：pandas.DataFrame或np.array或list均可， 简单数值化后的特征，y = [fea1, fea2, ..., feaD]

In [11]:
def get_SimpleNumCode(y):
    clf = LabelEncoder()
    clf.fit(y)
    return clf.transform(y)

### run_RuleDiscovery()
#### 训练、验证及测试二分类阈值自适应Rule Discovery的主方法
#### 输入：
#####        p: dict，一组超参数字典，p =  {'hpar1':  value1, 'hpar2': value2, ...,'hparN': valueN}
#####        X: pandas.DataFrame，不带类别标记(label)的数据集，一行一个样本，一列对应一个特征，带有特征名（header_name）
#####        y: pandas.DataFrame，data数据集中每个样本对应的类别标记，一行一个类别标记，只有一列
#####        Z: pandas.DataFrame，不带类别标记(label)的数据集，一行一个样本，一列对应一个特征，带有特征名（header_name）
#### 输出：
#####        zpre: list，存放BTRD算法对当前测试数据集的预测类别标记，zpre = [label1, label2, ..., labelX]
#####        p: dict，一组经过BTRD算法更新后的超参数字典，p =  {'hpar1':  value1, 'hpar2': value2, ...,'hparN_bar': valueN_bar}

In [12]:
# Run Rule Discovery
def run_RuleDiscovery(p, X, y, Z):
    # train and validate rules
    # v-values, f-features, c-classes, t-thresholds
    # rule = [(v,f,c),(v,f,c),...(v,f,c)]
    # threshold = [(t,c),(t,c),...,(t,c)]
    rule = get_rules(X, y, p)
    pos_rule, threshold, pos_name, neg_name = get_binary_validation(X, y, rule, p)
    print 'run_RuleDiscovery:', ' pos:', pos_name, '; neg:', neg_name
    p['threshold'] = threshold
    
    # test rules
    zpre = get_binary_predict(Z, pos_rule, threshold, pos_name, neg_name)
    
    return zpre, p

### get_rules()
#### 根据训练数据集及其类别标记，生成若干条规则的方法
#### 输入：
#####        data: pandas.DataFrame，不带类别标记(label)的数据集，一行一个样本，一列对应一个特征，带有特征名（header_name）
#####        label: pandas.DataFrame，data数据集中每个样本对应的类别标记，一行一个类别标记，只有一列
#####        par: dict，一组超参数字典，p =  {'hpar1':  value1, 'hpar2': value2, ...,'hparN': valueN}
#### 输出：
#####        rule: list，存放BTRD算法对当前训练数据集生成的规则，rule = [r1, r2, ..., rR]，其中ri = (value, feature, class)，表示若在第feature个特征上取value的值，那么倾向于判定为属于第class类
### ---------------------------------------------------------------------
### get_square()
#### 根据当前特征（一列）及类别标记（一列）生成一个情形分析表（Contingency Table）的方法，表中第i行j列表示特征的第j个取值在第i个类中出现的次数
#### 输入：
#####        fea: pandas.DataFrame，一列特征，维度是当前训练数据集样本总数，带有特征名（header_name）
#####        label: pandas.DataFrame，data数据集中每个样本对应的类别标记，一行一个类别标记，只有一列
#### 输出：
#####        F: pandas.DataFrame，当前特征与类别构成的情形分析表，一行是一个类，一列是该特征包含的一个值
### ---------------------------------------------------------------------
### get_index()
#### 找出一个列表中某元素的所有下标
#### 输入：
#####        lst_now: list，目标列表
#####        name_now: element（any suitable type for a list），目标元素
#### 输出：
#####        No name: list，出现在lst_now里面的所有name_now对应的下标组成的列表
### ---------------------------------------------------------------------
### get_chi_test()
#### 根据情形分析表及超参数做卡方检测的方法
#### 输入：
#####        square: pandas.DataFrame，某个特征与类别构成的情形分析表，一行是一个类，一列是该特征包含的一个值
#####        par: dict，一组超参数字典，par =  {'hpar1':  value1, 'hpar2': value2, ...,'hparN_bar': valueN_bar}
#### 输出：
#####        chi_tag: int或boolean，1表示该特征通过检验，可以保留；0表示未通过检验，删去该特征
### ---------------------------------------------------------------------
### get_fea_rules()
#### 为每一个类产生对应规则的方法
#### 输入：
#####        square: pandas.DataFrame，某个特征与类别构成的情形分析表，一行是一个类，一列是该特征包含的一个值
#####        fea_name: string，当前特征的名字
#### 输出：
#####        rule_fea: list，存放当前特征对当前训练数据集生成的规则，rule_fea = [rf1, rf2, ..., rfFR]，其中fri = (value, current feature, class)，表示若在当前特征上取value的值，那么倾向于判定为属于第class类
### ---------------------------------------------------------------------
### get_weighted_square()
#### 
#### 输入：
#####        square: pandas.DataFrame，某个特征与类别构成的情形分析表，一行是一个类，一列是该特征包含的一个值
#### 输出：
#####        square: pandas.DataFrame，更新后的某个特征与类别构成的情形分析表，一行是一个类，一列是该特征包含的一个值，表内每个元素不再是频数，而是根据BTRD方法算出的概率

In [13]:
# Train rules
def get_rules(data, label, par):
    fea_name = data.columns.values # get names of features
    rule = []
    for i_f in fea_name:
        square_now = get_square(data[i_f], label)
        chi_tag_now = get_chi_test(square_now.values, par)
        if chi_tag_now == 0: # the current feature would not be considered(selected)
            continue 
        else:
            rule_now = get_fea_rules(square_now, i_f)
        rule.extend(rule_now)
    return rule

def get_square(fea, label):
    # this function is used to generate the contingency table for fea and class_name
    value_name = set(fea)  # get names of current features
    class_name = set(label) # get names of classes
    F = []
    for i_c in class_name:
        f = []
        for i_v in value_name:
            v_index = get_index(fea, i_v) # find index of current value
            c_index = get_index(label, i_c) # find index of current class
            f_v2c = len(set(v_index).intersection(c_index)) # frequence of each value-class pair  
            f.extend([f_v2c])
        F.extend([f])
    F = pd.DataFrame(F, columns = value_name) # transform double list into dataframe
    F = F.rename(lambda x:list(class_name)[x])
    return F

def get_index(lst_now, name_now):
    return [i for i,j in zip(range(len(lst_now)), lst_now) if j == name_now]

def get_chi_test(square, par):
    # this function is used to generate a tag according to threshold
    threshold_value = par['chi_value']
    threshold_name = par['chi_name']
    chi2, p, _, _ = scs.chi2_contingency(square)
    if threshold_name is 'chi2':
        chi_tag = 1 if threshold_value < chi2 else 0
    elif threshold_name is 'p':
        chi_tag = 1 if threshold_value < p else 0 
    return chi_tag

def get_fea_rules(square, fea_name):
    # this function is used to generate rules for each class from the current features 
    wsquare = get_weighted_square(square)
    best_class_name = wsquare.apply(np.argmax, axis = 0)
    rule_fea = [] # rules generated in current feature
    for i, i_v in enumerate(wsquare.columns):
        rule_fea.extend([(i_v, fea_name, best_class_name[i])]) # (value, feature, class)  
    return rule_fea

def get_weighted_square(square):
    r = square.apply(np.sum, axis = 1) # summation of columns
    for i in xrange(len(r)):
        square.ix[i,:] = square.ix[i,:].apply(lambda x:x/float(r[i])) # ratio of each element
    return square


### get_binary_validation()
#### 根据验证数据集及其类别标记，生成合适的少数类规则及其对应阈值的方法
#### 输入：
#####        data: pandas.DataFrame，不带类别标记(label)的数据集，一行一个样本，一列对应一个特征，带有特征名（header_name）
#####        label: pandas.DataFrame，data数据集中每个样本对应的类别标记，一行一个类别标记，只有一列
#####        rule: list，存放BTRD算法对当前训练数据集生成的规则，rule = [r1, r2, ..., rR]，其中ri = (value, feature, class)，表示若在第feature个特征上取value的值，那么倾向于判定为属于第class类
#####        par: dict，一组超参数字典，p =  {'hpar1':  value1, 'hpar2': value2, ...,'hparN': valueN}
#### 输出：
#####        pos_rule: list，存放BTRD算法对当前训练数据集生成的规则，rule = [r1, r2, ..., rR]，其中ri = (value, feature, class)，表示若在第feature个特征上取value的值，那么倾向于判定为属于第class类
#####        threshold: int，阈值，判断从rule中提取的正类的规则数目应当被满足多少条才可被判定为正类，threshold越大，判定越严格。threshold为0意味着无类别标记样本只要出现一个特征的取值符合pos_rule即被判定为正类，threshold=length(pos_rule)则意味无类别标记样本需要符合全部正类规则才能被判定为正类
#####        pos_name/neg_name：string，正负类样本类别标记的名字
### ---------------------------------------------------------------------
### get_micro_binary_predict()
#### 对当前输入的无类别标记数据使用正类规则（pos_rule）进行判别，返回每个样本符合pos_rule的条数
#### 输入：
#####        data: pandas.DataFrame，不带类别标记(label)的数据集，一行一个样本，一列对应一个特征，带有特征名（header_name）
#####        pos_rule: list，存放BTRD算法对当前训练数据集生成的规则，rule = [r1, r2, ..., rR]，其中ri = (value, feature, class)，表示若在第feature个特征上取value的值，那么倾向于判定为属于第class类
#### 输出：
#####        pre_rule_num: list，每个包含的元素分别是当前无类别标记样本的特征与当前规则匹配的条数，pre_rule_num = [match_num1, match_num2, ... match_numX]，其中match_numi是一个int
### ---------------------------------------------------------------------
### get_binary_match()
#### 计算一个无类别标记样本与一个特征匹配的条目数的方法
#### 输入：
#####        sample: list，一个无类别标记的样本，sample = [(value1, feature1), (value2, feature2), ..., (valueD, featureD)], value是特征取值，fature是特征名称，两者都是string
#####        pos_rule: list，存放BTRD算法对当前训练数据集生成的规则，rule = [r1, r2, ..., rR]，其中ri = (value, feature, class)，表示若在第feature个特征上取value的值，那么倾向于判定为属于第class类
#### 输出：
#####        match_num: int，当前无类别标记的样本与当前规则匹配的条数

In [14]:
# Valide positive rule in binary-class case
def get_binary_validation(data, label, rule, par):
    # this function is used to determine the optimal threshold for the rule of the positive class
    class_name = list(set(label))
    label_lst = list(label)
    pos_name = class_name[0] if label_lst.count(class_name[0])<=label_lst.count(class_name[1]) else class_name[1]
    neg_name = class_name[0] if class_name[1] == pos_name else class_name[1]
    pos_rule = [(j_v, j_f) for j_v, j_f, j_c in rule if j_c == pos_name] # pos_rule:[(v,f),(v,f),...,(value,fea)]
    upper_fea = len(set([k_f for _,k_f in pos_rule])) # number of features in the rule of the current class
    
    best_res = 0
    threshold = 0 # threshold is a scala
    for i_w in xrange(upper_fea):
        pre_rule_num = get_micro_binary_predict(data, pos_rule)
        pre_label = [pos_name if i_r > i_w else neg_name for i_r in pre_rule_num]
        result_now = get_binary_eval(pre_label, label)
        print 'get_binary_validation:', par['indicator'], '-', result_now[par['indicator']], 'compared with', i_w
        print 'threshold from the last round:', threshold
        print
        if result_now[par['indicator']] > best_res:
            best_res = result_now[par['indicator']]
            threshold = i_w
    return pos_rule, threshold, pos_name, neg_name

def get_micro_binary_predict(data, pos_rule):
    # this function is used to calculate the number of matched rule 
    # (belonging to the same class) for each sample.
    pre_rule_num = []
    for i_data in xrange(len(data.index)):
        sample = zip(data.iloc[i_data,:], data.columns)
        match_num = get_binary_match(sample, pos_rule)
        pre_rule_num.extend([match_num])
    return pre_rule_num

def get_binary_match(sample, pos_rule):
    match_num = 0
    for i_v, i_f in sample:
        for j_v, j_f in pos_rule:
            if i_f == j_f and i_v == j_v:
                match_num = match_num + 1
    return match_num

### get_binary_predict()
#### 根据BTRD生成的正类规则，对当前输入的测试数据集作预测的方法
#### 输入：
#####        Z: pandas.DataFrame，不带类别标记(label)的数据集，一行一个样本，一列对应一个特征，带有特征名（header_name）
#####        pos_rule: list，存放BTRD算法对当前训练数据集生成的规则，rule = [r1, r2, ..., rR]，其中ri = (value, feature, class)，表示若在第feature个特征上取value的值，那么倾向于判定为属于第class类
#####        threshold: int，阈值，判断从rule中提取的正类的规则数目应当被满足多少条才可被判定为正类，threshold越大，判定越严格。threshold为0意味着无类别标记样本只要出现一个特征的取值符合pos_rule即被判定为正类，threshold=length(pos_rule)则意味无类别标记样本需要符合全部正类规则才能被判定为正类
#####        pos_name/neg_name：string，正负类样本类别标记的名字
#### 输出：
#####        zpre: list，存放BTRD算法对当前测试数据集的预测类别标记，zpre = [label1, label2, ..., labelX]

In [15]:
# Test rules
def get_binary_predict(Z, pos_rule, threshold, pos_name, neg_name):
    print 'get_binary_predict:', ' pos:', pos_name, '; neg:', neg_name
    zpre = []
    for i in xrange(len(Z.index)):
        Z_i = zip(Z.iloc[i,:], Z.columns)
        #print 'Now:', get_binary_match(Z_i, pos_rule), 'compared with', threshold
        if get_binary_match(Z_i, pos_rule) >= threshold:
            zpre.extend([pos_name])
        else:
            zpre.extend([neg_name])
            
    return zpre

### get_binary_eval()
#### 根据输入的预测类别标记列表及真实类别标记列表，使用不同的度量指标计算其匹配程度的方法
#### 输入：
#####        p: list，存放预测类别标记，p = [pre_label1, pre_label2, ..., pre_labelX]
#####        y: list，存放真实类别标记，y = [true_label1, true_label2, ..., true_labelX]
#####        （注意：默认pre_label与true_label都是string类型，因此遇到只能处理数值型的度量指标方法时，需要对p与y先执行简单编码操作，调用get_SimpleNumCode()函数完成）
#### 输出：
#####        res_dict: dict，存放各种指标及其算出的值，res_dict = {'indicator1':value1, 'indicator2':value2, ..., 'indicatorI':valueI}

In [16]:
# Evaluate the performance
def get_binary_eval(p, y):
    # p and y are two list
    res_dict = {}
    
    p_num = get_SimpleNumCode(p)
    y_num = get_SimpleNumCode(y)
    res_dict['F1(Macro)'] = sm.f1_score(y_num, p_num, average='macro')*100
    res_dict['F1(Micro)'] = sm.f1_score(y_num, p_num, average='micro')*100
    res_dict['Acc'] = sm.accuracy_score(y_num, p_num)*100
    
    class_name = set(y)
    acc_each = []
    for i in class_name:
        p_index = get_index(p, i) # find index of current pre_label
        y_index = get_index(y, i) # find index of current true_label    
        acc_p2y = len(set(p_index).intersection(y_index))/float(len(y_index)) * 100 # acc of curren class
        acc_each.extend([(acc_p2y, i)]) # (accuracy, class)     
    res_dict['Acc_Macro'] = np.mean([a for a, c in acc_each])
    res_dict['GM'] = np.sqrt(np.prod([a for a, c in acc_each]))
    res_dict['Acc_each'] = acc_each
    
    return res_dict

### get_dict()
#### 根据输入的存放度量指标值的字典，打印显示出具体结果的方法
#### 输入：
#####        d: dict，存放各种指标及其算出的值，res_dict = {'indicator1':value1, 'indicator2':value2, ..., 'indicatorI':valueI}
#### 输出：
#####        list_final: list，形如以下内容的结果列表，包含度量指标值，及其对应的超参数配置： ['Acc_68.085106383', 'F1(Micro)_68.085106383', 'Acc_Macro_60.625', 'F1(Macro)_55.4924242424', 'GM_59.6866819316', "Acc_each_[(50.0, u'T'), (71.25, u'F')]"] with hyper-parameters: ['chi_name_chi2', 'chi_value_0.05', 'indicator_F1(Macro)', 'grid_indicator_GM', 'threshold_5']

In [17]:
def get_dict(d):
    list_final = []
    for i in d:
        list_now = i + '_' + str(d[i])
        list_final.append(list_now)
    return list_final

### Main
#### 调用内外两层CV验证BTRD方法的主方法
#### 首先使用run_Config()方法初始化超参数，接着使用run_load_data()方法载入数据集，之后使用run_doubleCV方法训练、验证并测试BTRD算法，最后打印出结果。

In [18]:
if __name__ == '__main__':
    data_name, k_holdout, k_cv, pars = run_Config()
    all_fea, all_label = run_load_data(data_name)
    res_list, opt_pars_list = run_doubleCV(all_fea, all_label, k_holdout, k_cv, pars)
    print 'Each heldout cv result:'
    print '-----------------------'
    for i, j in zip(res_list,opt_pars_list):
        print get_dict(i), 'with hyper-parameters:'
        print get_dict(j)
        print '-----------------------'

Round  1  Holdout CV----------------------
round  1  gridsearch cv----------------
get_binary_validation: F1(Macro) - 46.0431654676 compared with 0
threshold from the last round: 0

get_binary_validation: F1(Macro) - 46.0431654676 compared with 1
threshold from the last round: 0



  'precision', 'predicted', average, warn_for)


get_binary_validation: F1(Macro) - 18.3385421918 compared with 2
threshold from the last round: 0

get_binary_validation: F1(Macro) - 36.7038620799 compared with 3
threshold from the last round: 0

get_binary_validation: F1(Macro) - 55.0363880777 compared with 4
threshold from the last round: 0

get_binary_validation: F1(Macro) - 56.9960397483 compared with 5
threshold from the last round: 4

get_binary_validation: F1(Macro) - 49.1249491249 compared with 6
threshold from the last round: 5

get_binary_validation: F1(Macro) - 45.9459459459 compared with 7
threshold from the last round: 5

get_binary_validation: F1(Macro) - 46.0431654676 compared with 8
threshold from the last round: 5

get_binary_validation: F1(Macro) - 46.0431654676 compared with 9
threshold from the last round: 5

get_binary_validation: F1(Macro) - 46.0431654676 compared with 10
threshold from the last round: 5

run_RuleDiscovery:  pos: T ; neg: F
get_binary_predict:  pos: T ; neg: F
get_binary_validation: F1(Macro) - 