In [2]:
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import time
import sys
import re
import pickle
import itertools
import json


from rulekit.classification import RuleClassifier
from rulekit.params import Measures

from rulekit.survival import SurvivalRules

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MultiLabelBinarizer, LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn import metrics

from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score, precision_recall_curve, roc_curve, auc
from sklearn.metrics import make_scorer, roc_auc_score

# Add the sibling ../src directory to the module search path so the import
# below can be resolved from there.
sys.path.append('./../src/')
# Wildcard import: all public names of `utils` enter the notebook namespace.
from utils import *

# NOTE(review): the instance is discarded right away; presumably it is created
# only to trigger RuleKit's backend start-up before any model is built — confirm.
_ = RuleClassifier()

In [5]:
# Read the feature table, using the first CSV column as the row index and
# discarding the leftover 'index' column.
data_df = (
    pd.read_csv('./../Data/1000_features_survival_3classes.csv', index_col=0)
    .drop(columns=['index'])
)

# Show how many samples fall into each value of the target column 'y'.
data_df['y'].value_counts()

y
0    921
1    246
2     42
Name: count, dtype: int64

# Configuration files for the hyperparameter search

In [4]:
# Create one JSON config file per (measure, minsupp_new) pair.
# The position of a pair in the Cartesian product is its config index, which
# is also embedded in the file name: config_<idx>.json.
measures = ['c2', 'rss', 'correlation']
minsupp_new = range(3,15,2)  # 3, 5, ..., 13


for idx, (measure, minsupp) in enumerate(itertools.product(measures, minsupp_new)):
    print(idx, (measure, minsupp))
    config = {'measures': measure,
              'minsupp_new': minsupp}
    # The with-block closes the file on exit; no explicit close() is required.
    with open('./RuleKit/configs/config_'+str(idx)+'.json','w') as f:
        json.dump(config, f)


0 ('c2', 3)
1 ('c2', 5)
2 ('c2', 7)
3 ('c2', 9)
4 ('c2', 11)
5 ('c2', 13)
6 ('rss', 3)
7 ('rss', 5)
8 ('rss', 7)
9 ('rss', 9)
10 ('rss', 11)
11 ('rss', 13)
12 ('correlation', 3)
13 ('correlation', 5)
14 ('correlation', 7)
15 ('correlation', 9)
16 ('correlation', 11)
17 ('correlation', 13)


# Results of the hyperparameter search

In [28]:
# Rebuild the search grid so that list positions match the config indices
# used in the config/result file names.
measures = ['c2', 'rss', 'correlation']
minsupp_new = range(3,15,2)

# Every (measure, minsupp_new) pair, in Cartesian-product order.
combinations = list(itertools.product(measures, minsupp_new))
ncombinations = len(combinations)



In [57]:
# Build a summary table with one averaged row per hyperparameter config.
# Each config's results are read from result_config_<idx>.pkl, where idx is the
# position of the config in `combinations`.
per_config_means = []

for idx in range(len(combinations)):
    # NOTE: pickle.load can execute arbitrary code while unpickling; only load
    # result files that were produced by this project.
    with open('./RuleKit/results/result_config_'+str(idx)+'.pkl','rb') as f:
        result = pickle.load(f)

    df_ = pd.DataFrame(result)[['seed', 'measure', 'minsupp_new', 'time',
                                  'auroc_val', 'auprc_val','nrules', 'rules_count',
                                   'conditions_per_rule', 'induced_conditions_per_rule',
                                   'avg_rule_coverage', 'avg_rule_precision', 'avg_rule_quality',
                                   'pvalue']]

    # Tag every row with the config index it belongs to.
    df_['idx'] = idx
    # Each 'time' entry is a sequence; only its first element is kept.
    df_['time'] = [i[0] for i in df_['time']]

    # Split the per-class score sequences into one scalar column per class
    # (positions 0..2 become the suffixes _class1.._class3).
    for metric in ('auroc_val', 'auprc_val'):
        for class_pos in range(3):
            df_[f'{metric}_class{class_pos + 1}'] = [scores[class_pos] for scores in df_[metric]]

    # Drop the non-numeric 'measure' label and the now-expanded sequence columns
    # so that the column-wise mean below is defined for every remaining column.
    df_ = df_.drop(['measure','auroc_val', 'auprc_val'],axis=1)

    # Average over the rows of this config (presumably one row per seed —
    # confirm); note that 'seed' and 'idx' are averaged along with the metrics.
    per_config_means.append(df_.mean().to_frame().T)

result_df = pd.concat(per_config_means).reset_index(drop=True)
result_df

In [59]:
# Order the configs by class-3 validation AUROC (ascending), breaking ties by
# class-3 validation AUPRC, so the highest-scoring configs end up at the bottom.
result_df.sort_values(by=['auroc_val_class3', 'auprc_val_class3'], ascending=True)

Unnamed: 0,seed,minsupp_new,time,nrules,rules_count,conditions_per_rule,induced_conditions_per_rule,avg_rule_coverage,avg_rule_precision,avg_rule_quality,pvalue,idx,auroc_val_class1,auroc_val_class2,auroc_val_class3,auprc_val_class1,auprc_val_class2,auprc_val_class3
12,349.333333,3.0,282.343668,88.0,88.0,25.476369,28.251609,0.266301,0.99782,0.502145,5.427885e-07,12.0,0.606533,0.598849,0.461905,0.851951,0.29348,0.016575
1,349.333333,5.0,33.607563,121.666667,121.666667,1.975027,1.977782,0.028279,1.0,0.528501,0.004807281,1.0,0.580974,0.581237,0.467619,0.867904,0.328486,0.016575
15,349.333333,9.0,138.42465,39.0,39.0,28.511547,31.691818,0.279673,0.9973,0.531824,9.235013e-06,15.0,0.569571,0.566035,0.471429,0.847868,0.282586,0.016575
13,349.333333,5.0,194.23865,60.666667,60.666667,26.809521,29.812519,0.271933,0.997442,0.515381,5.571399e-10,13.0,0.649478,0.646396,0.481746,0.866762,0.356473,0.024948
14,349.333333,7.0,187.56834,47.333333,47.333333,27.004638,29.977681,0.273389,0.997243,0.51722,0.0002285264,14.0,0.609819,0.619025,0.492698,0.857397,0.327255,0.08857
17,349.333333,13.0,118.613741,30.333333,30.333333,30.169444,33.682064,0.283346,0.996915,0.540774,3.466287e-10,17.0,0.580862,0.585586,0.50127,0.853844,0.286903,0.035098
3,349.333333,9.0,32.782981,78.333333,78.333333,2.751699,2.755866,0.038555,1.0,0.540594,0.001379589,3.0,0.604342,0.60279,0.501746,0.877995,0.373021,0.033511
2,349.333333,7.0,32.649028,95.333333,95.333333,2.335187,2.338554,0.033669,1.0,0.534423,0.003221008,2.0,0.554376,0.555243,0.503333,0.861542,0.288414,0.035597
5,349.333333,13.0,28.347661,59.0,59.0,3.621053,3.626608,0.050269,1.0,0.552339,0.0003554964,5.0,0.572267,0.588901,0.503333,0.864099,0.322358,0.035597
4,349.333333,11.0,29.012633,66.666667,66.666667,3.189655,3.189655,0.044146,1.0,0.545846,0.0005476825,4.0,0.574009,0.585179,0.507619,0.869897,0.333326,0.046904
