In [20]:
import json
import numpy as np

from mlrl.boosting import Boomer
from mlrl.testbed import data

from src.evaluation import Evaluator
from src.utils import pjoin, makedir, load_pickle, load_synthetic_data

In [6]:
# Names of the synthetic datasets to evaluate, in processing order.
# The last underscore-separated field of each name is what
# get_true_number_of_rules parses as the true number of rules.
syn_ds_names = [
    '1500_15_15_15',
    '250_5_5_5',
    '2000_20_20_20',
    '3000_30_30_30',
    '2500_25_25_25',
    '500_10_10_10',
]

In [35]:
# Single dataset name; follows the same naming scheme as the entries of
# syn_ds_names.
ds_name = '1500_15_15_15'

# Directory under which the per-dataset JSON result files are written.
output_dir = 'outputs/boomer'

In [33]:
# NOTE: disabled dataset-loading switch, kept for reference only. It refers to
# `use_synthetic_data`, which must be defined before this can be re-enabled.
# if use_synthetic_data:
#     true_num_rules = get_true_number_of_rules(ds_name)
#     trn_X, trn_Y, tst_X, tst_Y = load_synthetic_data(f'synthetic-datasets/{ds_name}')
# else:
#     trn_X, trn_Y, md = data.load_data_set_and_meta_data('./datasets', f'{ds_name}-train.arff', f'{ds_name}.xml')
#     tst_X, tst_Y, md = data.load_data_set_and_meta_data('./datasets', f'{ds_name}-test.arff', f'{ds_name}.xml')

In [34]:
def get_true_number_of_rules(ds_name):
    """Return the integer encoded after the last underscore of ``ds_name``.

    Example: ``'1500_15_15_15'`` yields ``15``. A name without any underscore
    is parsed as a whole.

    Raises:
        ValueError: if the trailing field is not a valid integer literal.
    """
    _, _, last_field = ds_name.rpartition('_')
    return int(last_field)

def run_experiments_on_synthetic_data(ds_name, num_rule_budgets=4):
    """Fit and evaluate Boomer on one synthetic dataset for several rule budgets.

    The budgets are the true number of rules (parsed from ``ds_name``)
    multiplied by 1, 2, 4, ... -- ``num_rule_budgets`` values in total. With
    the default of 4 this is 1x, 2x, 4x and 8x the true number of rules.

    Args:
        ds_name: Dataset name; it is loaded from ``synthetic-datasets/<ds_name>``
            and its last underscore-separated field is the true number of rules.
        num_rule_budgets: How many budgets to try, each double the previous one.

    Returns:
        A list with one entry per budget: the report produced by
        ``Evaluator.report`` for the test predictions, extended with the keys
        ``'max_rules'`` (the budget) and ``'num_used_rules'`` (as reported by
        the fitted model).
    """
    true_num_rules = get_true_number_of_rules(ds_name)
    trn_X, trn_Y, tst_X, tst_Y = load_synthetic_data(f'synthetic-datasets/{ds_name}')

    # Build the budgets as native Python ints (not numpy integers) so that both
    # the learner parameter and the JSON-bound report receive plain ints.
    rule_numbers = [true_num_rules * 2 ** exponent for exponent in range(num_rule_budgets)]

    perf_list = []
    for rule_num in rule_numbers:
        clf = Boomer(max_rules=rule_num)
        clf.fit(trn_X, trn_Y)
        pred_Y = clf.predict(tst_X)

        # A fresh evaluator per budget, as its statefulness is not known here.
        ev = Evaluator()
        perf = ev.report(pred_Y, tst_Y)
        perf['max_rules'] = rule_num
        perf['num_used_rules'] = clf.model_.get_num_used_rules()

        perf_list.append(perf)
    return perf_list

In [55]:
from mlrl.common.cython.model import RuleModelVisitor
from mlrl.testbed.model_characteristics import RuleModelFormatter

In [57]:
# Leftover interactive IPython help lookup (`?` suffix). It is not part of the
# experiment pipeline and can be removed once the API has been explored.
RuleModelFormatter?

In [51]:
# Hand a RuleModelVisitor instance to the fitted model's `visit` method.
# NOTE(review): `clf` is not assigned at notebook scope in any visible cell (it
# only exists as a local inside run_experiments_on_synthetic_data), so this
# cell presumably relies on leftover kernel state -- confirm and define `clf`
# explicitly before running it on a fresh kernel.
vis = RuleModelVisitor()
clf.model_.visit(vis)

In [37]:
# Run the rule-budget sweep for every synthetic dataset and store each result
# list as '<output_dir>/<dataset name>.json'.
# The loop variable is deliberately not called `ds_name`, so the `ds_name`
# value configured in an earlier cell is not overwritten by this cell.
for syn_ds_name in syn_ds_names:
    print(f'experimenting on {syn_ds_name}')
    perf_list = run_experiments_on_synthetic_data(syn_ds_name)

    # Serialize before opening the file: if the results are not
    # JSON-serializable, the error is raised before an existing result file
    # is truncated.
    payload = json.dumps(perf_list, indent=4)

    filename = pjoin(output_dir, syn_ds_name + '.json')
    makedir(filename)
    with open(filename, 'w') as f:
        f.write(payload)

experimenting on 1500_15_15_15
experimenting on 250_5_5_5
experimenting on 2000_20_20_20
experimenting on 3000_30_30_30
experimenting on 2500_25_25_25
experimenting on 500_10_10_10
