# Parameter Tuning
In this notebook, we perform a parameter search for the following methods. 
We also optionally record the test results
to an Excel sheet and pickle file for later analysis.

| Method      | Parameters |
|:-           | :-         |
|TBAC         | Aggregation, PC significance value (IBM only)|
| MonitorRank | Aggregation, Test round, Backward edge coefficient, PC significance value (IBM only)|
| CloudRanger | Aggregation, PC significance value, Test round, Second order mixture coefficient, Backward edge coefficient |
| NetMedic    | History range start, History len, Current range start, Current len, Bin size |
| DyCause | Aggregation, Start time (optional), Before length, After length, Interval step, Granger significance value, Lag, Adaptive threshold ratio, Backtrace max path length, Prob mean method, Topk path number, Number of selected nodes |

In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
from openpyxl import load_workbook
from tqdm import tqdm
import pickle

from main_cloud_ranger import test_cloud_ranger
from main_dycause import test_dycause as test_granger_extend
from main_tbac import test_tbac
from main_monitor_rank import test_monitor_rank

# from main_facgraph import test_facgraph
from main_netmedic import test_netmedic

## Select dataset
For the IBM incident dataset, the running time for these methods is quite long. 

For example, for CloudRanger, it may take more than 1 day. 

If you do not intend to reproduce the exact results, we don't recommend running them.

The workbook `ComparisonExpLog.xlsx` and the sheets `Parameter Analysis(IBM)`, `Comparison Experiments(Pymicro)` must be created before running the code.

In [2]:
# Excel workbook that receives the results and the sheet inside it to write to.
# The cells below open both with load_workbook, so they must already exist.
workbook_name = 'ComparisonExpLog.xlsx'
# sheet_name = 'Comparison Experiments(Pymicro)'
sheet_name = 'Parameter Analysis(IBM)'

# pymicro test suite
# (uncomment this group and comment out the IBM group below to switch datasets)
# print("\n{:!^80}\n".format(" Pymicro Test Suite "))
# dataset_name = 'pymicro'
# entry_point_list = [16]
# true_root_cause = [1]

# ibm_micro_service test suite
print("\n{:!^80}\n".format(" IBM Micro Service Test Suite "))
# Passed to every test_* call as the data source.
dataset_name = 'ibm_micro_service'
# Only entry_point_list[0] is used by the tuning cells (as frontend / affected_node).
entry_point_list = [14]
# Passed to every test_* call as true_root_cause.
true_root_cause = [6, 28, 30, 31]
# true_root_cause_1 = [28]
# true_root_cause_1 = [28]


!!!!!!!!!!!!!!!!!!!!!!!!! IBM Micro Service Test Suite !!!!!!!!!!!!!!!!!!!!!!!!!



## TBAC Parameter Tuning

In [None]:
# Grid-search TBAC over the aggregation delta (ela) and alpha, writing one
# worksheet row per combination, starting at row 3:
#   columns 1-2: ela, alpha
#   columns 3+:  PR@k values
#   column 8:    mean of the PR@k values
#   column 9:    accuracy
verbose = False
wb = load_workbook(workbook_name)
sheet = wb[sheet_name]

# Same enumeration order as a nested loop: alpha varies fastest.
tbac_grid = [
    (delta, level)
    for delta in range(1, 21)
    for level in [0.05, 0.1, 0.2]
]

row = 3
for ela, alpha in tbac_grid:
    prks, acc = test_tbac(
        dataset_name,
        ela=ela,
        alpha=alpha,
        frontend=entry_point_list[0],
        true_root_cause=true_root_cause,
        verbose=verbose,
    )
    # Parameters first, then the PR@k values, in consecutive columns.
    for offset, cell_value in enumerate([ela, alpha, *prks]):
        sheet.cell(row=row, column=1 + offset, value=cell_value)
    sheet.cell(row=row, column=8, value=np.mean(prks))
    sheet.cell(row=row, column=9, value=acc)
    row += 1

# The workbook is only persisted once the whole grid has been evaluated.
wb.save(workbook_name)

## MonitorRank Tuning

In [None]:
# Grid-search MonitorRank over aggregation delta (ela), test rounds and rho.
# Every run is appended to `results` so that a later cell can write it out.
verbose = False
results = []

# Same enumeration order as a nested loop: rho varies fastest.
monitor_rank_grid = [
    (delta, rounds, coeff)
    for delta in range(1, 21)
    for rounds in [5]
    for coeff in [0.2, 0.4, 0.6, 0.8]
]

for ela, testround, rho in monitor_rank_grid:
    prks, acc = test_monitor_rank(
        dataset_name,
        ela=ela,
        testrun_round=testround,
        frontend=entry_point_list[0],
        true_root_cause=true_root_cause,
        rho=rho,
        save_data_fig=False,
        verbose=verbose,
    )
    run_record = dict(ela=ela, testround=testround, rho=rho, prks=prks, acc=acc)
    results.append(run_record)

In [9]:
# Write the MonitorRank runs collected in `results` to the worksheet, one row
# per run, starting at row 3:
#   columns 12-14: ela, testround, rho
#   columns 15+:   PR@k values
#   column 20:     mean of the PR@k values
#   column 21:     accuracy
wb = load_workbook(workbook_name)
sheet = wb[sheet_name]
row = 3
# Iterate the recorded runs directly rather than re-deriving the tuning grid
# with a manual index. This keeps working when `results` holds only part of
# the grid (e.g. an interrupted search) and cannot drift out of sync if the
# grid in the tuning cell is edited.
for result in results:
    sheet.cell(row=row, column=12, value=result['ela'])
    sheet.cell(row=row, column=13, value=result['testround'])
    sheet.cell(row=row, column=14, value=result['rho'])
    for i, prk in enumerate(result['prks']):
        sheet.cell(row=row, column=15 + i, value=prk)
    sheet.cell(row=row, column=20, value=np.mean(result['prks']))
    sheet.cell(row=row, column=21, value=result['acc'])
    row += 1
wb.save(workbook_name)

## CloudRanger tuning

In [None]:
import itertools

# Grid-search CloudRanger. Every run is appended to `results` so that a later
# cell can write it to the workbook.
verbose = False
results = []

# itertools.product enumerates in the same order as nested for-loops written
# in this order (the last factor, rho, varies fastest).
cloud_ranger_grid = list(itertools.product(
    range(1, 21),               # ela   -> pc_aggregate
    [0.1, 0.2, 0.3, 0.4, 0.5],  # alpha -> pc_alpha
    [5],                        # testround
    [0.1, 0.5, 0.9],            # beta
    [0.2, 0.4, 0.6, 0.8],       # rho
))

# The total is derived from the grid so it cannot go stale when the grid is
# edited. The context manager closes the bar even if a run raises or the cell
# is interrupted; unclosed bars otherwise garble later progress output.
with tqdm(total=len(cloud_ranger_grid), ascii=True) as pbar:
    for ela, alpha, testround, beta, rho in cloud_ranger_grid:
        prks, acc = test_cloud_ranger(
            data_source=dataset_name,
            pc_aggregate=ela,
            pc_alpha=alpha,
            testrun_round=testround,
            frontend=entry_point_list[0],
            true_root_cause=true_root_cause,
            beta=beta,
            rho=rho,
            save_data_fig=False,
            verbose=verbose,
        )
        results.append({'ela': ela, 'alpha': alpha, 'testround': testround,
                        'beta': beta, 'rho': rho, 'prks': prks, 'acc': acc})
        pbar.update(1)

### Load the external result pickle obtained from `cloudranger_params_tune.py`

In [5]:
# with open('cloudranger_parameter_tune_ibm.pkl', 'rb') as f:
#     results = pickle.load(f)

In [7]:
# Write the CloudRanger runs in `results` to the worksheet, one row per run,
# starting at row 3. Relative to column_base:
#   +0..+4: ela, alpha, testround, beta, rho
#   +5...:  PR@k values
#   +10:    mean of the PR@k values
#   +11:    accuracy
wb = load_workbook(workbook_name)
sheet = wb[sheet_name]
row = 3
column_base = 24
param_keys = ('ela', 'alpha', 'testround', 'beta', 'rho')
for result in results:
    # Parameters first, then the PR@k values, in consecutive columns.
    row_values = [result[key] for key in param_keys]
    row_values.extend(result['prks'])
    for offset, cell_value in enumerate(row_values):
        sheet.cell(row=row, column=column_base + offset, value=cell_value)
    sheet.cell(row=row, column=column_base + 10, value=np.mean(result['prks']))
    sheet.cell(row=row, column=column_base + 11, value=result['acc'])
    row += 1
wb.save(workbook_name)

## NetMedic tuning

In [None]:
import itertools

# Grid-search NetMedic. Every run is appended to `results` so that a later
# cell can write it to the workbook.
verbose = False
results = []

# Alternative grid kept from an earlier version of this cell:
#   hist_start in [0, 200, 400], hist_len in [200, 400],
#   current_start in [800, 1000], current_len in [200, 400],
#   bin_size in [50, 100, 150]
#
# itertools.product enumerates in the same order as nested for-loops written
# in this order (the last factor, bin_size, varies fastest).
netmedic_grid = list(itertools.product(
    [0, 2000, 4000],  # hist_start
    [200, 400],       # hist_len
    [4600, 5000],     # current_start
    [200, 400],       # current_len
    [10, 30, 50],     # bin_size
))

# The total is derived from the grid so it cannot go stale when the grid is
# edited. The context manager closes the bar even if a run raises or the cell
# is interrupted; unclosed bars otherwise garble later progress output.
with tqdm(total=len(netmedic_grid), ascii=True) as pbar:
    for hist_start, hist_len, current_start, current_len, bin_size in netmedic_grid:
        prks, acc = test_netmedic(
            data_source=dataset_name,
            # Both ranges are passed as (start, start + length).
            history_range=(hist_start, hist_start + hist_len),
            current_range=(current_start, current_start + current_len),
            bin_size=bin_size,
            affected_node=entry_point_list[0],
            true_root_cause=true_root_cause,
            verbose=verbose,
            disable_print=True,
        )
        results.append({'hist_start': hist_start, 'hist_len': hist_len,
                        'current_start': current_start, 'current_len': current_len,
                        'bin_size': bin_size,
                        'prks': prks, 'acc': acc})
        pbar.update(1)

In [7]:
# Write the NetMedic runs in `results` to the worksheet, one row per run,
# starting at row 3. Relative to column_base:
#   +0..+4: hist_start, hist_len, current_start, current_len, bin_size
#   +5...:  PR@k values
#   +10:    mean of the PR@k values
#   +11:    accuracy
wb = load_workbook(workbook_name)
sheet = wb[sheet_name]
row = 3
column_base = 38
param_keys = ('hist_start', 'hist_len', 'current_start', 'current_len', 'bin_size')
for result in results:
    # Parameters first, then the PR@k values, in consecutive columns.
    row_values = [result[key] for key in param_keys]
    row_values.extend(result['prks'])
    for offset, cell_value in enumerate(row_values):
        sheet.cell(row=row, column=column_base + offset, value=cell_value)
    sheet.cell(row=row, column=column_base + 10, value=np.mean(result['prks']))
    sheet.cell(row=row, column=column_base + 11, value=result['acc'])
    row += 1
wb.save(workbook_name)

## DyCause tuning

In [10]:
# Sanity check: appears to count the parameter combinations of the DyCause
# grid in the next cell, one factor per parameter with more than a trivial
# role. (4 * 4 - 1) is the before/after length pairs minus the excluded
# (0, 0) pair.
1 * (4 * 4 -1 )* 3 * 1 * 3 * 3 * 1 * 2 * 3

2430

In [None]:
import itertools

# Grid-search DyCause. `param_list` holds every parameter combination and
# `result_list` receives one record per evaluated combination (the parameters
# plus 'prks' and 'acc').

# NOTE(review): this flag is not consulted below; the test call hard-codes
# verbose=True / disable_print=False.
verbose = False
# wb = load_workbook('ComparisonExpLog-2020,04,27.xlsx')
# sheet = wb['Comparison Experiments']

# Candidate values per parameter. The insertion order fixes both the key order
# of every parameter dict and the enumeration order: itertools.product varies
# the last key fastest, exactly like nested for-loops written in this order.
param_grid = {
    'ela': range(1, 2),
    'bef': [0, 100, 200, 300],
    'aft': [0, 100, 200, 300],
    'step': [50, 60, 70],
    'sig_value': [0.1],
    'lag': [5, 10, 15],
    'thres': [0.5, 0.7, 0.9],
    'max_path_length': [33],
    'mean_method': ['arithmetic', 'geometric', 'harmonic'][2:3],
    'topk_path': [50, 150],
    'num_sel_node': range(1, 4),
}
all_combinations = [
    dict(zip(param_grid, combo))
    for combo in itertools.product(*param_grid.values())
]
# Combinations with neither a "before" nor an "after" length are excluded.
param_list = [
    params for params in all_combinations
    if params['bef'] != 0 or params['aft'] != 0
]

# Index of the first combination to evaluate; earlier ones are skipped
# (presumably to resume an interrupted search). Set to 0 to run everything.
resume_from = 162

result_list = []
# `initial` starts the bar at the resume point so it can actually reach its
# total. The context manager closes the bar even if a run raises or the cell
# is interrupted; unclosed bars otherwise garble later progress output.
with tqdm(total=len(param_list),
          initial=min(resume_from, len(param_list)),
          ascii=True) as pbar:
    for params_dict in param_list[resume_from:]:
        prks, acc = test_granger_extend(
            # Data params
            data_source=dataset_name,
            aggre_delta=params_dict['ela'],
            start_time=4653,
            before_length=params_dict['bef'],
            after_length=params_dict['aft'],
            # Granger interval based graph construction params
            step=params_dict['step'],
            significant_thres=params_dict['sig_value'],
            lag=params_dict['lag'],
            auto_threshold_ratio=params_dict['thres'],
            # Root cause analysis params
            max_path_length=params_dict['max_path_length'],
            mean_method=params_dict['mean_method'],
            topk_path=params_dict['topk_path'],
            num_sel_node=params_dict['num_sel_node'],
            testrun_round=1,
            frontend=entry_point_list[0],
            true_root_cause=true_root_cause,
            # Debug params
            plot_figures=False,
            verbose=True,
            disable_print=False,
        )
        pbar.update(1)
        # Store a copy so the entries of param_list stay pure parameter sets
        # and are not mutated by recording the results.
        result_list.append({**params_dict, 'prks': prks, 'acc': acc})

### Load the external result pickle obtained from `dycause_params_tune.py`

In [32]:
import pickle

# Replace `results` with the list stored in the external pickle file.
# NOTE(security): pickle.load can execute arbitrary code while loading; only
# open result files that you produced yourself.
with open('granger_extend_parameter_tune_ibm.pkl', 'rb') as pkl_file:
    results = pickle.load(pkl_file)
# Displayed as the cell output: number of loaded records.
len(results)

405

In [22]:
# Display the first loaded record to show which keys each result carries.
results[0]

{'ela': 6,
 'bef': 0,
 'aft': 100,
 'step': 50,
 'sig_value': 0.1,
 'lag': 5,
 'thres': 0.7,
 'max_path_length': None,
 'mean_method': 'harmonic',
 'topk_path': 150,
 'num_sel_node': 3,
 'prks': [0.0, 0.5, 0.3333333333333333, 0.25, 0.25],
 'acc': 0.25}

### Group the performance by aggregation delta and pick the best run for each

In [33]:
# Group the loaded results by aggregation delta ('ela'). For every delta the
# lists below are index-aligned: entry i of each list belongs to the same run.
#   'prks':    PR@k lists
#   'prk@avg': mean of each PR@k list
#   'acc':     accuracies
#   'others':  the full result records
result_key_dict = {}
for result in results:
    bucket = result_key_dict.setdefault(
        result['ela'],
        {'prks': [], 'prk@avg': [], 'acc': [], 'others': []},
    )
    bucket['prks'].append(result['prks'])
    bucket['prk@avg'].append(np.mean(result['prks']))
    bucket['acc'].append(result['acc'])
    bucket['others'].append(result)

# Per delta: (all mean PR@k values, best accuracy, index of the best run).
acc_dict = {
    ela: (group['prk@avg'], np.max(group['acc']), np.argmax(group['acc']))
    for ela, group in result_key_dict.items()
}

In [24]:
# For every aggregation delta, print the best accuracy, the PR@k values of
# that best run and its full result record on one line.
for k, (prk_avgs, best_acc, best_idx) in acc_dict.items():
    group = result_key_dict[k]
    prk_text = ','.join(['{:0.2f}'.format(prk) for prk in group['prks'][best_idx]])
    print('Acc: {:0.2f}'.format(best_acc), 'PR@k: ', prk_text, end=' ')
    print(group['others'][best_idx])

Acc: 0.99 PR@k:  1.00,1.00,0.67,0.75,1.00 {'ela': 6, 'bef': 100, 'aft': 100, 'step': 50, 'sig_value': 0.1, 'lag': 5, 'thres': 0.7, 'max_path_length': None, 'mean_method': 'harmonic', 'topk_path': 150, 'num_sel_node': 1, 'prks': [1.0, 1.0, 0.6666666666666666, 0.75, 1.0], 'acc': 0.9924242424242424}
Acc: 0.99 PR@k:  1.00,1.00,0.67,0.75,1.00 {'ela': 7, 'bef': 100, 'aft': 100, 'step': 70, 'sig_value': 0.1, 'lag': 5, 'thres': 0.7, 'max_path_length': None, 'mean_method': 'harmonic', 'topk_path': 150, 'num_sel_node': 3, 'prks': [1.0, 1.0, 0.6666666666666666, 0.75, 1.0], 'acc': 0.9924242424242424}
Acc: 0.99 PR@k:  1.00,1.00,0.67,0.75,1.00 {'ela': 8, 'bef': 100, 'aft': 100, 'step': 60, 'sig_value': 0.1, 'lag': 5, 'thres': 0.5, 'max_path_length': None, 'mean_method': 'harmonic', 'topk_path': 150, 'num_sel_node': 1, 'prks': [1.0, 1.0, 0.6666666666666666, 0.75, 1.0], 'acc': 0.9924242424242424}
Acc: 0.99 PR@k:  0.00,0.50,0.67,0.75,1.00 {'ela': 9, 'bef': 100, 'aft': 0, 'step': 50, 'sig_value': 0.1, '

In [10]:
# Write the DyCause runs in `results` to the worksheet, one row per run with a
# positive accuracy, starting at row 19. Relative to column_base:
#   +0..+11: ela, start time, bef, aft, step, sig_value, lag, thres,
#            max_path_length, mean_method, topk_path, num_sel_node
#   +12...:  PR@k values
#   +17:     mean of the PR@k values
#   +18:     accuracy
# NOTE(review): rows 19+ from column 1 overlap the range the TBAC cell fills on
# the same sheet (rows 3+, columns 1-9) - confirm the intended layout.
wb = load_workbook(workbook_name)
# sheet = wb['Comparison Experiments(IBM)']
sheet = wb['Parameter Analysis(IBM)']
row = 19
column_base = 1
# Fixed start time written to every row (the same literal the tuning cells
# pass as start_time).
start_time = 4653
for result in results:
# for result in result_list:
    if result['acc'] <= 0:
        continue
    row_values = [
        result['ela'],
        start_time,
        result['bef'],
        result['aft'],
        result['step'],
        result['sig_value'],
        result['lag'],
        result['thres'],
        result['max_path_length'],
        result['mean_method'],
        result['topk_path'],
        result['num_sel_node'],
    ]
    # Parameters first, then the PR@k values, in consecutive columns.
    row_values.extend(result['prks'])
    for offset, cell_value in enumerate(row_values):
        sheet.cell(row=row, column=column_base + offset, value=cell_value)
    sheet.cell(row=row, column=column_base + 17, value=np.mean(result['prks']))
    sheet.cell(row=row, column=column_base + 18, value=result['acc'])
    row += 1
wb.save(workbook_name)

### DyCause parameter analysis 2

In [5]:
import itertools

# Second DyCause parameter analysis: only the "before" length varies; every
# other parameter is pinned to a single value. `param_list` holds every
# parameter combination and `result_list` receives one record per evaluated
# combination (the parameters plus 'prks' and 'acc').
verbose = False
# wb = load_workbook('ComparisonExpLog-2020,04,27.xlsx')
# sheet = wb['Comparison Experiments']

# Candidate values per parameter. The insertion order fixes both the key order
# of every parameter dict and the enumeration order: itertools.product varies
# the last key fastest, exactly like nested for-loops written in this order.
param_grid = {
    'ela': range(1, 2),
    'bef': range(0, 350, 50),
    'aft': [200],
    'step': [70],
    'sig_value': [0.1],
    'lag': [5],
    'thres': [0.7],
    'max_path_length': [None],
    'mean_method': ['arithmetic', 'geometric', 'harmonic'][2:3],
    'topk_path': [50],
    'num_sel_node': [3],
}
all_combinations = [
    dict(zip(param_grid, combo))
    for combo in itertools.product(*param_grid.values())
]
# Combinations with neither a "before" nor an "after" length are excluded.
param_list = [
    params for params in all_combinations
    if params['bef'] != 0 or params['aft'] != 0
]

result_list = []
# The context manager closes the bar even if a run raises or the cell is
# interrupted; unclosed bars otherwise garble later progress output.
with tqdm(total=len(param_list), ascii=True) as pbar:
    for params_dict in param_list:
        prks, acc = test_granger_extend(
            # Data params
            data_source=dataset_name,
            aggre_delta=params_dict['ela'],
            start_time=4653,
            before_length=params_dict['bef'],
            after_length=params_dict['aft'],
            # Granger interval based graph construction params
            step=params_dict['step'],
            significant_thres=params_dict['sig_value'],
            lag=params_dict['lag'],
            auto_threshold_ratio=params_dict['thres'],
            # Root cause analysis params
            max_path_length=params_dict['max_path_length'],
            mean_method=params_dict['mean_method'],
            topk_path=params_dict['topk_path'],
            num_sel_node=params_dict['num_sel_node'],
            testrun_round=1,
            frontend=entry_point_list[0],
            true_root_cause=true_root_cause,
            # Debug params
            plot_figures=False,
            verbose=False,
            disable_print=True,
        )
        pbar.update(1)
        # Store a copy so the entries of param_list stay pure parameter sets
        # and are not mutated by recording the results.
        result_list.append({**params_dict, 'prks': prks, 'acc': acc})


  0%|                                                                 | 0/7 [00:00<?, ?it/s][A
 14%|########1                                                | 1/7 [00:02<00:13,  2.29s/it][A
 29%|################2                                        | 2/7 [00:04<00:11,  2.27s/it][A
 43%|########################4                                | 3/7 [00:07<00:10,  2.51s/it][A
 57%|################################5                        | 4/7 [00:10<00:08,  2.77s/it][A
 71%|########################################7                | 5/7 [00:13<00:05,  2.73s/it][A
 86%|################################################8        | 6/7 [00:16<00:02,  2.80s/it][A
100%|#########################################################| 7/7 [00:20<00:00,  3.06s/it][A