# Summary of Bayesian parameters search

Read all the parameters' search results and save the optimal ones.

Read all the sensitivity test results and save for the further visualisation.

In [5]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
import json
import pandas as pd
import validation
from pprint import pprint

## 1 Read the logs of validation and save to a file

In [3]:
region_list = ['sweden', 'netherlands', 'saopaulo']
for region in region_list:
    file = f'../../results/para-search-r1/parasearch-n_{region}.txt'
    list_lines = []
    with open(file) as f:
        for jsonObj in f:
            line = json.loads(jsonObj)
            list_lines.append(line)
    df = pd.DataFrame(list_lines)
    df = df.loc[df['kl'] > 0, :]
    with open('../../results/para-search-r1/parasearch.txt', 'a') as outfile:
        json.dump(df.loc[df['kl'] == min(df['kl']), :].to_dict('records')[0], outfile)
        outfile.write('\n')

## 2 Run src\py\parameters-validation.py
This step generates and saves the visits using the optimal parameters on both calibration and validation datasets.

## 3 Compute KL divergence measure of the validation results

In [6]:
with open('../../results/para-search-r1/parasearch.txt', 'r') as f:
    for jsonObj in f:
        line = json.loads(jsonObj)
        region = line['region']
        # Add validation performance
        file = f'../../results/para-search-r1/{region}_validation_distances.csv'
        distances = pd.read_csv(file)
        kl = validation.DistanceMetrics().kullback_leibler(distances,
                                                           titles=['groundtruth', 'model'])
        line['kl-v'] = kl

        # Add validation performance for benchmark
        kl_baseline = validation.DistanceMetrics().kullback_leibler(distances,
                                                           titles=['groundtruth', 'benchmark'])
        line['kl-v-baseline'] = kl_baseline
        with open('../../results/summary.txt', 'a') as outfile:
            json.dump(line, outfile)
            outfile.write('\n')

In [13]:
with open('../../results/summary.txt', 'r') as outfile:
    for line in outfile:
        pprint(line)


('{"region": "sweden", "p": 0.9837273303214517, "beta": 0.017503751178316866, '
 '"gamma": 0.11895253662991521, "kl-baseline": 1.2950978492258307, "kl": '
 '0.01585126958256988, "kl-v": 0.02354793316813772}\n')
('{"region": "netherlands", "p": 0.7763441613832662, "beta": '
 '0.22682873676239296, "gamma": 0.012171103323168245, "kl-baseline": '
 '1.2757839552588923, "kl": 0.017479270659420572, "kl-v": '
 '0.019721853782483253}\n')
('{"region": "saopaulo", "p": 0.45879896443898016, "beta": '
 '0.17666806537151383, "gamma": 0.011859780066996593, "kl-baseline": '
 '0.8589528848493738, "kl": 0.0027090426934729584, "kl-v": '
 '0.00308760109916755}\n')
('{"region": "sweden", "p": 0.9810923928598705, "beta": 0.014032698838826095, '
 '"gamma": 0.23585684808119195, "kl-baseline": 0.09081822690812881, '
 '"kl-deviation": 0.008582467654668042, "kl": 0.006669178646773053, "kl-v": '
 '0.010764955875287098, "kl-v-baseline": 0.07077894402988541}\n')
('{"region": "netherlands", "p": 0.7977081692955302, 

## 4 Compute KL divergence measure of the sensitivity test results

In [27]:
file = '../../results/para-search-r1/parasearch.txt'
list_lines = []
with open(file) as f:
    for jsonObj in f:
        line = json.loads(jsonObj)
        list_lines.append(line)
df_av = pd.DataFrame(list_lines)
df_av.set_index('region', inplace=True)
df_av.loc['average'] = df_av.mean()  # Get the average value of parameters
df_av.reset_index(inplace=True)
df_av

Unnamed: 0,region,p,beta,gamma,kl-baseline,kl-deviation,kl
0,sweden,0.981092,0.014033,0.235857,0.090818,0.008582,0.006669
1,netherlands,0.797708,0.17463,0.173252,0.012129,0.025306,0.003579
2,saopaulo,0.987384,0.162742,0.192272,0.074433,,0.00272
3,average,0.922061,0.117135,0.20046,0.059127,0.016944,0.004323


In [24]:
res_list = []
for region in region_list:
    for region2cross in [x for x in region_list if x != region] + ['average']:
        for type in ['calibration', 'validation']:
            file = f'../../results/para-search-r1/sensitivity/{region}_{type}_{region2cross}_distances.csv'
            distances = pd.read_csv(file)
            kl = validation.DistanceMetrics().kullback_leibler(distances, titles=['groundtruth', 'model'])
            kl_b = validation.DistanceMetrics().kullback_leibler(distances, titles=['groundtruth', 'benchmark'])
            gain = (kl_b - kl) / kl_b * 100 # Improvement (decrease) in %
            res_list.append((region, region2cross, type, kl, kl_b, gain))
df_sens = pd.DataFrame(res_list, columns=['region', 'region2cross', 'type', 'kl', 'kl_b', 'gain'])

In [25]:
# Add its own performance
res_self_list = []
with open('../../results/summary.txt', 'r') as outfile:
    for jsonObj in outfile:
        line = json.loads(jsonObj)
        gain_c = (line['kl-baseline'] - line['kl']) / line['kl-baseline'] * 100
        res_self_list.append((line['region'], line['region'], 'calibration', line['kl'], line['kl-baseline'], gain_c))

        gain_v = (line['kl-v-baseline'] - line['kl-v']) / line['kl-v-baseline'] * 100
        res_self_list.append((line['region'], line['region'], 'validation', line['kl-v'], line['kl-v-baseline'], gain_v))
df_sens = pd.concat([pd.DataFrame(res_self_list, columns=['region', 'region2cross', 'type', 'kl', 'kl_b', 'gain']),
                     df_sens])
df_sens

Unnamed: 0,region,region2cross,type,kl,kl_b,gain
0,sweden,sweden,calibration,0.006669,0.090818,92.656564
1,sweden,sweden,validation,0.010765,0.070779,84.790737
2,netherlands,netherlands,calibration,0.003579,0.012129,70.490577
3,netherlands,netherlands,validation,0.004273,0.018083,76.370204
4,saopaulo,saopaulo,calibration,0.00272,0.074433,96.345318
5,saopaulo,saopaulo,validation,0.003498,0.140363,97.507563
0,sweden,netherlands,calibration,0.021105,0.090818,76.761636
1,sweden,netherlands,validation,0.016904,0.070779,76.116809
2,sweden,saopaulo,calibration,0.016719,0.090818,81.590673
3,sweden,saopaulo,validation,0.014549,0.070779,79.445061


In [26]:
df_sens.to_csv('../../results/para-search-r1/sensitivity_summary.csv', index=False)