In [4]:
import json
import os

import numpy as np
import pandas as pd
from tqdm import tqdm

# Hook tqdm into pandas (per the tqdm docs this registers the `progress_*`
# methods); NOTE(review): no progress_apply call is visible in this part of
# the notebook - confirm it is still needed.
tqdm.pandas()
# Remove the cap on how many columns pandas shows when displaying a frame.
pd.options.display.max_columns = None

## Distribution: travel purpose grouped by combinations (age group + gender) and type of starting place for penultimate trip (without the possibility of destination being home)

### Helper methods

In [14]:
def save_object(obj, name, out_dir='out'):
    """Serialize ``obj`` to a JSON file inside ``out_dir``.

    Parameters
    ----------
    obj
        Any object accepted by ``json.dump``.
    name : str
        Target file name; the ``.json`` suffix is appended when missing.
    out_dir : str, default 'out'
        Directory to write into. It is created (including parents) when it
        does not exist yet, so callers do not have to prepare it beforehand.

    Raises
    ------
    TypeError
        Propagated from ``json.dump`` when ``obj`` is not JSON-serializable.
    """
    file_name = name if name.endswith('.json') else (name + '.json')
    # An empty out_dir means "current directory"; os.makedirs('') would raise.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    file_path = os.path.join(out_dir, file_name)
    with open(file_path, 'w') as f:
        json.dump(obj, f)


def load_object(name, in_dir='out'):
    """Read a JSON file from ``in_dir`` and return the decoded object.

    Parameters
    ----------
    name : str
        File name; the ``.json`` suffix is appended when missing.
    in_dir : str, default 'out'
        Directory that contains the file.

    Returns
    -------
    The Python object produced by ``json.load``.
    """
    json_name = name
    if not json_name.endswith('.json'):
        json_name = json_name + '.json'
    with open(os.path.join(in_dir, json_name), 'r') as source:
        return json.load(source)

### Prepare data

In [17]:
# Directory holding the previously computed destination-type distribution.
in_dir = '../../data/processed/mc/travel_planning'

# Nested dict, indexed below as [age_sex_comb][source_place_type][destination_place_type].
dest_type_dist = load_object('dest_type_dist', in_dir=in_dir)

# Display the top-level keys of the loaded distribution.
dest_type_dist.keys()

dict_keys(['16-19_K', '16-19_M', '20-24_K', '20-24_M', '25-44_K', '25-44_M', '45-60_K', '45-65_M', '6-15_K', '6-15_M', '61-x_K', '66-x_M'])

Remove 'dom' (home) from the possible destination types

In [18]:
# Strip the 'dom' destination from every (age/sex group, source place type)
# entry and print what is left so the result can be inspected.
for age_sex_comb, source_place_types in dest_type_dist.items():
    for source_place_type, destination_place_types in source_place_types.items():
        # Passing a default keeps this cell re-runnable: without it a second
        # run (or an entry that never had 'dom') raises KeyError.
        destination_place_types.pop('dom', None)
        print(age_sex_comb, end='  -  ')
        print(source_place_type, end='  \t')
        print(destination_place_types)

16-19_K  -  dom  	{'inne': 0.1935483870967742, 'praca': 0.06451612903225806, 'szkola': 0.6021505376344086, 'uczelnia': 0.12903225806451613}
16-19_K  -  inne  	{'inne': 0.2222222222222222, 'praca': 0.037037037037037035, 'szkola': 0.07407407407407407, 'uczelnia': 0.037037037037037035}
16-19_K  -  praca  	{'inne': 0.1111111111111111, 'praca': 0.1111111111111111, 'szkola': 0.1111111111111111, 'uczelnia': 0.1111111111111111}
16-19_K  -  szkola  	{'inne': 0.04918032786885246, 'praca': 0.01639344262295082, 'szkola': 0.01639344262295082, 'uczelnia': 0.01639344262295082}
16-19_K  -  uczelnia  	{'inne': 0.06666666666666667, 'praca': 0.06666666666666667, 'szkola': 0.06666666666666667, 'uczelnia': 0.06666666666666667}
16-19_M  -  dom  	{'inne': 0.16666666666666666, 'praca': 0.03787878787878788, 'szkola': 0.696969696969697, 'uczelnia': 0.09090909090909091}
16-19_M  -  inne  	{'inne': 0.1282051282051282, 'praca': 0.02564102564102564, 'szkola': 0.02564102564102564, 'uczelnia': 0.02564102564102564}
16

### Probability distribution

In [19]:
# Rescale each remaining destination distribution in place so that its values
# sum to 1; distributions whose total is 0 are left untouched.
for age_sex_comb, source_place_types in dest_type_dist.items():
    for source_place_type, destination_place_types in source_place_types.items():
        total = np.array(list(destination_place_types.values())).sum()
        if total != 0:
            # destination_place_types is the very dict stored in
            # dest_type_dist, so updating it updates the distribution itself.
            for dest_type in list(destination_place_types):
                destination_place_types[dest_type] = destination_place_types[dest_type] / total
        print(age_sex_comb, source_place_type, sep='  -  ', end='  \t')
        print(destination_place_types)

16-19_K  -  dom  	{'inne': 0.19565217391304346, 'praca': 0.06521739130434782, 'szkola': 0.6086956521739131, 'uczelnia': 0.13043478260869565}
16-19_K  -  inne  	{'inne': 0.6, 'praca': 0.1, 'szkola': 0.2, 'uczelnia': 0.1}
16-19_K  -  praca  	{'inne': 0.25, 'praca': 0.25, 'szkola': 0.25, 'uczelnia': 0.25}
16-19_K  -  szkola  	{'inne': 0.49999999999999994, 'praca': 0.16666666666666666, 'szkola': 0.16666666666666666, 'uczelnia': 0.16666666666666666}
16-19_K  -  uczelnia  	{'inne': 0.25, 'praca': 0.25, 'szkola': 0.25, 'uczelnia': 0.25}
16-19_M  -  dom  	{'inne': 0.1679389312977099, 'praca': 0.038167938931297704, 'szkola': 0.7022900763358778, 'uczelnia': 0.09160305343511449}
16-19_M  -  inne  	{'inne': 0.625, 'praca': 0.12500000000000003, 'szkola': 0.12500000000000003, 'uczelnia': 0.12500000000000003}
16-19_M  -  praca  	{'inne': 0.25, 'praca': 0.25, 'szkola': 0.25, 'uczelnia': 0.25}
16-19_M  -  szkola  	{'inne': 0.7, 'praca': 0.09999999999999999, 'szkola': 0.09999999999999999, 'uczelnia': 0.

### Save probability distribution to .json

In [20]:
# Persist the re-normalised distribution under a new file name.
out_dir = '../../data/processed/mc/travel_planning'
out_file = 'penultimate_dest_type_dist.json'

# exist_ok=True replaces the check-then-create pattern, which can fail if the
# directory appears between the check and the makedirs call.
os.makedirs(out_dir, exist_ok=True)

save_object(
    obj=dest_type_dist,
    name=out_file,
    out_dir=out_dir
)