In [64]:
import config
import shelve
import pandas as pd
import os

In [65]:
def get_stats_df(rep, privacy_budget, buckets_trips, buckets_trips_per_hour, buckets_visits_per_tile):
    """Summarise three statistics of a report together with their Laplace margins of error.

    One row is produced for each of "trips", "trips_per_hour" and
    "visits_per_tile". The privacy budget is divided between the three rows
    proportionally to the given bucket weights.

    The report is only read: the absolute trips-per-hour counts are computed
    as a local Series, so ``rep["trips_per_hour"].data`` is left unmodified.

    Parameters
    ----------
    rep : mapping
        Must provide the keys "ds_statistics", "trips_per_hour" and
        "visits_per_tile". ``rep["ds_statistics"]`` is read through
        ``.data["n_trips"]`` and ``.margin_of_errors_laplace["trips"]``; the
        other two through ``.data`` and ``.margin_of_error_laplace``.
        ``rep["trips_per_hour"].data`` needs a ``perc`` column and
        ``rep["visits_per_tile"].data`` a ``visits`` column.
    privacy_budget : number
        Total budget that is split across the three statistics.
    buckets_trips, buckets_trips_per_hour, buckets_visits_per_tile : number
        Relative weights of the split; their sum must not be zero.

    Returns
    -------
    pandas.DataFrame
        Columns "name", "budget", "stats (mean)", "min", "max", "moe_abs" and
        "moe_perc", rounded to two decimals. "moe_perc" is the margin of error
        divided by the mean of the statistic, i.e. a ratio that is not
        multiplied by 100.
    """
    sum_buckets = buckets_trips + buckets_trips_per_hour + buckets_visits_per_tile
    budget_per_bucket = privacy_budget / sum_buckets

    n_trips = rep["ds_statistics"].data["n_trips"]
    moe_trips = rep["ds_statistics"].margin_of_errors_laplace["trips"]

    # Trips per hour are stored as percentages; convert values and margin of
    # error to absolute counts (abs instead of perc). A local Series is used
    # instead of adding a "count" column to the report's own DataFrame.
    trips_per_hour_count = rep["trips_per_hour"].data["perc"] / 100 * n_trips
    moe_trips_per_hour = rep["trips_per_hour"].margin_of_error_laplace / 100 * n_trips

    visits = rep["visits_per_tile"].data["visits"]
    moe_visits_per_tile = rep["visits_per_tile"].margin_of_error_laplace

    mean_trips_per_hour = trips_per_hour_count.mean()
    mean_visits = visits.mean()

    return pd.DataFrame({
        "name": ["trips", "trips_per_hour", "visits_per_tile"],
        "budget": [budget_per_bucket * buckets_trips,
                   budget_per_bucket * buckets_trips_per_hour,
                   budget_per_bucket * buckets_visits_per_tile],
        # n_trips is a single value, so it serves as its own mean, min and max.
        "stats (mean)": [n_trips, mean_trips_per_hour, mean_visits],
        "min": [n_trips, trips_per_hour_count.min(), visits.min()],
        "max": [n_trips, trips_per_hour_count.max(), visits.max()],
        "moe_abs": [round(moe_trips), round(moe_trips_per_hour), round(moe_visits_per_tile)],
        "moe_perc": [moe_trips / n_trips,
                     moe_trips_per_hour / mean_trips_per_hour,
                     moe_visits_per_tile / mean_visits],
    }).round(2)

In [66]:
def _load_report(file_name):
    """Return the object stored under "report" in the named shelf of config.budget_split_output."""
    path = os.path.join(config.budget_split_output, file_name)
    # flag="r": the shelf is only read. With the default flag ("c") a wrong
    # file name would silently create a new, empty shelf on disk.
    # The context manager closes the shelf once the report has been loaded.
    with shelve.open(path, flag="r") as shelf:
        return shelf["report"]


def get_linear_and_equal_dfs(M, privacy_budget):
    """Load the stored reports of the three budget splits and summarise each one.

    For the given ``M`` and ``privacy_budget`` three shelves are read from
    ``config.budget_split_output``:

    * ``equal_dist_M_{M}_eps_{privacy_budget}`` (buckets 1 : 1 : 1)
    * ``linear_M_{M}_eps_{privacy_budget}_split1_48_962``
    * ``custom_M_{M}_eps_{privacy_budget}_split1_10_100``

    Parameters
    ----------
    M : value used in the file names
        Inserted verbatim after ``M_``.
    privacy_budget : number
        Inserted verbatim after ``eps_`` and passed on to ``get_stats_df``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(equal, linear, custom)``, each built by ``get_stats_df``.

    Raises
    ------
    dbm.error
        If one of the shelves cannot be opened for reading.
    KeyError
        If a shelf has no "report" entry.
    """
    #### equal ####
    equal = get_stats_df(
        _load_report(f"equal_dist_M_{M}_eps_{privacy_budget}"),
        privacy_budget, 1, 1, 1)

    #### linear ####
    buckets_trips = 1
    buckets_trips_per_hour = 48
    buckets_visits_per_tile = 962
    linear = get_stats_df(
        _load_report(
            f"linear_M_{M}_eps_{privacy_budget}_split{buckets_trips}_{buckets_trips_per_hour}_{buckets_visits_per_tile}"),
        privacy_budget, buckets_trips, buckets_trips_per_hour, buckets_visits_per_tile)

    #### custom ####
    buckets_trips = 1
    buckets_trips_per_hour = 10
    buckets_visits_per_tile = 100
    custom = get_stats_df(
        _load_report(
            f"custom_M_{M}_eps_{privacy_budget}_split{buckets_trips}_{buckets_trips_per_hour}_{buckets_visits_per_tile}"),
        privacy_budget, buckets_trips, buckets_trips_per_hour, buckets_visits_per_tile)

    return equal, linear, custom

In [67]:
# Summaries of the equal / linear / custom budget splits for
# M = 1, 5 and 200, each with a total privacy budget of 100.
equal_1, linear_1, custom_1 = get_linear_and_equal_dfs(M=1, privacy_budget=100)
equal_5, linear_5, custom_5 = get_linear_and_equal_dfs(M=5, privacy_budget=100)
equal_200, linear_200, custom_200 = get_linear_and_equal_dfs(M=200, privacy_budget=100)

In [68]:
# Equal split (buckets 1 : 1 : 1), M=1, privacy budget 100.
equal_1

Unnamed: 0,name,budget,stats (mean),min,max,moe_abs,moe_perc
0,trips,33.33,182.0,182.0,182.0,1,0.0
1,trips_per_hour,33.33,3.79,0.0,17.0,0,0.02
2,visits_per_tile,33.33,0.36,0.0,89.0,0,0.57


In [69]:
# Linear split (buckets 1 : 48 : 962), M=1, privacy budget 100.
linear_1

Unnamed: 0,name,budget,stats (mean),min,max,moe_abs,moe_perc
0,trips,0.1,169.0,169.0,169.0,242,1.43
1,trips_per_hour,4.75,3.47,0.0,15.79,1,0.17
2,visits_per_tile,95.15,0.34,0.0,83.0,0,0.21


In [70]:
# Custom split (buckets 1 : 10 : 100), M=1, privacy budget 100.
custom_1

Unnamed: 0,name,budget,stats (mean),min,max,moe_abs,moe_perc
0,trips,0.9,192.0,192.0,192.0,27,0.14
1,trips_per_hour,9.01,4.01,0.0,16.97,0,0.09
2,visits_per_tile,90.09,0.37,0.0,101.0,0,0.23


In [71]:
# Equal split (buckets 1 : 1 : 1), M=5, privacy budget 100.
equal_5

Unnamed: 0,name,budget,stats (mean),min,max,moe_abs,moe_perc
0,trips,33.33,854.0,854.0,854.0,4,0.0
1,trips_per_hour,33.33,17.79,2.01,87.41,0,0.03
2,visits_per_tile,33.33,1.72,0.0,413.0,1,0.58


In [72]:
# Linear split (buckets 1 : 48 : 962), M=5, privacy budget 100.
linear_5

Unnamed: 0,name,budget,stats (mean),min,max,moe_abs,moe_perc
0,trips,0.1,1073.0,1073.0,1073.0,1211,1.13
1,trips_per_hour,4.75,22.65,0.0,98.37,4,0.18
2,visits_per_tile,95.15,2.15,0.0,517.0,0,0.21


In [73]:
# Custom split (buckets 1 : 10 : 100), M=5, privacy budget 100.
custom_5

Unnamed: 0,name,budget,stats (mean),min,max,moe_abs,moe_perc
0,trips,0.9,878.0,878.0,878.0,131,0.15
1,trips_per_hour,9.01,18.37,0.0,68.66,2,0.09
2,visits_per_tile,90.09,1.74,0.0,437.0,0,0.22


In [74]:
# Equal split (buckets 1 : 1 : 1), M=200, privacy budget 100.
equal_200

Unnamed: 0,name,budget,stats (mean),min,max,moe_abs,moe_perc
0,trips,33.33,10811.0,10811.0,10811.0,144,0.01
1,trips_per_hour,33.33,225.2,3.98,909.71,18,0.08
2,visits_per_tile,33.33,22.17,0.0,3043.0,32,1.44


In [75]:
# Linear split (buckets 1 : 48 : 962), M=200, privacy budget 100.
linear_200

Unnamed: 0,name,budget,stats (mean),min,max,moe_abs,moe_perc
0,trips,0.1,11170.0,11170.0,11170.0,41021,3.67
1,trips_per_hour,4.75,230.84,0.0,931.25,126,0.54
2,visits_per_tile,95.15,19.34,0.0,3012.0,11,0.57


In [76]:
# Custom split (buckets 1 : 10 : 100), M=200, privacy budget 100.
custom_200

Unnamed: 0,name,budget,stats (mean),min,max,moe_abs,moe_perc
0,trips,0.9,13274.0,13274.0,13274.0,5319,0.4
1,trips_per_hour,9.01,273.57,0.0,1108.19,81,0.29
2,visits_per_tile,90.09,27.31,0.0,4205.0,16,0.6
