# Investigating various algorithms that we have come up with

In [1]:
import os
import sys
import yaml
import itertools

# Put the repository root (three directories above this notebook's working
# directory) on sys.path so the project-local `src` and `experiments` packages
# imported below can be resolved. The membership check keeps re-running this
# cell from appending the same path twice.
module_path = os.path.abspath('../../../')
if module_path not in sys.path:
    sys.path.append(module_path)
    
from collections import defaultdict

from bson.objectid import ObjectId
import json
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rc
import matplotlib as mpl

from src.utils.sacred_retrieval import SacredExperimentAccess, get_dicts_key_subset, get_unique_dicts, flatten_dict
from experiments.dispatch.dispatch_utils import nested_dict_to_option_strings

## Algorithm 1 - Assigning parts of the noisy q to the clients that were minibatched.
We take the noisy update to q and distribute it equally among those clients that participated. 
The downside of this is that it violates the restrictions needed to benefit from privacy via subsampling. By making the updates a function of these indices, we release information about them through the updates. This feels like a little bit of a letdown. We never directly release these, and so we shouldn't lose ALL our privacy, but we haven't figured out how to account for this yet.

Theoretically there is a trade-off here when selecting the minibatch size. The privacy loss SHOULD be the same regardless. A smaller L will give local $t_i$'s that are more accurate, but more obscured by noise. Going down to L=1 gives us the fix to Mrin's thesis. 

Let's have a look at how it does.

Open up the config file and take a look:

In [2]:
# Load the experiment sweep definition.
# NOTE(review): yaml.FullLoader is fine for a trusted, project-local config;
# yaml.safe_load would presumably suffice if the file holds only plain types.
with open('../experiment_configs/client_homo_bad_q_one.yaml', 'r') as config_file:
    experiment_config = yaml.load(config_file, Loader=yaml.FullLoader)

# Collapse the nested config into a single level (the displayed result uses
# dotted keys such as 'dataset.name').
experiment_config = flatten_dict(experiment_config)

# Keep only the options that are actually swept: an entry survives when its
# value is a list whose length is not exactly one. Scalars and single-element
# lists are constant across runs and are dropped.
constant_keys = [
    option
    for option, setting in experiment_config.items()
    if not (isinstance(setting, list) and len(setting) != 1)
]
for option in constant_keys:
    experiment_config.pop(option)

experiment_config

{'dataset.name': ['adult', 'abalone'],
 'dataset_dist.M': [1, 3, 10, 20, 50],
 'privacy_settings.C': [30, 50, 100],
 'privacy_settings.L': [1, 3, 10, 20, 50]}

In [3]:
# Strip bookkeeping entries from the config so that only sweep options remain.
# NOTE(review): the preceding cell already removed every key whose value is not
# a multi-element list, so these keys are presumably absent by this point and
# `collection` falls back to 'sacred' — confirm that this is intended.
experiment_config.pop("experiment_file", None)
collection = experiment_config.pop("collection", 'sacred')
if 'num_seeds' in experiment_config:
    # `num_seed` is only bound when the key is present; it is not used in this cell.
    num_seed = experiment_config.pop('num_seeds')

# Turn each swept option into its list of alternatives, then build one dict
# per point of the Cartesian product of those alternatives.
all_options = nested_dict_to_option_strings(experiment_config)
dp_configs = [dict(combination) for combination in itertools.product(*all_options)]

In [4]:
# Display every generated sweep configuration (one dict per combination of the
# swept options). The output is long; slice it if only a sample is needed.
dp_configs

[{'dataset.name': 'adult',
  'dataset_dist.M': 1,
  'privacy_settings.C': 30,
  'privacy_settings.L': 1},
 {'dataset.name': 'adult',
  'dataset_dist.M': 1,
  'privacy_settings.C': 30,
  'privacy_settings.L': 3},
 {'dataset.name': 'adult',
  'dataset_dist.M': 1,
  'privacy_settings.C': 30,
  'privacy_settings.L': 10},
 {'dataset.name': 'adult',
  'dataset_dist.M': 1,
  'privacy_settings.C': 30,
  'privacy_settings.L': 20},
 {'dataset.name': 'adult',
  'dataset_dist.M': 1,
  'privacy_settings.C': 30,
  'privacy_settings.L': 50},
 {'dataset.name': 'adult',
  'dataset_dist.M': 1,
  'privacy_settings.C': 50,
  'privacy_settings.L': 1},
 {'dataset.name': 'adult',
  'dataset_dist.M': 1,
  'privacy_settings.C': 50,
  'privacy_settings.L': 3},
 {'dataset.name': 'adult',
  'dataset_dist.M': 1,
  'privacy_settings.C': 50,
  'privacy_settings.L': 10},
 {'dataset.name': 'adult',
  'dataset_dist.M': 1,
  'privacy_settings.C': 50,
  'privacy_settings.L': 20},
 {'dataset.name': 'adult',
  'dataset_dis

In [5]:
# Project every sweep point onto just the dataset name and M, then
# de-duplicate, so each (dataset, M) pair is queried only once.
dataset_and_m_configs = get_dicts_key_subset(dp_configs, ['dataset.name', 'dataset_dist.M'])
pvi_configs = get_unique_dicts(dataset_and_m_configs)

# Split the unique configurations by dataset.
abalone_pvi_configs = [
    pvi_config for pvi_config in pvi_configs
    if pvi_config['dataset.name'] == 'abalone'
]
adult_pvi_configs = [
    pvi_config for pvi_config in pvi_configs
    if pvi_config['dataset.name'] == 'adult'
]

Connect to the mongo database with the results:

In [6]:
# Open the handle used by the cells below to query stored experiment runs.
# NOTE(review): a variable named `collection` is passed as `database_name` —
# confirm that the config's "collection" entry really names the database.
results_access = SacredExperimentAccess(database_name=collection)

In [7]:
# Fetch the completed 'jalko2017_client_exp' runs for each abalone
# configuration; the result holds one entry per config in `abalone_pvi_configs`.
abalone_pvi_experiments = [
    results_access.get_experiments(name='jalko2017_client_exp', complete=True, config=config)
    for config in abalone_pvi_configs
]
# The original cell bound this list to a misspelled name. Keep that spelling as
# an alias so any later cell that still refers to it continues to work.
abalone_pvi_experiemts = abalone_pvi_experiments

In [8]:
# Fetch the completed 'jalko2017_client_exp' runs for each adult
# configuration; the result holds one entry per config in `adult_pvi_configs`.
adult_pvi_experiments = [
    results_access.get_experiments(
        name='jalko2017_client_exp',
        complete=True,
        config=adult_config,
    )
    for adult_config in adult_pvi_configs
]

In [18]:
# Metrics to pull for every group of adult runs: test accuracy and the epsilon
# reported by client 0's moment accountant.
ADULT_METRIC_NAMES = ("test_accuracy", "server.client_0_MomentAccountant.epsilon")

# One lookup per group of experiments; each call receives its own fresh list
# of metric names.
adult_pvi_metrics = [
    results_access.get_metrics_by_exp(experiment_group, list(ADULT_METRIC_NAMES))
    for experiment_group in adult_pvi_experiments
]

In [20]:
# Spot-check the stored metrics: second experiment group, first element.
# The displayed value is a list of metric records, each with 'name', 'run_id',
# 'steps' and 'values' fields.
adult_pvi_metrics[1][0]

[{'_id': ObjectId('5d5ac31c855e98d9a7634cf7'),
  'name': 'test_accuracy',
  'run_id': 341,
  'steps': [1,
   2,
   3,
   4,
   5,
   6,
   7,
   8,
   9,
   10,
   11,
   12,
   13,
   14,
   15,
   16,
   17,
   18,
   19,
   20,
   21,
   22,
   23,
   24,
   25,
   26,
   27,
   28,
   29,
   30,
   31],
  'values': [0.6553030303030303,
   0.7014742014742015,
   0.6806920556920557,
   0.6863226863226863,
   0.6957411957411958,
   0.645986895986896,
   0.6402538902538902,
   0.631040131040131,
   0.6221334971334971,
   0.6018632268632269,
   0.609029484029484,
   0.6253071253071253,
   0.6052416052416052,
   0.5993038493038493,
   0.6083128583128583,
   0.5996109746109746,
   0.5993038493038493,
   0.5962325962325963,
   0.6072891072891073,
   0.6106674856674856,
   0.6148648648648649,
   0.6113841113841114,
   0.6078009828009828,
   0.6015561015561015,
   0.6032964782964783,
   0.6051392301392301,
   0.605958230958231,
   0.6117936117936118,
   0.6093366093366094,
   0.6076986076986

In [23]:
# Sanity check: list the M and L settings of every run returned for the second
# adult configuration, to see which settings the query actually matched.
for experiment in adult_pvi_experiments[1]:
    run_config = experiment['config']
    print(run_config['dataset_dist']['M'], run_config['privacy_settings']['L'])

50 10
50 10
50 10
50 10
50 10
50 10
50 10
50 10
50 10
50 10
50 30
50 30
50 30
50 30
50 30
50 30
50 30
50 30
50 30
50 30
50 10
50 10
50 10
50 10
50 10
50 50
50 50
50 50
50 50
50 50
50 50
50 50
50 50
50 30
50 30
50 30
50 30
50 30
50 50
50 50
50 50
50 50
50 50
50 50
50 50
