### Data Owner Flow

In [3]:
import syft as sy
import pandas as pd

In [4]:
canada_age_data = pd.read_csv("https://raw.githubusercontent.com/OpenMined/datasets/main/synthetic_people_2020/people_in_canada.csv")

In [7]:
canada_age_data

Unnamed: 0,ISO3_code,Location,Age,person_id
0,CAN,Canada,0,953066471
1,CAN,Canada,0,769938298
2,CAN,Canada,0,870031209
3,CAN,Canada,1,881785451
4,CAN,Canada,1,134841618
...,...,...,...,...
326,CAN,Canada,83,450261489
327,CAN,Canada,84,485488090
328,CAN,Canada,85,374404152
329,CAN,Canada,86,756925311


In [9]:
canada_age_data["Age"].min(), canada_age_data["Age"].max()

(0, 87)

In [11]:
age_data = sy.Tensor(canada_age_data["Age"]).annotate_with_dp_metadata(min_val=0, max_val=100, data_subjects=canada_age_data["person_id"])

Tensor annotated with DP Metadata


In [16]:
age_data.child.data_subjects.sum()

331

In [18]:
domain_client = sy.login(email="info@openmined.org", password="changethis", port=8081)


Anyone can login as an admin to your node right now because your password is still the default PySyft username and password!!!

Connecting to localhost... done! 	 Logging into canada... done!


In [21]:
domain_client.load_dataset(
    name="Canada population census",
    assets={"Age (Years)": age_data},
    description=(
        "This contains age data from a sample population of canada"
    )
)



Loading dataset...Loading dataset... checking assets...Loading dataset... checking dataset name for uniqueness...Loading dataset... checking dataset name for uniqueness...                                                                                                                    Loading dataset... checking asset types...                              Loading dataset... uploading...🚀                        

Uploading `Age (Years)`: 100%|[32m███████████████████████████████████████[0m| 1/1 [00:00<00:00, 177.80it/s][0m

Dataset is uploaded successfully !!! 🎉

Run `<your client variable>.datasets` to see your new dataset loaded into your machine!





In [22]:
domain_client.datasets

Idx,Name,Description,Assets,Id
[0],Canada population census,This contains age data from a sample population of canada,"[""Age (Years)""] ->",7f5ccfb2-67ef-4705-bc69-677afa6b95a4


In [24]:
domain_client.datasets[0]

Dataset: Canada population census
Description: This contains age data from a sample population of canada



Asset Key,Type,Shape
"[""Age (Years)""]",,"(331,)"


In [25]:
age_data = domain_client.datasets[0]["Age (Years)"]

In [27]:
age_data

array([ 8, 46, 89, 34,  7, 79,  6, 54, 20, 28, 46, 28, 96, 44, 14, 26, 90,
       10,  9, 42, 86, 56, 96, 54, 43, 81,  1, 93, 78, 62,  3, 85, 49, 52,
       98, 35, 29, 72, 15, 36, 44, 78, 91,  0, 42, 55, 48, 66, 63,  3, 35,
       83, 60, 79, 47, 83, 92, 65, 33, 38,  4, 55, 78, 93, 14,  4, 44, 14,
       22, 44, 50,  1, 49, 37, 61, 20, 21, 26, 83, 34, 84, 31, 91,  0, 39,
       63, 23, 57, 70, 92, 66, 87, 51, 12, 62,  9, 85, 34, 51, 58, 32, 70,
       75, 93, 76, 28, 89, 41, 34, 14, 15, 65, 14, 55, 62, 72, 53, 23, 96,
       91, 58, 26, 56, 61, 59, 35, 68, 86, 12, 28,  1, 11, 34,  8, 17, 60,
       49, 83, 17, 54, 24, 71, 23, 67, 22, 69,  1, 13, 10, 94, 41, 82, 78,
       87, 42, 71, 36, 70, 69, 63, 36, 49,  4, 52, 81, 93, 26, 83, 61,  4,
       35, 47, 86, 90, 75, 17, 93,  5, 75, 97, 26, 56, 56, 61, 59, 43, 65,
       82, 20, 13, 64,  9, 57, 70, 77,  4, 37, 26, 85, 42, 90, 62, 86, 17,
       66, 71, 67, 10, 41, 96,  3, 87, 60, 79, 43, 24, 65,  1, 84, 71, 18,
       61, 84, 70, 66, 47

In [28]:
domain_client.create_user(name="Sheldon Cooper", email="sheldon@test.com", password="bazinga", budget=99999)

User created successfully!


{'name': 'Sheldon Cooper',
 'email': 'sheldon@test.com',
 'password': 'bazinga',
 'url': 'localhost'}

### Data Scientist Flow

In [29]:
ds_domain_client = sy.login(email="sheldon@test.com", password="bazinga", port=8081)

Connecting to localhost... done! 	 Logging into canada... done!


In [32]:
ds_domain_client.datasets[0]

Dataset: Canada population census
Description: This contains age data from a sample population of canada



Asset Key,Type,Shape
"[""Age (Years)""]",,"(331,)"


In [33]:
age_data = ds_domain_client.datasets[0]["Age (Years)"]

In [35]:
mean_age = age_data.mean()

In [41]:
age_gt_60 = age_data > 60
people_above_age_of_60 = age_gt_60.sum()

In [42]:
people_above_age_of_60

248.32248095223784

 (The data printed above is synthetic - it's an imitation of the real data.)

In [43]:
mean_age_public = mean_age.publish(sigma=10)

In [44]:
mean_age_public.exists

True

In [45]:
mean_age_public.get(delete_obj=True)

49.59812881781183

In [48]:
ds_domain_client.privacy_budget

99973.94527769806

In [57]:
canada_age_data["Age"].mean()

39.531722054380666

In [50]:
people_above_age_of_60_public = people_above_age_of_60.publish(sigma=5)

In [52]:
people_above_age_of_60_public.exists

True

In [53]:
people_above_age_of_60_public.get(delete_obj=True)

76.67296184511981

In [55]:
ds_domain_client.privacy_budget

99808.62834082622

In [56]:
sum(canada_age_data["Age"] > 60)

69

In [72]:
mean_age_yrs = mean_age.round()

In [80]:
mean_age_months = mean_age - mean_age_yrs
mean_age_months = mean_age_months * 12

In [81]:
mean_age_months.exists

True

In [90]:
mean_age_months_pub = mean_age_months.publish(sigma=1)

In [None]:
mean_age_yrs_pub = mean_age_yrs.publish(sigma=5)

In [84]:
mean_age_months_pub.exists

False

In [91]:
mean_age_months_pub.exists

True

In [93]:
mean_age_yrs_pub.get(delete_obj=False), mean_age_months_pub.get(delete_obj=False)

(41.260079693944675, -4.742555601438492)