Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ gcovr = "*"
coverage = "*"

[packages]
jupyter = "*"
pandas = "*"
2 changes: 1 addition & 1 deletion contributing.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ $ make build

Run the test example:
```
$ pipenv run python examples/carrots.py
$ pipenv run python examples/carrots_demo/carrots.py
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rather, can we include this as part of pytest?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we could. I'm just not sure what the original intention was behind putting this script in the contributing.md file. Was it perhaps to let newcomers try running the code themselves to get a better feel for the repo? In that case, maybe we should still keep this command in contributing.md?

Copy link
Copy Markdown
Member

@chinmayshah99 chinmayshah99 Sep 15, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea is that newcomers can try the Python notebook to run our code. The Python files are there to show how to include them as part of the codebase.

I just realized this comment was supposed to be for the file in tests folder.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@chinmayshah99 I moved jupyter & pandas to devDependencies, but the CI seemed to dislike this: I encountered a "jupyter can't be found" error (https://github.com/OpenMined/PyDP/runs/1132943621?check_suite_focus=true)

```

Build the python wheel:
Expand Down
60 changes: 30 additions & 30 deletions examples/Tutorial 4-Launch_demo/DP proof.ipynb
Original file line number Diff line number Diff line change
@@ -1,16 +1,4 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Untitled2.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
Expand All @@ -19,7 +7,7 @@
"colab_type": "text"
},
"source": [
"## 1.4 Demonstartion of Differential Privacy using PyDP\n",
"## 1.4 Demonstration of Differential Privacy using PyDP\n",
"The PyDP package provides a Python API into Google's Differential Privacy library. This example uses the alpha 1.0 version of the package that has the following limitations:\n",
"\n",
"Laplace noise generation technique.\n",
Expand Down Expand Up @@ -121,7 +109,7 @@
},
"source": [
"# get carrots data from our public github repo\n",
"url1 = 'https://raw.githubusercontent.com/OpenMined/PyDP/demo/examples/Tutorial%204-Launch_demo/data/01.csv'\n",
"url1 = 'https://raw.githubusercontent.com/OpenMined/PyDP/dev/examples/Tutorial%204-Launch_demo/data/01.csv'\n",
"df1 = pd.read_csv(url1,sep=\",\", engine = \"python\")\n",
"df1.head()"
],
Expand Down Expand Up @@ -247,7 +235,7 @@
"outputId": "589bc3d9-fb8f-4873-98c0-eb94a9f66d90"
},
"source": [
"url2 = 'https://raw.githubusercontent.com/OpenMined/PyDP/demo/examples/Tutorial%204-Launch_demo/data/02.csv'\n",
"url2 = 'https://raw.githubusercontent.com/OpenMined/PyDP/dev/examples/Tutorial%204-Launch_demo/data/02.csv'\n",
"df2 = pd.read_csv(url2,sep=\",\", engine = \"python\")\n",
"df2.head()"
],
Expand Down Expand Up @@ -363,7 +351,7 @@
"outputId": "02e8e364-3312-4ab4-a5db-ede8d2440934"
},
"source": [
"url3 = 'https://raw.githubusercontent.com/OpenMined/PyDP/demo/examples/Tutorial%204-Launch_demo/data/03.csv'\n",
"url3 = 'https://raw.githubusercontent.com/OpenMined/PyDP/dev/examples/Tutorial%204-Launch_demo/data/03.csv'\n",
"df3 = pd.read_csv(url3,sep=\",\", engine = \"python\")\n",
"df3.head()"
],
Expand Down Expand Up @@ -479,7 +467,7 @@
"outputId": "630b1dc8-f383-46bf-f9f0-5ceb3c47c7d5"
},
"source": [
"url4 = 'https://raw.githubusercontent.com/OpenMined/PyDP/demo/examples/Tutorial%204-Launch_demo/data/04.csv'\n",
"url4 = 'https://raw.githubusercontent.com/OpenMined/PyDP/dev/examples/Tutorial%204-Launch_demo/data/04.csv'\n",
"df4 = pd.read_csv(url4,sep=\",\", engine = \"python\")\n",
"df4.head()"
],
Expand Down Expand Up @@ -595,7 +583,7 @@
"outputId": "70a2f83c-ceac-4611-9ca0-eabc4be7f328"
},
"source": [
"url5 = 'https://raw.githubusercontent.com/OpenMined/PyDP/demo/examples/Tutorial%204-Launch_demo/data/05.csv'\n",
"url5 = 'https://raw.githubusercontent.com/OpenMined/PyDP/dev/examples/Tutorial%204-Launch_demo/data/05.csv'\n",
"df5 = pd.read_csv(url5,sep=\",\", engine = \"python\")\n",
"df5.head()"
],
Expand Down Expand Up @@ -1241,8 +1229,8 @@
"outputId": "717b1039-6ae2-4920-e6f0-8dc71f4ca9c2"
},
"source": [
"print(f\"Difference in sum using DP: {round(dp_sum_og - dp_sum_redact, 2)}\")\n",
"print(f\"Actual Value: {sales_amount_Osbourne}\")\n",
"print(\"Difference in sum using DP: {}\".format(round(dp_sum_og - dp_sum_redact, 2)))\n",
"print(\"Actual Value: {}\".format(sales_amount_Osbourne))\n",
"assert round(dp_sum_og - dp_sum_redact, 2) != sales_amount_Osbourne\n"
],
"execution_count": 22,
Expand All @@ -1269,8 +1257,8 @@
"outputId": "8de5557f-fc93-4dd4-b64d-85ced6c3ed2c"
},
"source": [
"print(f\"Sum of sales_value in the orignal Dataset: {sum_original_dataset}\")\n",
"print(f\"Sum of sales_value in the orignal Dataset using DP: {dp_sum_og}\")\n",
"print(\"Sum of sales_value in the orignal Dataset: {}\".format(sum_original_dataset))\n",
"print(\"Sum of sales_value in the orignal Dataset using DP: {}\".format(dp_sum_og))\n",
"assert dp_sum_og != sum_original_dataset"
],
"execution_count": 23,
Expand All @@ -1297,8 +1285,8 @@
"outputId": "413db632-2f13-48e8-8f2d-01a6ced01c32"
},
"source": [
"print(f\"Sum of sales_value in the redacted Dataset: {sum_redact_dataset}\")\n",
"print(f\"Sum of sales_value in the redacted Dataset using DP: {dp_sum_redact}\")\n",
"print(\"Sum of sales_value in the redacted Dataset: {}\".format(sum_redact_dataset))\n",
"print(\"Sum of sales_value in the redacted Dataset using DP: {}\".format(dp_sum_redact))\n",
"assert dp_sum_redact != sum_redact_dataset"
],
"execution_count": 24,
Expand Down Expand Up @@ -1495,7 +1483,7 @@
"outputId": "06a1f4c5-12ed-486a-d8ea-cda156c86f3f"
},
"source": [
"print(f\"Difference in sum for first 3000 records which used only 30% privacy budget= {round(abs(actual_partial_sum - partial_sum_dp), 2)}\")"
"print(\"Difference in sum for first 3000 records which used only 30% privacy budget= {}\".format(round(abs(actual_partial_sum - partial_sum_dp), 2)))"
],
"execution_count": 31,
"outputs": [
Expand Down Expand Up @@ -1626,10 +1614,10 @@
"outputId": "c0043965-6fd0-4f5f-f6fa-6a8732977750"
},
"source": [
"print(f\"Actual Sum: {sum_original_dataset}\")\n",
"print(f\"Sum from the previous run with privacy budget 1.0: {dp_sum_og}\")\n",
"print(f\"Sum when using privacy_budget as 0.7 on the whole dataset together: {sum_og_dataset(budget=0.7)}\")\n",
"print(f\"Sum from this run with privacy budget 0.7 on split dataset: {partial_total_sum}\")\n"
"print(\"Actual Sum: {}\".format(sum_original_dataset))\n",
"print(\"Sum from the previous run with privacy budget 1.0: {}\".format(dp_sum_og))\n",
"print(\"Sum when using privacy_budget as 0.7 on the whole dataset together: {}\".format(sum_og_dataset(budget=0.7)))\n",
"print(\"Sum from this run with privacy budget 0.7 on split dataset: {}\".format(partial_total_sum))\n"
],
"execution_count": 36,
"outputs": [
Expand All @@ -1645,5 +1633,17 @@
}
]
}
]
],
"metadata": {
"colab": {
"name": "Untitled2.ipynb",
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
42 changes: 42 additions & 0 deletions tests/notebooks/test_all_notebooks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import os
import subprocess
import tempfile
import glob

# execute notebook in given path
def _execute_notebook(notebook_path: str) -> bool:
    """Execute a notebook end-to-end with ``jupyter nbconvert``.

    The executed copy is written into a throwaway directory, so the
    notebook under test is not overwritten.

    Args:
        notebook_path: path of the ``.ipynb`` file to execute.

    Returns:
        True when the nbconvert command exits successfully.

    Raises:
        subprocess.CalledProcessError: if the nbconvert command exits with
            a non-zero status.
    """
    # A temporary *directory* is used instead of an open NamedTemporaryFile:
    # a named temporary file that is still held open cannot be reopened by
    # name on Windows, so nbconvert could not write its output to it there.
    with tempfile.TemporaryDirectory() as out_dir:
        args = [
            "jupyter",
            "nbconvert",
            "--to",
            "notebook",
            "--execute",
            "--output",
            os.path.join(out_dir, "executed.ipynb"),
            notebook_path,
        ]
        subprocess.check_call(args)

    # check_call raises on failure, so reaching this line means success
    return True


# return all .ipynb notebooks in a given folder
def _get_all_notebooks(path: str) -> list:
    """Collect every ``.ipynb`` file under ``path``, searching recursively.

    Args:
        path: directory to search; its subdirectories are searched too.

    Returns:
        The matching notebook paths in sorted order, or an empty list when
        nothing matches.
    """
    # recursively find all .ipynb files in the given path and its subdirectories
    pattern = os.path.join(path, "./**/*.ipynb")

    # glob order depends on the file system; sort so that notebooks are
    # always visited in the same, reproducible order
    return sorted(glob.glob(pattern, recursive=True))


# test all notebooks in a given path
def test_all_notebooks(path="examples/"):
    """Run every notebook found under ``path`` and require each to succeed.

    Args:
        path: folder searched for notebooks.
    """
    # NOTE(review): when no notebook is found under ``path`` the loop body
    # never runs and this test passes without executing anything.
    for notebook in _get_all_notebooks(path):
        # each notebook must execute successfully
        assert _execute_notebook(notebook)