From 04b9f89fd32a1773ac0160aeaab140642d8046cb Mon Sep 17 00:00:00 2001 From: edwardelson Date: Tue, 15 Sep 2020 16:24:48 +0100 Subject: [PATCH 1/5] added test to execute all notebooks in examples --- Pipfile | 1 + contributing.md | 2 +- tests/notebooks/test_all_notebooks.py | 42 +++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 tests/notebooks/test_all_notebooks.py diff --git a/Pipfile b/Pipfile index 75952e7e..4ad29323 100644 --- a/Pipfile +++ b/Pipfile @@ -15,3 +15,4 @@ gcovr = "*" coverage = "*" [packages] +pandas = "*" diff --git a/contributing.md b/contributing.md index ba3b415b..a5fe8815 100644 --- a/contributing.md +++ b/contributing.md @@ -75,7 +75,7 @@ $ make build Run the test example: ``` -$ pipenv run python examples/carrots.py +$ pipenv run python examples/carrots_demo/carrots.py ``` Build the python wheel: diff --git a/tests/notebooks/test_all_notebooks.py b/tests/notebooks/test_all_notebooks.py new file mode 100644 index 00000000..2056b372 --- /dev/null +++ b/tests/notebooks/test_all_notebooks.py @@ -0,0 +1,42 @@ +import os +import subprocess +import tempfile +import glob + +# execute notebook in given path +def _execute_notebook(notebookPath: str) -> bool: + # convert notebook-under-test in path to a temp notebook and execute it + with tempfile.NamedTemporaryFile(suffix=".ipynb") as fout: + args = [ + "jupyter", + "nbconvert", + "--to", + "notebook", + "--execute", + "--output", + fout.name, + notebookPath, + ] + subprocess.check_call(args) + + # return true if execution is successful + return True + + +# return all .ipynb notebooks in a given folder +def _get_all_notebooks(path: str) -> list: + # recursively find all .ipynb files in a given path's subdirectories + notebookPaths = glob.glob(os.path.join(path, "./**/*.ipynb"), recursive=True) + + return notebookPaths + + +# test all notebooks in a given path +def test_all_notebooks(path="examples/"): + # get all notebooks under path + notebookPaths = _get_all_notebooks(path) + + # execute each notebook + for notebookPath in notebookPaths: + # make sure notebook is successfully executed + assert _execute_notebook(notebookPath) From 355513d49a693ee08e08b9f72bb8b019b3655837 Mon Sep 17 00:00:00 2001 From: edwardelson Date: Tue, 15 Sep 2020 17:10:51 +0100 Subject: [PATCH 2/5] added jupyter to pipfile --- Pipfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Pipfile b/Pipfile index 4ad29323..70993c37 100644 --- a/Pipfile +++ b/Pipfile @@ -16,3 +16,4 @@ coverage = "*" [packages] pandas = "*" +jupyter = "*" From fb14c5347e15441a6e1ed86d3a7b6ae505c58a52 Mon Sep 17 00:00:00 2001 From: edwardelson Date: Fri, 18 Sep 2020 09:40:44 +0100 Subject: [PATCH 3/5] moved dependencies to dev and fixed dir in dp proof notebook --- Pipfile | 4 +-- .../Tutorial 4-Launch_demo/DP proof.ipynb | 36 +++++++++---------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/Pipfile b/Pipfile index 70993c37..3bda9a87 100644 --- a/Pipfile +++ b/Pipfile @@ -13,7 +13,7 @@ sphinx = "*" sphinx-rtd-theme = "*" gcovr = "*" coverage = "*" - -[packages] pandas = "*" jupyter = "*" + +[packages] diff --git a/examples/Tutorial 4-Launch_demo/DP proof.ipynb b/examples/Tutorial 4-Launch_demo/DP proof.ipynb index b9f75c26..74d03498 100644 --- a/examples/Tutorial 4-Launch_demo/DP proof.ipynb +++ b/examples/Tutorial 4-Launch_demo/DP proof.ipynb @@ -1,16 +1,4 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "Untitled2.ipynb", - "provenance": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, "cells": [ { "cell_type": "markdown", @@ -121,7 +109,7 @@ }, "source": [ "# get carrots data from our public github repo\n", - "url1 = 'https://raw.githubusercontent.com/OpenMined/PyDP/demo/examples/Tutorial%204-Launch_demo/data/01.csv'\n", + "url1 = 'https://raw.githubusercontent.com/OpenMined/PyDP/dev/examples/Tutorial%204-Launch_demo/data/01.csv'\n", "df1 = pd.read_csv(url1,sep=\",\", engine = \"python\")\n", "df1.head()" ], @@ -247,7 +235,7 @@ "outputId": "589bc3d9-fb8f-4873-98c0-eb94a9f66d90" }, "source": [ - "url2 = 'https://raw.githubusercontent.com/OpenMined/PyDP/demo/examples/Tutorial%204-Launch_demo/data/02.csv'\n", + "url2 = 'https://raw.githubusercontent.com/OpenMined/PyDP/dev/examples/Tutorial%204-Launch_demo/data/02.csv'\n", "df2 = pd.read_csv(url2,sep=\",\", engine = \"python\")\n", "df2.head()" ], @@ -363,7 +351,7 @@ "outputId": "02e8e364-3312-4ab4-a5db-ede8d2440934" }, "source": [ - "url3 = 'https://raw.githubusercontent.com/OpenMined/PyDP/demo/examples/Tutorial%204-Launch_demo/data/03.csv'\n", + "url3 = 'https://raw.githubusercontent.com/OpenMined/PyDP/dev/examples/Tutorial%204-Launch_demo/data/03.csv'\n", "df3 = pd.read_csv(url3,sep=\",\", engine = \"python\")\n", "df3.head()" ], @@ -479,7 +467,7 @@ "outputId": "630b1dc8-f383-46bf-f9f0-5ceb3c47c7d5" }, "source": [ - "url4 = 'https://raw.githubusercontent.com/OpenMined/PyDP/demo/examples/Tutorial%204-Launch_demo/data/04.csv'\n", + "url4 = 'https://raw.githubusercontent.com/OpenMined/PyDP/dev/examples/Tutorial%204-Launch_demo/data/04.csv'\n", "df4 = pd.read_csv(url4,sep=\",\", engine = \"python\")\n", "df4.head()" ], @@ -595,7 +583,7 @@ "outputId": "70a2f83c-ceac-4611-9ca0-eabc4be7f328" }, "source": [ - "url5 = 'https://raw.githubusercontent.com/OpenMined/PyDP/demo/examples/Tutorial%204-Launch_demo/data/05.csv'\n", + "url5 = 'https://raw.githubusercontent.com/OpenMined/PyDP/dev/examples/Tutorial%204-Launch_demo/data/05.csv'\n", "df5 = pd.read_csv(url5,sep=\",\", engine = \"python\")\n", "df5.head()" ], @@ -1645,5 +1633,17 @@ } ] } - ] + ], + "metadata": { + "colab": { + "name": "Untitled2.ipynb", + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } \ No newline at end of file From 3ef626b0182d6987026e038575fa24ae85439709 Mon Sep 17 00:00:00 2001 From: edwardelson Date: Fri, 18 Sep 2020 09:58:27 +0100 Subject: [PATCH 4/5] moved jupyter to packages --- Pipfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Pipfile b/Pipfile index 3bda9a87..a85c4e57 100644 --- a/Pipfile +++ b/Pipfile @@ -13,7 +13,7 @@ sphinx = "*" sphinx-rtd-theme = "*" gcovr = "*" coverage = "*" -pandas = "*" -jupyter = "*" [packages] +jupyter = "*" +pandas = "*" From 42fca5e3297b2c544d736aa4592a01cd295fd248 Mon Sep 17 00:00:00 2001 From: edwardelson Date: Mon, 21 Sep 2020 13:19:03 +0100 Subject: [PATCH 5/5] updated print format, moved subprocess execution --- .../Tutorial 4-Launch_demo/DP proof.ipynb | 24 +++++++++---------- tests/notebooks/test_all_notebooks.py | 16 ++++++------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/examples/Tutorial 4-Launch_demo/DP proof.ipynb b/examples/Tutorial 4-Launch_demo/DP proof.ipynb index 74d03498..49c51878 100644 --- a/examples/Tutorial 4-Launch_demo/DP proof.ipynb +++ b/examples/Tutorial 4-Launch_demo/DP proof.ipynb @@ -7,7 +7,7 @@ "colab_type": "text" }, "source": [ - "## 1.4 Demonstartion of Differential Privacy using PyDP\n", + "## 1.4 Demonstration of Differential Privacy using PyDP\n", "The PyDP package provides a Python API into Google's Differential Privacy library. This example uses the alpha 1.0 version of the package that has the following limitations:\n", "\n", "Laplace noise generation technique.\n", @@ -1229,8 +1229,8 @@ "outputId": "717b1039-6ae2-4920-e6f0-8dc71f4ca9c2" }, "source": [ - "print(f\"Difference in sum using DP: {round(dp_sum_og - dp_sum_redact, 2)}\")\n", - "print(f\"Actual Value: {sales_amount_Osbourne}\")\n", + "print(\"Difference in sum using DP: {}\".format(round(dp_sum_og - dp_sum_redact, 2)))\n", + "print(\"Actual Value: {}\".format(sales_amount_Osbourne))\n", "assert round(dp_sum_og - dp_sum_redact, 2) != sales_amount_Osbourne\n" ], "execution_count": 22, @@ -1257,8 +1257,8 @@ "outputId": "8de5557f-fc93-4dd4-b64d-85ced6c3ed2c" }, "source": [ - "print(f\"Sum of sales_value in the orignal Dataset: {sum_original_dataset}\")\n", - "print(f\"Sum of sales_value in the orignal Dataset using DP: {dp_sum_og}\")\n", + "print(\"Sum of sales_value in the orignal Dataset: {}\".format(sum_original_dataset))\n", + "print(\"Sum of sales_value in the orignal Dataset using DP: {}\".format(dp_sum_og))\n", "assert dp_sum_og != sum_original_dataset" ], "execution_count": 23, @@ -1285,8 +1285,8 @@ "outputId": "413db632-2f13-48e8-8f2d-01a6ced01c32" }, "source": [ - "print(f\"Sum of sales_value in the redacted Dataset: {sum_redact_dataset}\")\n", - "print(f\"Sum of sales_value in the redacted Dataset using DP: {dp_sum_redact}\")\n", + "print(\"Sum of sales_value in the redacted Dataset: {}\".format(sum_redact_dataset))\n", + "print(\"Sum of sales_value in the redacted Dataset using DP: {}\".format(dp_sum_redact))\n", "assert dp_sum_redact != sum_redact_dataset" ], "execution_count": 24, @@ -1483,7 +1483,7 @@ "outputId": "06a1f4c5-12ed-486a-d8ea-cda156c86f3f" }, "source": [ - "print(f\"Difference in sum for first 3000 records which used only 30% privacy budget= {round(abs(actual_partial_sum - partial_sum_dp), 2)}\")" + "print(\"Difference in sum for first 3000 records which used only 30% privacy budget= {}\".format(round(abs(actual_partial_sum - partial_sum_dp), 2)))" ], "execution_count": 31, "outputs": [ @@ -1614,10 +1614,10 @@ "outputId": "c0043965-6fd0-4f5f-f6fa-6a8732977750" }, "source": [ - "print(f\"Actual Sum: {sum_original_dataset}\")\n", - "print(f\"Sum from the previous run with privacy budget 1.0: {dp_sum_og}\")\n", - "print(f\"Sum when using privacy_budget as 0.7 on the whole dataset together: {sum_og_dataset(budget=0.7)}\")\n", - "print(f\"Sum from this run with privacy budget 0.7 on split dataset: {partial_total_sum}\")\n" + "print(\"Actual Sum: {}\".format(sum_original_dataset))\n", + "print(\"Sum from the previous run with privacy budget 1.0: {}\".format(dp_sum_og))\n", + "print(\"Sum when using privacy_budget as 0.7 on the whole dataset together: {}\".format(sum_og_dataset(budget=0.7)))\n", + "print(\"Sum from this run with privacy budget 0.7 on split dataset: {}\".format(partial_total_sum))\n" ], "execution_count": 36, "outputs": [ diff --git a/tests/notebooks/test_all_notebooks.py b/tests/notebooks/test_all_notebooks.py index 2056b372..e6c6d9a9 100644 --- a/tests/notebooks/test_all_notebooks.py +++ b/tests/notebooks/test_all_notebooks.py @@ -4,7 +4,7 @@ import glob # execute notebook in given path -def _execute_notebook(notebookPath: str) -> bool: +def _execute_notebook(notebook_path: str) -> bool: # convert notebook-under-test in path to a temp notebook and execute it with tempfile.NamedTemporaryFile(suffix=".ipynb") as fout: args = [ @@ -15,9 +15,9 @@ def _execute_notebook(notebookPath: str) -> bool: "--execute", "--output", fout.name, - notebookPath, + notebook_path, ] - subprocess.check_call(args) + subprocess.check_call(args) # return true if execution is successful return True @@ -26,17 +26,17 @@ def _execute_notebook(notebookPath: str) -> bool: # return all .ipynb notebooks in a given folder def _get_all_notebooks(path: str) -> list: # recursively find all .ipynb files in a given path's subdirectories - notebookPaths = glob.glob(os.path.join(path, "./**/*.ipynb"), recursive=True) + notebook_paths = glob.glob(os.path.join(path, "./**/*.ipynb"), recursive=True) - return notebookPaths + return notebook_paths # test all notebooks in a given path def test_all_notebooks(path="examples/"): # get all notebooks under path - notebookPaths = _get_all_notebooks(path) + notebook_paths = _get_all_notebooks(path) # execute each notebook - for notebookPath in notebookPaths: + for notebook_path in notebook_paths: # make sure notebook is successfully executed - assert _execute_notebook(notebookPath) + assert _execute_notebook(notebook_path)