From d373a120a237746800c5892f30e79b9ed0894918 Mon Sep 17 00:00:00 2001 From: Jeremiah Lowin <153965+jlowin@users.noreply.github.com> Date: Tue, 29 Oct 2019 18:11:33 -0400 Subject: [PATCH 1/6] Add EnvVarSecret --- docs/.vuepress/config.js | 6 ++- docs/core/concepts/secrets.md | 42 ++++++++++++++++++++ docs/outline.toml | 10 ++--- src/prefect/tasks/secrets/__init__.py | 1 + src/prefect/tasks/secrets/env_var.py | 46 ++++++++++++++++++++++ tests/tasks/secrets/test_env_var.py | 56 +++++++++++++++++++++++++++ 6 files changed, 154 insertions(+), 7 deletions(-) create mode 100644 docs/core/concepts/secrets.md create mode 100644 src/prefect/tasks/secrets/env_var.py create mode 100644 tests/tasks/secrets/test_env_var.py diff --git a/docs/.vuepress/config.js b/docs/.vuepress/config.js index 5d4a835da9b9..b7c3b293476f 100644 --- a/docs/.vuepress/config.js +++ b/docs/.vuepress/config.js @@ -201,13 +201,15 @@ module.exports = { "concepts/flows", "concepts/parameters", "concepts/states", - "concepts/mapping", "concepts/engine", "concepts/execution", - "concepts/persistence", + "concepts/logging", + "concepts/mapping", "concepts/notifications", + "concepts/persistence", "concepts/results", "concepts/schedules", + "concepts/secrets", "concepts/configuration", "concepts/best-practices", "concepts/common-pitfalls" diff --git a/docs/core/concepts/secrets.md b/docs/core/concepts/secrets.md new file mode 100644 index 000000000000..6fccc0119266 --- /dev/null +++ b/docs/core/concepts/secrets.md @@ -0,0 +1,42 @@ +# Secrets + +## Overview + +Very often, workflows require sensitive information to run: API keys, passwords, tokens, credentials, etc. As a matter of best practice, such information should never be hardcoded into a workflow's source code, as the code itself will need to be guarded. Furthermore, sensitive information should not be provided via a Prefect `Parameter`, because Parameters, like many tasks, can have their results stored. 
+ +Prefect provides a mechanism called `Secrets` for working with sensitive information. `Secrets` are a special kind of task; they may be instantiated and used as the input to any other task. However, `Secret` tasks differ from regular tasks in two ways: +- they access sensitive information at runtime +- they use a special `ResultHandler` to ensure that their results are not serialized + +::: tip Keep secrets secret! +Though Prefect takes steps to ensure that `Secret` objects do not reveal sensitive information, other tasks may not be so careful. Once a secret value is loaded into your flow, it can be used for any purpose. Please use caution anytime you are working with sensitive data. +::: + +## Mechanisms + +### Local Context + +The base `Secret` class first checks for secrets in local context, under `prefect.context.secrets`. This is useful for local testing, as secrets can be added to context by setting the environment variable `PREFECT__CONTEXT__SECRETS__FOO`, corresponding to `secrets.foo`. + +### Prefect Cloud + +If the secret is not found in local context, the base `Secret` class queries the Prefect Cloud API for a stored secret. This call can only be made successfully by authenticated Prefect Cloud Agents. + +### Environment Variables + +The `EnvVarSecret` class reads secret values from environment variables. 
+ +```python +from prefect import task, Flow +from prefect.tasks.secrets import EnvVarSecret + +@task +def print_value(x): + print(x) + +with Flow("Example") as flow: + secret = EnvVarSecret("PATH") + print_value(secret) + +flow.run() # prints the value of the "PATH" environment variable +``` diff --git a/docs/outline.toml b/docs/outline.toml index 8562cf9d2995..6e181fbff56b 100644 --- a/docs/outline.toml +++ b/docs/outline.toml @@ -197,10 +197,10 @@ classes = ["S3Download", "S3Upload", "LambdaCreate", "LambdaDelete" , "LambdaInv [pages.tasks.azure] title = "Azure Tasks" module = "prefect.tasks.azure" -classes = ["BlobStorageDownload", - "BlobStorageUpload", - "CosmosDBCreateItem", - "CosmosDBReadItems", +classes = ["BlobStorageDownload", + "BlobStorageUpload", + "CosmosDBCreateItem", + "CosmosDBReadItems", "CosmosDBQueryItems"] [pages.tasks.azureml] @@ -308,7 +308,7 @@ classes = ["ParseRSSFeed"] [pages.tasks.secrets] title = "Secret Tasks" module = "prefect.tasks.secrets" -classes = ["Secret"] +classes = ["Secret", "EnvVarSecret"] [pages.tasks.snowflake] title = "Snowflake Tasks" diff --git a/src/prefect/tasks/secrets/__init__.py b/src/prefect/tasks/secrets/__init__.py index eb61f73aed58..3a61e7b88cf7 100644 --- a/src/prefect/tasks/secrets/__init__.py +++ b/src/prefect/tasks/secrets/__init__.py @@ -6,3 +6,4 @@ class for interacting with other secret providers. Secrets always use a special prevents the persistence of sensitive information. """ from .base import Secret +from .env_var import EnvVarSecret diff --git a/src/prefect/tasks/secrets/env_var.py b/src/prefect/tasks/secrets/env_var.py new file mode 100644 index 000000000000..130d92b680fa --- /dev/null +++ b/src/prefect/tasks/secrets/env_var.py @@ -0,0 +1,46 @@ +import os +from typing import Any, Callable + +from prefect.tasks.secrets import Secret + + +class EnvVarSecret(Secret): + """ + A `Secret` task that retrieves a value from an environment variable. 
+ + Args: + - env_var (str): the environment variable that contains the secret value + - name (str, optional): a name for the task. If not provided, `env_var` is used. + - cast (Callable[[Any], Any]): A function that will be called on the Parameter + value to coerce it to a type. + - **kwargs (Any, optional): additional keyword arguments to pass to the Task constructor + + Raises: + - ValueError: if a `result_handler` keyword is passed + """ + + def __init__( + self, + env_var: str, + name: str = None, + cast: Callable[[Any], Any] = None, + **kwargs + ): + self.env_var = env_var + self.cast = cast + if name is None: + name = env_var + + super().__init__(name=name, **kwargs) + + def run(self): + """ + Returns the value of an environment variable after applying an optional `cast` function. + + Returns: + - Any: the (optionally type-cast) value of the environment variable + """ + value = os.getenv(self.env_var) + if value is not None and self.cast is not None: + value = self.cast(value) + return value diff --git a/tests/tasks/secrets/test_env_var.py b/tests/tasks/secrets/test_env_var.py new file mode 100644 index 000000000000..7a74b439aefd --- /dev/null +++ b/tests/tasks/secrets/test_env_var.py @@ -0,0 +1,56 @@ +import pendulum +import pytest +import prefect +from prefect.tasks.secrets import EnvVarSecret + + +def test_create_envvarsecret_requires_env_var(): + with pytest.raises(TypeError, match="required positional argument: 'env_var'"): + EnvVarSecret() + + +def test_name_defaults_to_env_var(): + e = EnvVarSecret(env_var="FOO") + assert e.env_var == "FOO" + assert e.name == "FOO" + + +def test_name_can_be_customized(): + e = EnvVarSecret(env_var="FOO", name="BAR") + assert e.env_var == "FOO" + assert e.name == "BAR" + + +def test_default_cast_is_none(): + e = EnvVarSecret(env_var="FOO") + assert e.cast is None + + +def test_run_secret(monkeypatch): + monkeypatch.setenv("FOO", "1") + e = EnvVarSecret(env_var="FOO") + assert e.run() == "1" + + +def 
test_run_secret_without_env_var_set_returns_none(monkeypatch): + monkeypatch.delenv("FOO", raising=False) + e = EnvVarSecret(env_var="FOO") + assert e.run() is None + + +def test_run_secret_with_cast(monkeypatch): + monkeypatch.setenv("FOO", "1") + e = EnvVarSecret(env_var="FOO", cast=int) + assert e.run() == 1 + + +def test_run_secret_without_env_var_set_returns_none_even_if_cast_set(monkeypatch): + monkeypatch.delenv("FOO", raising=False) + e = EnvVarSecret(env_var="FOO", cast=int) + assert e.run() is None + + +def test_run_secret_with_cast_datetime(monkeypatch): + monkeypatch.setenv("FOO", "2019-01-02 03:04:05") + e = EnvVarSecret(env_var="FOO", cast=pendulum.parse) + assert e.run() == pendulum.datetime(2019, 1, 2, 3, 4, 5) From 4e8f1dcee3926ec1691a12206fc7a565d848ee09 Mon Sep 17 00:00:00 2001 From: Jeremiah Lowin <153965+jlowin@users.noreply.github.com> Date: Tue, 29 Oct 2019 18:13:55 -0400 Subject: [PATCH 2/6] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f8cc3e4b1c5..5447812a4854 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ These changes are available in the [master branch](https://github.com/PrefectHQ/ - Reduces the size of the `prefecthq/prefect` Docker image by ~400MB, which is now the base Docker image used in Flows - [#1648](https://github.com/PrefectHQ/prefect/pull/1648) - Add a new healthcheck for environment dependencies - [#1653](https://github.com/PrefectHQ/prefect/pull/1653) - Add default 30 second timeout to Client requests - [#1672](https://github.com/PrefectHQ/prefect/pull/1672) +- Add `EnvVarSecrets` for loading sensitive information from environment variables - [#1683](https://github.com/PrefectHQ/prefect/pull/1683) ### Task Library From 349560a8431c075825cef7482ea6c617831d8530 Mon Sep 17 00:00:00 2001 From: Jeremiah Lowin <153965+jlowin@users.noreply.github.com> Date: Tue, 29 Oct 2019 20:33:53 -0400 Subject: [PATCH 3/6] Update 
docs/core/concepts/secrets.md Co-Authored-By: Chris White --- docs/core/concepts/secrets.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/core/concepts/secrets.md b/docs/core/concepts/secrets.md index 6fccc0119266..ba6751432382 100644 --- a/docs/core/concepts/secrets.md +++ b/docs/core/concepts/secrets.md @@ -6,7 +6,7 @@ Very often, workflows require sensitive information to run: API keys, passwords, Prefect provides a mechanism called `Secrets` for working with sensitive information. `Secrets` are a special kind of task; they may be instantiated and used as the input to any other task. However, `Secret` tasks differ from regular tasks in two ways: - they access sensitive information at runtime -- they use a special `ResultHandler` to ensure that their results are not serialized +- they use a special `ResultHandler` to ensure that their results are not stored anywhere ::: tip Keep secrets secret! Though Prefect takes steps to ensure that `Secret` objects do not reveal sensitive information, other tasks may not be so careful. Once a secret value is loaded into your flow, it can be used for any purpose. Please use caution anytime you are working with sensitive data. From d2e7434f8ebe264dd681c9985aa7f42e12232d75 Mon Sep 17 00:00:00 2001 From: Jeremiah Lowin <153965+jlowin@users.noreply.github.com> Date: Tue, 29 Oct 2019 20:40:34 -0400 Subject: [PATCH 4/6] Add `raise_if_missing` kwarg --- src/prefect/tasks/secrets/env_var.py | 5 +++++ tests/tasks/secrets/test_env_var.py | 14 ++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/prefect/tasks/secrets/env_var.py b/src/prefect/tasks/secrets/env_var.py index 130d92b680fa..e3a8502cbfb4 100644 --- a/src/prefect/tasks/secrets/env_var.py +++ b/src/prefect/tasks/secrets/env_var.py @@ -13,6 +13,7 @@ class EnvVarSecret(Secret): - name (str, optional): a name for the task. If not provided, `env_var` is used. 
- cast (Callable[[Any], Any]): A function that will be called on the Parameter value to coerce it to a type. + - raise_if_missing (bool): if True, an error will be raised if the env var is not found. - **kwargs (Any, optional): additional keyword arguments to pass to the Task constructor Raises: @@ -24,10 +25,12 @@ def __init__( env_var: str, name: str = None, cast: Callable[[Any], Any] = None, + raise_if_missing: bool = False, **kwargs ): self.env_var = env_var self.cast = cast + self.raise_if_missing = raise_if_missing if name is None: name = env_var @@ -40,6 +43,8 @@ def run(self): Returns: - Any: the (optionally type-cast) value of the environment variable """ + if self.raise_if_missing and self.env_var not in os.environ: + raise ValueError("Environment variable not set: {}".format(self.env_var)) value = os.getenv(self.env_var) if value is not None and self.cast is not None: value = self.cast(value) diff --git a/tests/tasks/secrets/test_env_var.py b/tests/tasks/secrets/test_env_var.py index 7a74b439aefd..61195cd928b2 100644 --- a/tests/tasks/secrets/test_env_var.py +++ b/tests/tasks/secrets/test_env_var.py @@ -38,6 +38,13 @@ def test_run_secret_without_env_var_set_returns_none(monkeypatch): assert e.run() is None +def test_run_secret_without_env_var_set_raises(monkeypatch): + monkeypatch.delenv("FOO", raising=False) + e = EnvVarSecret(env_var="FOO", raise_if_missing=True) + with pytest.raises(ValueError, match="variable not set"): + e.run() + + def test_run_secret_with_cast(monkeypatch): monkeypatch.setenv("FOO", "1") e = EnvVarSecret(env_var="FOO", cast=int) @@ -50,6 +57,13 @@ def test_run_secret_without_env_var_set_returns_none_even_if_cast_set(monkeypatc assert e.run() is None +def test_run_secret_without_env_var_set_raises_with_cast(monkeypatch): + monkeypatch.delenv("FOO", raising=False) + e = EnvVarSecret(env_var="FOO", raise_if_missing=True, cast=int) + with pytest.raises(ValueError, match="variable not set"): + e.run() + + def 
test_run_secret_with_cast_datetime(monkeypatch): monkeypatch.setenv("FOO", "2019-01-02 03:04:05") e = EnvVarSecret(env_var="FOO", cast=pendulum.parse) From 148f72ee24e3b4bdacc61d8e526990c5406d16db Mon Sep 17 00:00:00 2001 From: Jeremiah Lowin <153965+jlowin@users.noreply.github.com> Date: Tue, 29 Oct 2019 20:40:38 -0400 Subject: [PATCH 5/6] Update secrets.md --- docs/core/concepts/secrets.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/core/concepts/secrets.md b/docs/core/concepts/secrets.md index 6fccc0119266..96ac54df22f6 100644 --- a/docs/core/concepts/secrets.md +++ b/docs/core/concepts/secrets.md @@ -4,9 +4,10 @@ Very often, workflows require sensitive information to run: API keys, passwords, tokens, credentials, etc. As a matter of best practice, such information should never be hardcoded into a workflow's source code, as the code itself will need to be guarded. Furthermore, sensitive information should not be provided via a Prefect `Parameter`, because Parameters, like many tasks, can have their results stored. -Prefect provides a mechanism called `Secrets` for working with sensitive information. `Secrets` are a special kind of task; they may be instantiated and used as the input to any other task. However, `Secret` tasks differ from regular tasks in two ways: -- they access sensitive information at runtime -- they use a special `ResultHandler` to ensure that their results are not serialized +Prefect provides a mechanism called `Secrets` for working with sensitive information. It comes in two forms:
This API can be used where tasks are unavailable, including notifications, state handlers, and result handlers. ::: tip Keep secrets secret! Though Prefect takes steps to ensure that `Secret` objects do not reveal sensitive information, other tasks may not be so careful. Once a secret value is loaded into your flow, it can be used for any purpose. Please use caution anytime you are working with sensitive data. @@ -16,11 +17,11 @@ Though Prefect takes steps to ensure that `Secret` objects do not reveal sensiti ### Local Context -The base `Secret` class first checks for secrets in local context, under `prefect.context.secrets`. This is useful for local testing, as secrets can be added to context by setting the environment variable `PREFECT__CONTEXT__SECRETS__FOO`, corresponding to `secrets.foo`. +The base `Secret` class first checks for secrets in local context, under `prefect.context.secrets`. This is useful for local testing, as secrets can be added to context by setting the environment variable `PREFECT__CONTEXT__SECRETS__FOO`, corresponding to `secrets.foo` (or `secrets.FOO`, if your OS is case-sensitive). ### Prefect Cloud -If the secret is not found in local context, the base `Secret` class queries the Prefect Cloud API for a stored secret. This call can only be made successfully by authenticated Prefect Cloud Agents. +If the secret is not found in local context and `config.cloud.use_local_secrets=False`, the base `Secret` class queries the Prefect Cloud API for a stored secret. This call can only be made successfully by authenticated Prefect Cloud Agents. 
### Environment Variables From a4e36224be322b3c4dbe202f798c807e9e38212a Mon Sep 17 00:00:00 2001 From: Jeremiah Lowin <153965+jlowin@users.noreply.github.com> Date: Thu, 31 Oct 2019 17:11:32 -0400 Subject: [PATCH 6/6] Update changelog --- CHANGELOG.md | 2 +- src/prefect/tasks/secrets/env_var.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e316903e4ca2..883c2bbb875b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ These changes are available in the [master branch](https://github.com/PrefectHQ/ - Add a `save`/`load` interface to Flows - [#1685](https://github.com/PrefectHQ/prefect/pull/1685) - Add option to specify `aws_session_token` for the `FargateTaskEnvironment` - [#1688](https://github.com/PrefectHQ/prefect/pull/1688) +- Add `EnvVarSecret` for loading sensitive information from environment variables - [#1683](https://github.com/PrefectHQ/prefect/pull/1683) ### Task Library @@ -49,7 +50,6 @@ Released October 29, 2019 - Reduces the size of the `prefecthq/prefect` Docker image by ~400MB, which is now the base Docker image used in Flows - [#1648](https://github.com/PrefectHQ/prefect/pull/1648) - Add a new healthcheck for environment dependencies - [#1653](https://github.com/PrefectHQ/prefect/pull/1653) - Add default 30 second timeout to Client requests - [#1672](https://github.com/PrefectHQ/prefect/pull/1672) -- Add `EnvVarSecrets` for loading sensitive information from environment variables - [#1683](https://github.com/PrefectHQ/prefect/pull/1683) ### Task Library diff --git a/src/prefect/tasks/secrets/env_var.py b/src/prefect/tasks/secrets/env_var.py index e3a8502cbfb4..20203cafd343 100644 --- a/src/prefect/tasks/secrets/env_var.py +++ b/src/prefect/tasks/secrets/env_var.py @@ -15,9 +15,6 @@ class EnvVarSecret(Secret): value to coerce it to a type. - raise_if_missing (bool): if True, an error will be raised if the env var is not found.
- **kwargs (Any, optional): additional keyword arguments to pass to the Task constructor - - Raises: - - ValueError: if a `result_handler` keyword is passed """ def __init__(