From dad08d29f37658fc233ed697a67ee80c9875b17d Mon Sep 17 00:00:00 2001 From: Iaroslav Zeigerman Date: Tue, 7 Feb 2023 15:58:11 -0800 Subject: [PATCH 1/2] Update documentation page for the Configuration Reference --- docs/concepts/configs.md | 126 ----------- docs/integrations/airflow.md | 1 - docs/reference/configuration.md | 266 +++++++++++++++++++++++ sqlmesh/core/config/common.py | 14 ++ sqlmesh/core/config/connection.py | 10 +- sqlmesh/core/config/model.py | 5 +- sqlmesh/core/config/root.py | 7 +- sqlmesh/core/config/scheduler.py | 8 - sqlmesh/core/context.py | 6 +- sqlmesh/schedulers/airflow/state_sync.py | 3 - tests/core/test_config.py | 4 +- tests/core/test_context.py | 10 +- 12 files changed, 309 insertions(+), 151 deletions(-) delete mode 100644 docs/concepts/configs.md create mode 100644 docs/reference/configuration.md diff --git a/docs/concepts/configs.md b/docs/concepts/configs.md deleted file mode 100644 index 47de575472..0000000000 --- a/docs/concepts/configs.md +++ /dev/null @@ -1,126 +0,0 @@ -# Configs -Configs define settings for things like engines (such as Snowflake or Spark), schedulers (such as Airflow or Dagster), and the SQL dialect. The config file is defined in config.py in the root directory of your SQLMesh project. - -## Settings -### connections -A dictionary of supported connection and their configurations. The key represents a unique connection name. If there is only one connection, its configuration can be provided directly, omitting the dictionary. - -```python -import duckdb -from sqlmesh.core.config import Config, DuckDBConnectionConfig - -Config( - connections={ - "default": DuckDBConnectionConfig(database="local.duckdb"), - }, -) -``` - -### scheduler -Identifies which scheduler backend to use. The scheduler backend is used both for storing metadata and executing [plans](/concepts/plans). By default, the `BuiltinSchedulerBackend` is used, which uses the existing SQL engine to store metadata and has a simple scheduler. The `AirflowSchedulerBackend` should be used if you want to integrate with Airflow. - -```python -from sqlmesh.core.config import AirflowSchedulerConfig, Config - -Config(scheduler=AirflowSchedulerConfig()) -``` - -### notification_targets -Used to receive logging or updates as SQLMesh processes things. Notification targets can be used to implement things such as integration with Github or Slack. - -### dialect -The default sql dialect of model queries. Default: same as engine dialect. The dialect is used if a [model](/concepts/models) does not define a dialect. Note that this dialect only specifies what the model is written as. At runtime, model queries will be transpiled to the correct engine dialect. - -### physical_schema -The default schema used to store materialized tables. By default, this will store all physical tables managed by SQLMesh in the `sqlmesh` schema/db in your warehouse. - -### snapshot_ttl -Duration before unpromoted snapshots are removed. This is defined as a string with the default `in 1 week`. Other [relative strings](https://dateparser.readthedocs.io/en/latest/) can be used, such as `in 30 days`. - -### time_column_format -The default format to use for all model time columns. Defaults to %Y-%m-%d. - -This time format uses python format codes. https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes. - -### users -A list of users that can be used for approvals/notifications. 
## Precedence
You can configure your project in multiple places, and SQLMesh will prioritize configurations according to
the following order, from least to greatest precedence:

- A Config object defined in a config.py file at the root of your project:

```python
# config.py
import duckdb
from sqlmesh.core.engine_adapter import EngineAdapter
local_config = Config(
    connection=DuckDBConnectionConfig(database="local.duckdb"),
)
# End config.py

>>> from sqlmesh import Context
>>> context = Context(path="example", config="local_config")

```

- A Config object used when initializing a Context:

```python
>>> from sqlmesh import Context
>>> from sqlmesh.core.config import Config
>>> my_config = Config()
>>> context = Context(path="example", config=my_config)

```

- Individual config parameters used when initializing a Context:

```python
>>> from sqlmesh import Context
>>> from sqlmesh.core.engine_adapter import create_engine_adapter
>>> adapter = create_engine_adapter(duckdb.connect, "duckdb")
>>> context = Context(
...     path="example",
...     engine_adapter=adapter,
...     dialect="duckdb",
... )
```

## Using Config objects
The most common way to configure your SQLMesh project is with a `config.py` module at the root of your
project. A SQLMesh Context will automatically look for Config objects there. You can have multiple
Config objects defined and tell Context which one to use. For example, you can have different
Configs for local and production environments, Airflow, and Model tests.

Example config.py:
```python
import duckdb

from sqlmesh.core.config import Config, AirflowSchedulerBackend

from my_project.utils import load_test_data


# An in memory DuckDB config.
config = Config()

# A stateful DuckDB config.
local_config = Config(
    connection=DuckDBConnectionConfig(database="local.duckdb"),
)

# A config that uses Airflow
airflow_config = Config(
    scheduler_backend=AirflowSchedulerBackend(),
)
```

To use a Config, pass in its variable name to Context.
```python
>>> from sqlmesh import Context
>>> context = Context(path="example", config="local_config")

```

diff --git a/docs/integrations/airflow.md b/docs/integrations/airflow.md
index 41071ce13e..40ef048749 100644
--- a/docs/integrations/airflow.md
+++ b/docs/integrations/airflow.md
@@ -33,7 +33,6 @@ In your SQLMesh repository, create the following configuration:
 from sqlmesh.core.config import Config, AirflowSchedulerConfig
 
 airflow_config = Config(
-    dialect="spark",
     scheduler=AirflowSchedulerConfig(
         airflow_url="https://<hostname>:<port>/",
         username="<username>",
diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md
new file mode 100644
index 0000000000..7d32300a5e
--- /dev/null
+++ b/docs/reference/configuration.md
@@ -0,0 +1,266 @@
# Configuration Reference
This page contains the list of all available SQLMesh configurations that can be set as either an environment varaible, in the `config.yaml` in a project folder or in the file with the same name in the `~/.sqlmesh` folder.

Configuration options from different sources have the following order of precedence:
1. Set as an environment variable (e.g. `SQLMESH__MODEL_DEFAULTS__DIALECT`).
2. Set in `config.yaml` in a project folder.
3. Set in `config.yaml` in the `~/.sqlmesh` folder.
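For example, the `SQLMESH__MODEL_DEFAULTS__DIALECT` variable above corresponds to the following `config.yaml` entry (this sketch assumes every nested option follows the same `SQLMESH__` prefix and double-underscore separator convention):

```yaml
model_defaults:
  dialect: snowflake
```

Setting `SQLMESH__MODEL_DEFAULTS__DIALECT=snowflake` in the environment would override this value from either file.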
## connections
A dictionary of supported connection and their configurations. The key represents a unique connection name. If there is only one connection, its configuration can be provided directly, omitting the dictionary.

```yaml
connections:
  my_connection:
    type: snowflake
    user: <username>
    password: <password>
    account: <account>
```

All connection configurations share the `concurrent_tasks` setting, which determines the maximum number of tasks that will be run by SQLMesh concurrently when using this connection.

Below is the list of configuration options specific to each corresponding connection type.

### duckdb
#### database
The optional database name. If not specified the in-memory database is used.

**Type:** string
**Default:** `None`

### snowflake
#### user
The Snowflake username.

**Type:** string

#### password
The Snowflake password.

**Type:** string

#### account
The Snowflake account name.

**Type:** string

#### warehouse
The optional Snowflake warehouse name.

**Type:** string
**Default:** `None`

#### database
The optional Snowflake database name.

**Type:** string
**Default:** `None`

#### role
The optional Snowflake role name.

**Type:** string
**Default:** `None`

### databricks
#### server_hostname
The Databricks instance host name.

**Type:** string

#### http_path
The HTTP path either to a DBSQL endpoint (e.g. /sql/1.0/endpoints/1234567890abcdef) or to a DBR interactive cluster (e.g. /sql/protocolv1/o/1234567890123456/1234-123456-slid123).

**Type:** string

#### access_token
The HTTP Bearer access token, e.g. a Databricks Personal Access Token.

**Type:** string

#### http_headers
An optional dictionary of HTTP headers that will be set on every request.

**Type:** dictionary
**Default:** `None`

#### session_configuration
An optional dictionary of Spark session parameters.

**Type:** dictionary
**Default:** `None`

### bigquery

TBD

### redshift

TBD

## default_connection
The name of a connection to use by default.

**Type:** string
**Default:** The first connection defined in the `connections` option.

## test_connection
The name of a connection to use when running tests.

**Type:** string
**Default:** A DuckDB connection that creates an in-memory database.

## scheduler
Identifies which scheduler backend to use. The scheduler backend is used both for storing metadata and executing [plans](/concepts/plans). By default, the scheduler type is set to `builtin`, which uses the existing SQL engine to store metadata and has a simple scheduler. The `airflow` type should be set if you want to integrate with Airflow.

```yaml
scheduler:
  type: builtin
```

Below is the list of configuration options specific to each corresponding scheduler type.

### builtin
No additional configuration options are supported by this scheduler type.

### airflow
#### airflow_url
The URL of the Airflow Webserver.

**Type:** string

#### username
The Airflow username.

**Type:** string

#### password
The Airflow password.

**Type:** string

#### dag_run_poll_interval_secs
Determines how often a running DAG can be polled (in seconds).

**Type:** int
**Default:** `10`

#### dag_creation_poll_interval_secs
Determines how often SQLMesh should check whether a DAG has been created (in seconds).

**Type:** int
**Default:** `30`

#### dag_creation_max_retry_attempts
Determines the maximum number of attempts that SQLMesh will make while checking for whether a DAG has been created.

**Type:** int
**Default:** `10`

#### backfill_concurrent_tasks
The number of concurrent tasks used for model backfilling during plan application.

**Type:** int
**Default:** `4`

#### ddl_concurrent_tasks
The number of concurrent tasks used DDL operations like table / view creation, deletion, etc.

**Type:** int
**Default:** `4`
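Putting these options together, an `airflow` scheduler configuration might look like the following sketch (the URL and credentials are placeholders, and the YAML keys are assumed to mirror the option names above):

```yaml
scheduler:
  type: airflow
  airflow_url: https://<hostname>:<port>/
  username: <username>
  password: <password>
  dag_run_poll_interval_secs: 10
  backfill_concurrent_tasks: 4
  ddl_concurrent_tasks: 4
```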
### cloud_composer
This scheduler type shares the same configuration options as the `airflow` type, except for `username` and `password`.

## physical_schema
The default schema used to store physical tables for models.

**Type:** string
**Default:** `sqlmesh`

## snapshot_ttl
The duration before unpromoted snapshots are removed, defined as a [relative string](https://dateparser.readthedocs.io/en/latest/) such as `in 30 days`.

**Type:** string
**Default:** `in 1 week`

## ignore_patterns
Files that match glob patterns specified in this list are ignored when scanning the project folder.

**Type:** list of strings
**Default:** `[]`

## time_column_format
The default format to use for all model time columns.

This time format uses [Python format codes](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).

**Type:** string
**Default:** `%Y-%m-%d`

## model_defaults

This section contains options that are specific to models and that are set automatically unless explicitly overridden in the model definition.

```yaml
model_defaults:
  dialect: snowflake
  owner: jen
  start: 2022-01-01
```

### kind
The default model kind (see [model kinds](/concepts/models/model_kind)). Example:
```yaml
model_defaults:
  kind: full
```

Alternatively if a kind requires addiional parameters it can be provided as an object:
```yaml
model_defaults:
  kind:
    name: incremental_by_time_range
    time_column: ds
```

**Type:** string or object
**Default:** `None`

### dialect
The SQL dialect that the model's query is written in.

**Type:** string
**Default:** `None`

### cron
The cron expression specifying how often the model should be refreshed.

**Type:** string
**Default:** `None`

### owner
The owner of a model. Maybe used for notification purposes.

**Type:** string
**Default:** `None`

### start
The date/time that determines the earliest data interval that should be processed by a model. This value is used to identify missing data intervals during plan application and restatement. The value can be a datetime string, epoch time in milliseconds, or a relative datetime like "1 year ago".

**Type:** string or int
**Default:** `None`

### batch_size
The maximum number of intervals that can be evaluated in a single backfill task. If this is None, then all intervals will be processed as part of a single task. If this is set, a model's backfill will be chunked such that each individual task contains at most `batch_size` intervals.

**Type:** int
**Default:** `None`

### storage_format
The storage format that should be used to store physical tables. Only applicable to egnines like Spark.

**Type:** string
**Default:** `None`
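## Full example
Tying the sections above together, a minimal `config.yaml` might look like the following sketch (the connection name and all values are illustrative, not defaults):

```yaml
connections:
  local:
    type: duckdb
    database: local.duckdb

default_connection: local

scheduler:
  type: builtin

model_defaults:
  dialect: duckdb
  start: 2022-01-01
```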
diff --git a/sqlmesh/core/config/common.py b/sqlmesh/core/config/common.py
index 79b1c5f022..ba57e28552 100644
--- a/sqlmesh/core/config/common.py
+++ b/sqlmesh/core/config/common.py
@@ -25,3 +25,17 @@ def _concurrent_tasks_validator(v: t.Any) -> int:
     allow_reuse=True,
     check_fields=False,
 )(_concurrent_tasks_validator)
+
+
+def _http_headers_validator(v: t.Any) -> t.Any:
+    if isinstance(v, dict):
+        return [(key, value) for key, value in v.items()]
+    return v
+
+
+http_headers_validator = validator(
+    "http_headers",
+    pre=True,
+    allow_reuse=True,
+    check_fields=False,
+)(_http_headers_validator)
diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py
index 6d9f6dba2b..5ab845b4f4 100644
--- a/sqlmesh/core/config/connection.py
+++ b/sqlmesh/core/config/connection.py
@@ -8,7 +8,10 @@
 from sqlmesh.core import engine_adapter
 from sqlmesh.core.config.base import BaseConfig
-from sqlmesh.core.config.common import concurrent_tasks_validator
+from sqlmesh.core.config.common import (
+    concurrent_tasks_validator,
+    http_headers_validator,
+)
 from sqlmesh.core.engine_adapter import EngineAdapter
 
 if sys.version_info >= (3, 9):
@@ -155,6 +158,7 @@ class DatabricksAPIConnectionConfig(_ConnectionConfig):
     type_: Literal["databricks_api"] = Field(alias="type", default="databricks_api")
 
     _concurrent_tasks_validator = concurrent_tasks_validator
+    _http_headers_validator = http_headers_validator
 
     @property
     def _connection_kwargs_keys(self) -> t.Set[str]:
@@ -252,6 +256,9 @@ class DatabricksConnectionConfig(_ConnectionConfig):
 
     type_: Literal["databricks"] = Field(alias="type", default="databricks")
 
+    _concurrent_tasks_validator = concurrent_tasks_validator
+    _http_headers_validator = http_headers_validator
+
     _has_spark_session_access: bool
 
     class Config:
@@ -352,6 +359,7 @@ class RedshiftConnectionConfig(_ConnectionConfig):
 
     Arg Source: https://github.com/aws/amazon-redshift-python-driver/blob/master/redshift_connector/__init__.py#L146
     Note: A subset of properties were selected. Please open an issue/PR if you want to see more supported.
+
     Args:
         user: The username to use for authentication with the Amazon Redshift cluster.
         password: The password to use for authentication with the Amazon Redshift cluster.
diff --git a/sqlmesh/core/config/model.py b/sqlmesh/core/config/model.py
index 43d4c36ea3..e3d80f938e 100644
--- a/sqlmesh/core/config/model.py
+++ b/sqlmesh/core/config/model.py
@@ -12,11 +12,10 @@ class ModelDefaultsConfig(BaseConfig):
 
     Args:
         kind: The model kind.
-        dialect: The SQL dialect that the model's query is written in. By default,
-            this is assumed to be the dialect of the context.
-        owner: The owner of the model.
+        dialect: The SQL dialect that the model's query is written in.
         cron: A cron string specifying how often the model should be refreshed, leveraging the
             [croniter](https://github.com/kiorky/croniter) library.
+        owner: The owner of the model.
         start: The earliest date that the model will be backfilled for. If this is None,
             then the date is inferred by taking the most recent start date of its ancestors.
             The start date can be a static datetime or a relative datetime like "1 year ago"
diff --git a/sqlmesh/core/config/root.py b/sqlmesh/core/config/root.py
index 06faaa9f69..ed62ba582a 100644
--- a/sqlmesh/core/config/root.py
+++ b/sqlmesh/core/config/root.py
@@ -18,7 +18,7 @@ class Config(BaseConfig):
     """An object used by a Context to configure your SQLMesh project.
- Args: + Args connections: Supported connections and their configurations. Key represents a unique name of a connection. default_connection: The name of a connection to use by default. test_connection: The connection settings for tests. Can be a name which refers to an existing configuration @@ -28,9 +28,11 @@ class Config(BaseConfig): dialect: The default sql dialect of model queries. Default: same as engine dialect. physical_schema: The default schema used to store materialized tables. snapshot_ttl: Duration before unpromoted snapshots are removed. + ignore_patterns: Files that match glob patterns specified in this list are ignored when scanning the project folder. time_column_format: The default format to use for all model time columns. Defaults to %Y-%m-%d. This time format uses python format codes. https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes. users: A list of users that can be used for approvals/notifications. + model_defaults: Default values for model definitions. """ connections: t.Union[ @@ -42,14 +44,13 @@ class Config(BaseConfig): ) scheduler: SchedulerConfig = BuiltInSchedulerConfig() notification_targets: t.List[NotificationTarget] = [] - dialect: str = "" physical_schema: str = "" snapshot_ttl: str = "" ignore_patterns: t.List[str] = [] time_column_format: str = c.DEFAULT_TIME_COLUMN_FORMAT users: t.List[User] = [] - loader: t.Type[Loader] = SqlMeshLoader model_defaults: ModelDefaultsConfig = ModelDefaultsConfig() + loader: t.Type[Loader] = SqlMeshLoader _FIELD_UPDATE_STRATEGY: t.ClassVar[t.Dict[str, UpdateStrategy]] = { "connections": UpdateStrategy.KEY_UPDATE, diff --git a/sqlmesh/core/config/scheduler.py b/sqlmesh/core/config/scheduler.py index a105f160de..a9f2596b23 100644 --- a/sqlmesh/core/config/scheduler.py +++ b/sqlmesh/core/config/scheduler.py @@ -81,7 +81,6 @@ def create_plan_evaluator(self, context: Context) -> PlanEvaluator: class _BaseAirflowSchedulerConfig(_SchedulerConfig): - max_concurrent_requests: int dag_run_poll_interval_secs: int dag_creation_poll_interval_secs: int dag_creation_max_retry_attempts: int @@ -98,7 +97,6 @@ def create_state_reader(self, context: Context) -> t.Optional[StateReader]: return HttpStateReader( client=self.get_client(context.console), - max_concurrent_requests=self.max_concurrent_requests, dag_run_poll_interval_secs=self.dag_run_poll_interval_secs, console=context.console, ) @@ -124,8 +122,6 @@ class AirflowSchedulerConfig(_BaseAirflowSchedulerConfig, BaseConfig): airflow_url: The URL of the Airflow Webserver. username: The Airflow username. password: The Airflow password. - max_concurrent_requests: The maximum number of concurrent requests when interacting with - the Airflow Webserver. dag_run_poll_interval_secs: Determines how often a running DAG can be polled (in seconds). dag_creation_poll_interval_secs: Determines how often SQLMesh should check whether a DAG has been created (in seconds). dag_creation_max_retry_attempts: Determines the maximum number of attempts that SQLMesh will make while checking for @@ -137,7 +133,6 @@ class AirflowSchedulerConfig(_BaseAirflowSchedulerConfig, BaseConfig): airflow_url: str = AIRFLOW_LOCAL_URL username: str = "airflow" password: str = "airflow" - max_concurrent_requests: int = 2 dag_run_poll_interval_secs: int = 10 dag_creation_poll_interval_secs: int = 30 dag_creation_max_retry_attempts: int = 10 @@ -166,8 +161,6 @@ class CloudComposerSchedulerConfig(_BaseAirflowSchedulerConfig, BaseConfig): Args: airflow_url: The URL of the Airflow Webserver. 
- max_concurrent_requests: The maximum number of concurrent requests when interacting with - the Airflow Webserver. dag_run_poll_interval_secs: Determines how often a running DAG can be polled (in seconds). dag_creation_poll_interval_secs: Determines how often SQLMesh should check whether a DAG has been created (in seconds). dag_creation_max_retry_attempts: Determines the maximum number of attempts that SQLMesh will make while checking for @@ -177,7 +170,6 @@ class CloudComposerSchedulerConfig(_BaseAirflowSchedulerConfig, BaseConfig): """ airflow_url: str - max_concurrent_requests: int = 2 dag_run_poll_interval_secs: int = 10 dag_creation_poll_interval_secs: int = 30 dag_creation_max_retry_attempts: int = 10 diff --git a/sqlmesh/core/context.py b/sqlmesh/core/context.py index 7cd517df8f..0ff91b67e6 100644 --- a/sqlmesh/core/context.py +++ b/sqlmesh/core/context.py @@ -240,7 +240,11 @@ def __init__( ) self._test_engine_adapter = test_connection_config.create_engine_adapter() - self.dialect = dialect or self.config.dialect or self._engine_adapter.dialect + self.dialect = ( + dialect + or self.config.model_defaults.dialect + or self._engine_adapter.dialect + ) self.snapshot_evaluator = SnapshotEvaluator( self.engine_adapter, ddl_concurrent_tasks=self.concurrent_tasks diff --git a/sqlmesh/schedulers/airflow/state_sync.py b/sqlmesh/schedulers/airflow/state_sync.py index 1b6136c693..a62b038d33 100644 --- a/sqlmesh/schedulers/airflow/state_sync.py +++ b/sqlmesh/schedulers/airflow/state_sync.py @@ -25,7 +25,6 @@ class HttpStateReader(StateReader): airflow_url: URL pointing to the airflow rest api. username: Username for Airflow. password: Password for Airflow. - max_concurrent_requests: Max number of http requests to make concurrently. blocking_updates: Indicates whether calls that cause state updates should be blocking. dag_run_poll_interval_secs: Determines how frequently the state of a DAG run should be checked. Used to block on calls that update the state. 
@@ -35,13 +34,11 @@
     def __init__(
         self,
         client: AirflowClient,
-        max_concurrent_requests: int = 2,
         blocking_updates: bool = True,
         dag_run_poll_interval_secs: int = 2,
         console: t.Optional[Console] = None,
     ):
         self._client = client
-        self.max_concurrent_requests = max_concurrent_requests
         self.blocking_updates = blocking_updates
         self.dag_run_poll_interval_secs = dag_run_poll_interval_secs
         self.console = console
diff --git a/tests/core/test_config.py b/tests/core/test_config.py
index 30482b44aa..4498346958 100644
--- a/tests/core/test_config.py
+++ b/tests/core/test_config.py
@@ -150,11 +150,11 @@ def test_load_config_from_paths(yaml_config_path: Path, python_config_path: Path):
 def test_load_config_multiple_config_files_in_folder(tmp_path):
     config_a_path = tmp_path / "config.yaml"
     with open(config_a_path, "w") as fd:
-        fd.write("dialect: spark")
+        fd.write("physical_schema: schema_a")
 
     config_b_path = tmp_path / "config.yml"
     with open(config_b_path, "w") as fd:
-        fd.write("dialect: duckdb")
+        fd.write("physical_schema: schema_b")
 
     with pytest.raises(
         ConfigError, match=r"^Multiple configuration files found in folder.*"
diff --git a/tests/core/test_context.py b/tests/core/test_context.py
index 0b95f0000b..85bbfd7f8e 100644
--- a/tests/core/test_context.py
+++ b/tests/core/test_context.py
@@ -6,7 +6,7 @@
 from sqlglot import parse_one
 
 import sqlmesh.core.constants
-from sqlmesh.core.config import Config
+from sqlmesh.core.config import Config, ModelDefaultsConfig
 from sqlmesh.core.context import Context
 from sqlmesh.core.plan import BuiltInPlanEvaluator, Plan
 from sqlmesh.utils.errors import ConfigError
@@ -43,7 +43,9 @@ def test_config_precedence():
     assert context.physical_schema == "test"
 
     # Context parameters take precedence over config
-    config = Config(dialect="presto", physical_schema="dev")
+    config = Config(
+        model_defaults=ModelDefaultsConfig(dialect="presto"), physical_schema="dev"
+    )
     context = Context(
         path="examples/sushi", dialect="spark", physical_schema="test", config=config
     )
@@ -52,7 +54,9 @@ def test_config_parameter():
 
 
 def test_config_parameter():
-    config = Config(dialect="presto", physical_schema="dev")
+    config = Config(
+        model_defaults=ModelDefaultsConfig(dialect="presto"), physical_schema="dev"
+    )
     context = Context(path="examples/sushi", config=config)
     assert context.dialect == "presto"
     assert context.physical_schema == "dev"

From 366132b2f912ec4e29f4f7e0f5ebb6d02bdfec51 Mon Sep 17 00:00:00 2001
From: Iaroslav Zeigerman
Date: Wed, 8 Feb 2023 11:18:42 -0800
Subject: [PATCH 2/2] Address comments

---
 docs/reference/configuration.md   | 14 +++++++-------
 sqlmesh/core/config/connection.py |  2 +-
 sqlmesh/core/config/root.py       |  2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/docs/reference/configuration.md b/docs/reference/configuration.md
index 7d32300a5e..5701205b43 100644
--- a/docs/reference/configuration.md
+++ b/docs/reference/configuration.md
@@ -1,5 +1,5 @@
 # Configuration Reference
-This page contains the list of all available SQLMesh configurations that can be set as either an environment varaible, in the `config.yaml` in a project folder or in the file with the same name in the `~/.sqlmesh` folder.
+This page contains the list of all available SQLMesh configurations that can be set as either environment variables, in `config.yaml` in a project folder, or in a file with the same name in the `~/.sqlmesh` folder.
 
 Configuration options from different sources have the following order of precedence:
1. Set as an environment variable (e.g. `SQLMESH__MODEL_DEFAULTS__DIALECT`).
2. Set in `config.yaml` in a project folder.
3. Set in `config.yaml` in the `~/.sqlmesh` folder.

## connections
-A dictionary of supported connection and their configurations. The key represents a unique connection name. If there is only one connection, its configuration can be provided directly, omitting the dictionary.
+A dictionary of supported connections and their configurations. The key represents a unique connection name. If there is only one connection, its configuration can be provided directly, omitting the dictionary.

```yaml
connections:
@@ -25,7 +25,7 @@ Below is the list of configuration options specific to each corresponding connection type.

### duckdb
#### database
-The optional database name. If not specified the in-memory database is used.
+The optional database name. If not specified, the in-memory database is used.

**Type:** string
**Default:** `None`
@@ -166,7 +166,7 @@ The number of concurrent tasks used for model backfilling during plan application.
**Default:** `4`

#### ddl_concurrent_tasks
-The number of concurrent tasks used DDL operations like table / view creation, deletion, etc.
+The number of concurrent tasks used for DDL operations like table / view creation, deletion, etc.

**Type:** int
**Default:** `4`
@@ -218,7 +218,7 @@ model_defaults:
  kind: full
```

-Alternatively if a kind requires addiional parameters it can be provided as an object:
+Alternatively, if a kind requires additional parameters, it can be provided as an object:
```yaml
model_defaults:
  kind:
    name: incremental_by_time_range
    time_column: ds
```
@@ -242,7 +242,7 @@ The cron expression specifying how often the model should be refreshed.
**Default:** `None`

### owner
-The owner of a model. Maybe used for notification purposes.
+The owner of a model. May be used for notification purposes.

**Type:** string
**Default:** `None`
@@ -260,7 +260,7 @@ The maximum number of intervals that can be evaluated in a single backfill task.
**Default:** `None`

### storage_format
-The storage format that should be used to store physical tables. Only applicable to egnines like Spark.
+The storage format that should be used to store physical tables. Only applicable to engines like Spark.

**Type:** string
**Default:** `None`
diff --git a/sqlmesh/core/config/connection.py b/sqlmesh/core/config/connection.py
index 5ab845b4f4..fb5e7502a3 100644
--- a/sqlmesh/core/config/connection.py
+++ b/sqlmesh/core/config/connection.py
@@ -64,7 +64,7 @@ class DuckDBConnectionConfig(_ConnectionConfig):
     """Configuration for the DuckDB connection.
 
     Args:
-        database: The optional database name. If not specified the in-memory database will be used.
+        database: The optional database name. If not specified, the in-memory database will be used.
         concurrent_tasks: The maximum number of tasks that can use this connection concurrently.
     """
 
diff --git a/sqlmesh/core/config/root.py b/sqlmesh/core/config/root.py
index ed62ba582a..0c141f2025 100644
--- a/sqlmesh/core/config/root.py
+++ b/sqlmesh/core/config/root.py
@@ -18,7 +18,7 @@ class Config(BaseConfig):
     """An object used by a Context to configure your SQLMesh project.
 
-    Args
+    Args:
        connections: Supported connections and their configurations. Key represents a unique name of a connection.
        default_connection: The name of a connection to use by default.
        test_connection: The connection settings for tests. Can be a name which refers to an existing configuration