Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docsource/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ It provides a consistent and convenient way to test the execution of your query
:caption: Basic Usage

usage/defining_table_mocks
usage/dbt
usage/your_sql_query_to_test
usage/result_assertion
usage/default_values
Expand Down
130 changes: 130 additions & 0 deletions docsource/usage/dbt.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# Enhanced SQLMock with dbt Integration Guide

## Introduction

This guide is a quick start for using SQLMock with dbt (data build tool). You can use it to mock dbt models, sources, and seeds. We'll cover how to use these features effectively in your unit tests.

## Prerequisites

- A working dbt project with an up-to-date `manifest.json` file. **Run `dbt compile` before testing** so that the manifest reflects the latest compiled state of your project.
- The SQLMock library installed in your Python environment.

## Configuration

### Setting the dbt Manifest Path

Initialize your testing environment by setting the global path to your dbt manifest file:

```python
from sql_mock.config import SQLMockConfig

SQLMockConfig.set_dbt_manifest_path('/path/to/your/dbt/manifest.json')
```

## Creating Mock Tables

SQLMock offers specialized decorators for different dbt entities: models, sources, and seeds.

### dbt Model Mock Table

For dbt models, use the `dbt_model_meta` decorator from `sql_mock.dbt`. This decorator is suited for mocking the transformed data produced by dbt models.

```python
from sql_mock.dbt import dbt_model_meta
from sql_mock.bigquery.table_mocks import BigQueryMockTable

@dbt_model_meta(model_name="your_dbt_model_name")
class YourDBTModelTable(BigQueryMockTable):
# Define your table columns and other necessary attributes here
...
```

### dbt Source Mock Table

For dbt sources, use the `dbt_source_meta` decorator from `sql_mock.dbt`. This is ideal for mocking the raw data sources that dbt models consume.

```python
from sql_mock.dbt import dbt_source_meta
from sql_mock.bigquery.table_mocks import BigQueryMockTable

@dbt_source_meta(source_name="your_source_name", table_name="your_source_table")
class YourDBTSourceTable(BigQueryMockTable):
# Define your table columns and other necessary attributes here
...
```

### dbt Seed Mock Table

For dbt seeds, which are static data sets loaded into the database, use the `dbt_seed_meta` decorator from `sql_mock.dbt`.

```python
from sql_mock.dbt import dbt_seed_meta
from sql_mock.bigquery.table_mocks import BigQueryMockTable

@dbt_seed_meta(seed_name="your_dbt_seed_name")
class YourDBTSeedTable(BigQueryMockTable):
# Define your table columns and other necessary attributes here
...
```

## Example: Testing a dbt Model with Upstream Source and Seed Data

Let’s consider a dbt model named `monthly_user_spend` that aggregates data from a source `user_transactions` and a seed `user_categories`.

### Step 1: Define Your Source and Seed Mock Tables

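```python
from sql_mock.bigquery import column_mocks as col
from sql_mock.bigquery.table_mocks import BigQueryMockTable
from sql_mock.dbt import dbt_model_meta, dbt_seed_meta, dbt_source_meta
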
@dbt_source_meta(source_name="transactions", table_name="user_transactions")
class UserTransactionsTable(BigQueryMockTable):
transaction_id = col.Int(default=1)
user_id = col.Int(default=1)
amount = col.Float(default=1.0)
transaction_date = col.Date(default='2023-12-24')

@dbt_seed_meta(seed_name="user_categories")
class UserCategoriesTable(BigQueryMockTable):
user_id = col.Int(default=1)
category = col.String(default='foo')
```

### Step 2: Define Your Model Mock Table

```python
@dbt_model_meta(model_name="monthly_user_spend")
class MonthlyUserSpendTable(BigQueryMockTable):
user_id = col.Int(default=1)
month = col.String(default='foo')
total_spend = col.Float(default=1.0)
category = col.String(default='foo')
```

### Step 3: Write Your Test Case

```python
import datetime

def test_monthly_user_spend_model():
# Mock input data for UserTransactionsTable and UserCategoriesTable
transactions_data = [
{"transaction_id": 1, "user_id": 1, "amount": 120.0, "transaction_date": datetime.date(2023, 1, 10)},
{"transaction_id": 2, "user_id": 2, "amount": 150.0, "transaction_date": datetime.date(2023, 1, 20)},
]

categories_data = [
{"user_id": 1, "category": "Premium"},
{"user_id": 2, "category": "Standard"}
]

transactions_table = UserTransactionsTable.from_dicts(transactions_data)
categories_table = UserCategoriesTable.from_dicts(categories_data)

# Expected result
expected_output = [
{"user_id": 1, "month": "2023-01", "total_spend": 120.0, "category": "Premium"},
{"user_id": 2, "month": "2023-01", "total_spend": 150.0, "category": "Standard"},
]

monthly_spend_table = MonthlyUserSpendTable.from_mocks(input_data=[transactions_table, categories_table])

monthly_spend_table.assert_equal(expected_output)
```
47 changes: 47 additions & 0 deletions examples/dbt/test_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from sql_mock.bigquery import column_mocks as col
from sql_mock.bigquery.table_mocks import BigQueryMockTable
from sql_mock.config import SQLMockConfig
from sql_mock.dbt import dbt_model_meta, dbt_seed_meta, dbt_source_meta

# Register the manifest used by the decorators below. The path is relative ("./..."),
# so it resolves against the working directory the tests are started from.
SQLMockConfig.set_dbt_manifest_path("./tests/resources/dbt/dbt_manifest.json")


# NOTE: The Source and Seed classes are not used in the example test below.
# They are only here for demonstration purposes.
# Mock of the table "opportunity_events" in the dbt source "source_data".
@dbt_source_meta(source_name="source_data", table_name="opportunity_events")
class OpportunityEventsSource(BigQueryMockTable):
    # Column definitions, each declared with a default value.
    event_id = col.Int(default=1)
    event_type = col.String(default="foo")
    event_date = col.Date(default="2023-12-24")


# Mock of the dbt seed "country_codes"; demonstration only, not used by the test below.
@dbt_seed_meta(seed_name="country_codes")
class CountryCodesSeed(BigQueryMockTable):
    # Both columns are string columns declared with the default "foo".
    country_code = col.String(default="foo")
    country_name = col.String(default="foo")


# Mock of the dbt model "my_first_dbt_model"; serves as the mocked input in the test below.
@dbt_model_meta(model_name="my_first_dbt_model")
class MyFirstDBTModel(BigQueryMockTable):
    # Single integer column, declared with default 1.
    id = col.Int(default=1)


# Mock of the dbt model "my_second_dbt_model"; this is the model under test below.
@dbt_model_meta(model_name="my_second_dbt_model")
class MySecondDBTModel(BigQueryMockTable):
    # Single integer column, declared with default 1.
    id = col.Int(default=1)


def test_my_second_dbt_model():
    """Check the second dbt model's output when the first model is mocked as its input."""
    # Three rows (ids 1..3) for the first model, wrapped in a mock table instance
    upstream_mock = MyFirstDBTModel.from_dicts([{"id": row_id} for row_id in (1, 2, 3)])

    # Build the second model's mock with the first model as its only input
    result_mock = MySecondDBTModel.from_mocks(input_data=[upstream_mock])

    # Assuming the second model filters for entries with id 1 only,
    # exactly one row is expected in the result
    result_mock.assert_equal([{"id": 1}])
12 changes: 12 additions & 0 deletions src/sql_mock/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
class SQLMockConfig:
    """Holder for SQLMock settings that are stored as class-level state."""

    # Location of the dbt manifest.json; stays None until set_dbt_manifest_path() is called.
    _dbt_manifest_path = None

    @classmethod
    def set_dbt_manifest_path(cls, path: str):
        """Remember ``path`` as the dbt manifest location."""
        cls._dbt_manifest_path = path

    @classmethod
    def get_dbt_manifest_path(cls):
        """
        Return the dbt manifest path that was set earlier.

        Raises:
            ValueError: If no path has been set.
        """
        configured_path = cls._dbt_manifest_path
        if configured_path is not None:
            return configured_path
        raise ValueError("DBT manifest path is not set. Please set it using set_dbt_manifest_path()")
179 changes: 179 additions & 0 deletions src/sql_mock/dbt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import json
from typing import TYPE_CHECKING

from sql_mock.config import SQLMockConfig
from sql_mock.helpers import parse_table_refs, validate_input_mocks
from sql_mock.table_mocks import MockTableMeta

# Needed to avoid circular imports on type check
if TYPE_CHECKING:
from sql_mock.table_mocks import BaseMockTable


def _get_model_metadata_from_dbt_manifest(manifest_path: str, model_name: str) -> dict:
    """
    Looks up a dbt model in the manifest file and returns the metadata needed to mock it.

    Args:
        manifest_path (str): Path to the dbt manifest.json file.
        model_name (str): Name of the dbt model.

    Returns:
        dict: Dictionary with ``query_path`` (the node's ``compiled_path``) and
            ``table_ref`` (the node's ``relation_name``).

    Raises:
        ValueError: If the manifest has no node of resource type "model" with the given name.
    """
    # JSON is UTF-8 by specification, so decode explicitly instead of relying on the
    # platform's locale encoding (which is not UTF-8 everywhere).
    with open(manifest_path, "r", encoding="utf-8") as file:
        manifest = json.load(file)

    for node in manifest["nodes"].values():
        # "nodes" also holds seeds, tests, etc. - only models are of interest here
        if node["resource_type"] == "model" and node["name"] == model_name:
            return {
                "query_path": node["compiled_path"],
                "table_ref": node["relation_name"],
            }

    raise ValueError(f"Model '{model_name}' not found in dbt manifest.")


def _get_source_metadata_from_dbt_manifest(manifest_path: str, source_name: str, table_name: str) -> dict:
    """
    Extracts the table metadata for a dbt source table from the manifest file.

    Args:
        manifest_path (str): Path to the dbt manifest.json file.
        source_name (str): Name of the dbt source.
        table_name (str): Name of the table in the dbt source. It is compared against the
            ``identifier`` field of the source entries in the manifest.

    Returns:
        dict: Dictionary with ``table_ref`` (the entry's ``relation_name``).

    Raises:
        ValueError: If no source entry matches both the source name and the table name.
    """
    # JSON is UTF-8 by specification, so decode explicitly instead of relying on the
    # platform's locale encoding (which is not UTF-8 everywhere).
    with open(manifest_path, "r", encoding="utf-8") as file:
        manifest = json.load(file)

    for node in manifest["sources"].values():
        if (
            node["resource_type"] == "source"
            and node["source_name"] == source_name
            and node["identifier"] == table_name
        ):
            return {
                "table_ref": node["relation_name"],
            }

    # Name the table as well: the source itself may exist while only the table is missing.
    raise ValueError(f"Table '{table_name}' of source '{source_name}' not found in dbt manifest.")


def _get_seed_metadata_from_dbt_manifest(manifest_path: str, seed_name: str) -> dict:
    """
    Extracts the table metadata for a dbt seed from the manifest file.

    Args:
        manifest_path (str): Path to the dbt manifest.json file.
        seed_name (str): Name of the dbt seed.

    Returns:
        dict: Dictionary with ``table_ref`` (the node's ``relation_name``).

    Raises:
        ValueError: If the manifest has no node of resource type "seed" with the given name.
    """
    # JSON is UTF-8 by specification, so decode explicitly instead of relying on the
    # platform's locale encoding (which is not UTF-8 everywhere).
    with open(manifest_path, "r", encoding="utf-8") as file:
        manifest = json.load(file)

    for node in manifest["nodes"].values():
        # "nodes" also holds models, tests, etc. - only seeds are of interest here
        if node["resource_type"] == "seed" and node["name"] == seed_name:
            return {
                "table_ref": node["relation_name"],
            }

    raise ValueError(f"Seed '{seed_name}' not found in dbt manifest.")


def dbt_model_meta(
    model_name: str,
    manifest_path: "str | None" = None,
    default_inputs: "list[BaseMockTable] | None" = None,
):
    """
    Decorator that is used to define MockTable metadata for dbt models.

    When applied, it looks the model up in the dbt manifest, reads the file referenced by the
    model's compiled path, and stores the result as ``MockTableMeta`` on ``cls._sql_mock_meta``.
    The decorated class must provide a ``_sql_dialect`` attribute.

    Args:
        model_name (string) : Name of the dbt model
        manifest_path (string): Path to the dbt manifest file. Falls back to the path configured
            via ``SQLMockConfig`` when not provided.
        default_inputs: List of default input mock instances that serve as default input if no other instance of that class is provided.

    Returns:
        The class decorator; it returns the decorated class itself.

    Raises:
        ValueError: If neither ``manifest_path`` nor a globally configured path is available,
            or if the model is not found in the manifest.
    """

    def decorator(cls):
        path = manifest_path or SQLMockConfig.get_dbt_manifest_path()

        dbt_meta = _get_model_metadata_from_dbt_manifest(manifest_path=path, model_name=model_name)

        # NOTE(review): a relative query_path is resolved against the current working directory,
        # not against the manifest location - confirm tests are run from the expected directory.
        # Decode explicitly as UTF-8 so the query text does not depend on the platform locale.
        with open(dbt_meta["query_path"], encoding="utf-8") as f:
            parsed_query = f.read()

        if default_inputs:
            validate_input_mocks(default_inputs)

        cls._sql_mock_meta = MockTableMeta(
            table_ref=parse_table_refs(dbt_meta["table_ref"], dialect=cls._sql_dialect),
            query=parsed_query,
            default_inputs=default_inputs or [],
        )
        return cls

    return decorator


def dbt_source_meta(
    source_name: str,
    table_name: str,
    manifest_path: "str | None" = None,
    default_inputs: "list[BaseMockTable] | None" = None,
):
    """
    Decorator that is used to define MockTable metadata for dbt sources.

    When applied, it looks the source table up in the dbt manifest and stores its table
    reference as ``MockTableMeta`` on ``cls._sql_mock_meta``. The decorated class must provide
    a ``_sql_dialect`` attribute.

    Args:
        source_name (string) : Name of source
        table_name (string): Name of the table in the source
        manifest_path (string): Path to the dbt manifest file. Falls back to the path configured
            via ``SQLMockConfig`` when not provided.
        default_inputs: List of default input mock instances that serve as default input if no other instance of that class is provided.

    Returns:
        The class decorator; it returns the decorated class itself.

    Raises:
        ValueError: If neither ``manifest_path`` nor a globally configured path is available,
            or if the source table is not found in the manifest.
    """

    def decorator(cls):
        path = manifest_path or SQLMockConfig.get_dbt_manifest_path()

        dbt_meta = _get_source_metadata_from_dbt_manifest(
            manifest_path=path, source_name=source_name, table_name=table_name
        )

        if default_inputs:
            validate_input_mocks(default_inputs)

        # Sources have no query of their own, so only the table reference is set.
        cls._sql_mock_meta = MockTableMeta(
            table_ref=parse_table_refs(dbt_meta["table_ref"], dialect=cls._sql_dialect),
            default_inputs=default_inputs or [],
        )
        return cls

    return decorator


def dbt_seed_meta(
    seed_name: str,
    manifest_path: "str | None" = None,
    default_inputs: "list[BaseMockTable] | None" = None,
):
    """
    Decorator that is used to define MockTable metadata for dbt seeds.

    When applied, it looks the seed up in the dbt manifest and stores its table reference as
    ``MockTableMeta`` on ``cls._sql_mock_meta``. The decorated class must provide a
    ``_sql_dialect`` attribute.

    Args:
        seed_name (string) : Name of the dbt seed
        manifest_path (string): Path to the dbt manifest file. Falls back to the path configured
            via ``SQLMockConfig`` when not provided.
        default_inputs: List of default input mock instances that serve as default input if no other instance of that class is provided.

    Returns:
        The class decorator; it returns the decorated class itself.

    Raises:
        ValueError: If neither ``manifest_path`` nor a globally configured path is available,
            or if the seed is not found in the manifest.
    """

    def decorator(cls):
        path = manifest_path or SQLMockConfig.get_dbt_manifest_path()

        dbt_meta = _get_seed_metadata_from_dbt_manifest(
            manifest_path=path,
            seed_name=seed_name,
        )

        if default_inputs:
            validate_input_mocks(default_inputs)

        # Seeds have no query of their own, so only the table reference is set.
        cls._sql_mock_meta = MockTableMeta(
            table_ref=parse_table_refs(dbt_meta["table_ref"], dialect=cls._sql_dialect),
            default_inputs=default_inputs or [],
        )
        return cls

    return decorator
Loading