Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate model reference in mock table #5

Merged
merged 2 commits into from
Oct 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@ Before diving into specific database scenarios, let's start with a simplified ex
2. Using SQL Mock, you define mock tables. You can use the built-in column types provided by SQL Mock. Available column types include `Int`, `String`, `Date`, and more. Each database type has its own column types. Define your tables by subclassing a mock table class that fits your database (e.g. `BigQueryMockTable`) and specifying the column types along with default values. In our example we use the `ClickHouseTableMock` class:
```python
from sql_mock.clickhouse import column_mocks as col
from sql_mock.clickhouse.table_mocks import ClickHouseTableMock
from sql_mock.clickhouse.table_mocks import ClickHouseTableMock, table_meta

@table_meta(table_ref='data.table1')
class Table(ClickHouseTableMock):
id = col.Int(default=1)
name = col.String(default='Peter')


@table_meta(table_ref='data.result_table')
class ResultTable(ClickHouseTableMock):
id = col.Int(default=1)
```
Expand All @@ -40,13 +42,13 @@ Before diving into specific database scenarios, let's start with a simplified ex
{'id': 3}, # This will use defaults for the name
]

table_input_data = Table(data=user_data)
input_table_mock = Table.from_dicts(user_data)
```


4. **Getting results for a table mock:** Use the `from_mocks` method of the table mock object to generate mock query results based on your mock data.
```python
res = ResultTable.from_inputs(query='SELECT id FROM data.table1', input_data={'data.table1': table_input_data})
res = ResultTable.from_mocks(query='SELECT id FROM data.table1', input_data=[input_table_mock])
```

5. Behind the scenes, SQL Mock replaces table references (e.g. `data.table1`) in your query with Common Table Expressions (CTEs) filled with dummy data. It can roughly be compared to something like this:
Expand Down
13 changes: 7 additions & 6 deletions examples/bigquery/test_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from sql_mock.bigquery import column_mocks as col
from sql_mock.bigquery.table_mocks import BigQueryMockTable
from sql_mock.table_mocks import table_meta

query = """
SELECT
Expand All @@ -13,11 +14,13 @@
"""


@table_meta(table_ref="data.users")
class UserTable(BigQueryMockTable):
user_id = col.Int(default=1)
user_name = col.String(default="Mr. T")


@table_meta(table_ref="data.subscriptions")
class SubscriptionTable(BigQueryMockTable):
subscription_id = col.Int(default=1)
period_start_date = col.Date(default=datetime.date(2023, 9, 5))
Expand All @@ -31,9 +34,9 @@ class SubscriptionCountTable(BigQueryMockTable):


def test_something():
users = UserTable(data=[{"user_id": 1}, {"user_id": 2}])
subscriptions = SubscriptionTable(
data=[
users = UserTable.from_dicts([{"user_id": 1}, {"user_id": 2}])
subscriptions = SubscriptionTable.from_dicts(
[
{"subscription_id": 1, "user_id": 1},
{"subscription_id": 2, "user_id": 1},
{"subscription_id": 2, "user_id": 2},
Expand All @@ -42,8 +45,6 @@ def test_something():

expected = [{"user_id": 1, "subscription_count": 2}, {"user_id": 2, "subscription_count": 1}]

res = SubscriptionCountTable.from_inputs(
query=query, input_data={"data.users": users, "data.subscriptions": subscriptions}
)
res = SubscriptionCountTable.from_mocks(query=query, input_data=[users, subscriptions])

res.assert_equal(expected)
13 changes: 7 additions & 6 deletions examples/clickhouse/test_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from sql_mock.clickhouse import column_mocks as col
from sql_mock.clickhouse.table_mocks import ClickHouseTableMock
from sql_mock.table_mocks import table_meta

query = """
SELECT
Expand All @@ -13,11 +14,13 @@
"""


@table_meta(table_ref="data.users")
class UserTable(ClickHouseTableMock):
user_id = col.Int(default=1)
user_name = col.String(default="Mr. T")


@table_meta(table_ref="data.subscriptions")
class SubscriptionTable(ClickHouseTableMock):
subscription_id = col.Int(default=1)
period_start_date = col.Date(default=datetime.date(2023, 9, 5))
Expand All @@ -31,9 +34,9 @@ class SubscriptionCountTable(ClickHouseTableMock):


def test_something():
users = UserTable(data=[{"user_id": 1}, {"user_id": 2}])
subscriptions = SubscriptionTable(
data=[
users = UserTable.from_dicts([{"user_id": 1}, {"user_id": 2}])
subscriptions = SubscriptionTable.from_dicts(
[
{"subscription_id": 1, "user_id": 1},
{"subscription_id": 2, "user_id": 1},
{"subscription_id": 2, "user_id": 2},
Expand All @@ -42,8 +45,6 @@ def test_something():

expected = [{"user_id": 2, "subscription_count": 1}, {"user_id": 1, "subscription_count": 2}]

res = SubscriptionCountTable.from_inputs(
query=query, input_data={"data.users": users, "data.subscriptions": subscriptions}
)
res = SubscriptionCountTable.from_mocks(query=query, input_data=[users, subscriptions])

res.assert_equal(expected)
26 changes: 13 additions & 13 deletions src/sql_mock/bigquery/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,23 @@ Ensure you have the `GOOGLE_APPLICATION_CREDENTIALS` environment variable correc
import datetime
from sql_mock.bigquery import column_mocks as col
from sql_mock.bigquery.table_mocks import BigQueryMockTable
from sql_mock.table_mocks import table_meta

# Define mock tables for your data model that inherit from BigQueryMockTable
@table_meta(table_ref='data.users')
class UserTable(BigQueryMockTable):
user_id = col.Int(default=1)
user_name = col.String(default='Mr. T')


@table_meta(table_ref='data.subscriptions')
class SubscriptionTable(BigQueryMockTable):
subscription_id = col.Int(default=1)
period_start_date = col.Date(default=datetime.date(2023, 9, 5))
period_end_date = col.Date(default=datetime.date(2023, 9, 5))
user_id = col.Int(default=1)


# Define a mock table for your expected results
class SubscriptionCountTable(BigQueryMockTable):
subscription_count = col.Int(default=1)
Expand All @@ -38,14 +43,12 @@ GROUP BY user_id
"""

# Create mock data for the 'data.users' and 'data.subscriptions' tables
users = UserTable(data=[{'user_id': 1}, {'user_id': 2}])
subscriptions = SubscriptionTable(
data=[
{'subscription_id': 1, 'user_id': 1},
{'subscription_id': 2, 'user_id': 1},
{'subscription_id': 2, 'user_id': 2},
]
)
users = UserTable.from_dicts([{'user_id': 1}, {'user_id': 2}])
subscriptions = SubscriptionTable.from_dicts([
{'subscription_id': 1, 'user_id': 1},
{'subscription_id': 2, 'user_id': 1},
{'subscription_id': 2, 'user_id': 2},
])

# Define your expected results
expected = [
Expand All @@ -54,12 +57,9 @@ expected = [
]

# Simulate the SQL query using SQL Mock
res = SubscriptionCountTable.from_inputs(
res = SubscriptionCountTable.from_mocks(
query=query,
input_data={
'data.users': users,
'data.subscriptions': subscriptions
}
input_data=[users, subscriptions]
)

# Assert the results
Expand Down
21 changes: 11 additions & 10 deletions src/sql_mock/clickhouse/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,15 @@ You need to provide the following environment variables:
```python
from sql_mock.clickhouse import column_mocks as col
from sql_mock.clickhouse.table_mocks import ClickHouseTableMock
from sql_mock.table_mocks import table_meta

# Define mock tables for your data model that inherit from ClickHouseTableMock
@table_meta(table_ref='data.users')
class UserTable(ClickHouseTableMock):
user_id = col.Int(default=1)
user_name = col.String(default='Mr. T')
user_name = col.String(default="Mr. T")

@table_meta(table_ref='data.subscriptions')
class SubscriptionTable(ClickHouseTableMock):
subscription_id = col.Int(default=1)
period_start_date = col.Date(default=datetime.date(2023, 9, 5))
Expand All @@ -43,14 +46,12 @@ GROUP BY user_id
"""

# Create mock data for the 'data.users' and 'data.subscriptions' tables
users = UserTable(data=[{'user_id': 1}, {'user_id': 2}])
subscriptions = SubscriptionTable(
data=[
{'subscription_id': 1, 'user_id': 1},
{'subscription_id': 2, 'user_id': 1},
{'subscription_id': 2, 'user_id': 2},
]
)
users = UserTable.from_dicts([{'user_id': 1}, {'user_id': 2}])
subscriptions = SubscriptionTable.from_dicts([
{'subscription_id': 1, 'user_id': 1},
{'subscription_id': 2, 'user_id': 1},
{'subscription_id': 2, 'user_id': 2},
])

# Define your expected results
expected = [
Expand All @@ -59,7 +60,7 @@ expected = [
]

# Simulate the SQL query using SQL Mock
res = SubscriptionCountTable.from_inputs(query=query, input_data={'data.users': users, 'data.subscriptions': subscriptions})
res = SubscriptionCountTable.from_mocks(query=query, input_data=[users, subscriptions])

# Assert the results
res.assert_equal(expected)
Expand Down
2 changes: 2 additions & 0 deletions src/sql_mock/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class ValidationError(Exception):
    """Error raised when SQL Mock input validation fails."""
57 changes: 46 additions & 11 deletions src/sql_mock/table_mocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,32 @@

from sql_mock.column_mocks import ColumnMock
from sql_mock.constants import NO_INPUT
from sql_mock.exceptions import ValidationError


def get_keys_from_list_of_dicts(data: list[dict]) -> set[str]:
    """
    Collect the distinct keys used across a list of dictionaries.

    Args:
        data (list[dict]): Dictionaries whose keys should be collected.

    Returns:
        set[str]: The union of all keys; an empty set when `data` is empty.
    """
    # Iterating a dict yields its keys, so no `.keys()` call or intermediate generator is needed.
    return {key for dictionary in data for key in dictionary}


def table_meta(table_ref):
    """
    Class decorator that attaches MockTable metadata to the decorated class.

    Args:
        table_ref: Table reference that is stored on the class as `_table_ref`.

    Returns:
        A decorator that sets `_table_ref` on the class it receives and returns that same class.
    """

    def _attach_meta(cls):
        # The class is modified in place and handed back, so it can be used directly with `@table_meta(...)`.
        setattr(cls, "_table_ref", table_ref)
        return cls

    return _attach_meta


def validate_input_mocks(table_mocks: list["BaseMockTable"]):
    """
    Check that each input table mock has a `_table_ref` defined.

    Args:
        table_mocks (list): Mock table instances that should be used as query inputs.
            `None` or an empty list means there is nothing to validate.

    Raises:
        ValidationError: If at least one mock table has no (or an empty) `_table_ref`.
    """
    # `from_mocks` defaults `input_data` to None and passes it straight through,
    # so iterating it unguarded would fail with a TypeError.
    if not table_mocks:
        return

    missing_table_refs = [type(mock_table).__name__ for mock_table in table_mocks if not mock_table._table_ref]
    if missing_table_refs:
        missing_table_ref_str = ",".join(missing_table_refs)
        msg = f"If you want to use a MockTable instance as input, you need to provide a table_reference using the table_meta decorator. Missing table refs for models: {missing_table_ref_str}"
        raise ValidationError(msg)


class BaseMockTable:
"""
Represents a base class for creating mock database tables for testing.
Expand All @@ -16,10 +36,22 @@ class BaseMockTable:
col1 = Int(default=1)

Attributes:
_table_ref (string) : String that represents the table reference to the original table.
_columns (dict): An auto-generated dictionary of column names and corresponding ColumnMock instances.
_data (list): An auto-generated list of dictionaries representing rows of data based on the inputs in __init__.
_data (list): An auto-generated list of dictionaries representing rows of data.
_input_data (list): An auto-generated list of dictionaries representing the upstream model input data
_rendered_query (string): The fully rendered query based on jinja keyword arguments provided
"""

# Metadata that needs to be provided by the table_meta decorator
_table_ref = None

# Auto generated
_columns = None
_data = None
_input_data = None
_rendered_query = None

def __init__(self, data: list[dict] = None) -> None:
"""
Initialize a BaseMockTable instance.
Expand All @@ -42,7 +74,13 @@ def __init__(self, data: list[dict] = None) -> None:
self._data = [] if data is None else data

@classmethod
def from_inputs(cls, query, input_data: dict[str, "BaseMockTable"] = None, query_template_kwargs: dict = None):
def from_dicts(cls, data: list[dict] = None):
Somtom marked this conversation as resolved.
Show resolved Hide resolved
return cls(data=data)

@classmethod
def from_mocks(cls, query, input_data: list["BaseMockTable"] = None, query_template_kwargs: dict = None):
validate_input_mocks(input_data)

instance = cls(data=[])
query_template = Template(query)

Expand All @@ -56,11 +94,7 @@ def from_inputs(cls, query, input_data: dict[str, "BaseMockTable"] = None, query

def _generate_input_data_cte_snippet(self):
# Convert instances into SQL snippets that serve as input to a CTE
table_ctes = []
for table_name, table_mock in self._input_data.items():
table_query = table_mock.as_sql_input()
table_ctes.append(f"{table_name} AS (\n{table_query}\n)")

table_ctes = [mock_table.as_sql_input() for mock_table in self._input_data]
return ",\n".join(table_ctes)

def _generate_query(
Expand Down Expand Up @@ -89,8 +123,9 @@ def _generate_query(
)

# Replace original table references to point them to the mocked data
for table_name in self._input_data.keys():
query = query.replace(table_name, table_name.replace(".", "__"))
for mock_table in self._input_data:
new_reference = mock_table._table_ref.replace(".", "__")
query = query.replace(mock_table._table_ref, new_reference)

# Store last query for debugging
self._last_query = query
Expand Down Expand Up @@ -123,7 +158,7 @@ def _to_sql_row(self, row_data: dict) -> str:

def as_sql_input(self):
"""
Generate a UNION ALL SQL that combines data from all rows.
Generate a UNION ALL SQL CTE that combines data from all rows.

Returns:
str: A SQL query that combines data from all rows.
Expand All @@ -135,7 +170,7 @@ def as_sql_input(self):
snippet += " WHERE FALSE"
else:
snippet = "\nUNION ALL\nSELECT ".join([self._to_sql_row(row_data) for row_data in self._data])
return f"SELECT {snippet}"
return f"{self._table_ref} AS (\n" f"SELECT {snippet}\n" ")"

def assert_equal(self, expected: [dict], ignore_missing_keys: bool = False, ignore_order: bool = True):
"""
Expand Down
8 changes: 5 additions & 3 deletions tests/sql_mock/bigquery/test_table_mocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@

from sql_mock.bigquery.column_mocks import Int
from sql_mock.bigquery.table_mocks import BigQueryMockTable
from sql_mock.table_mocks import table_meta


@table_meta(table_ref="mock_test_table")
class MockTestTable(BigQueryMockTable):
id = Int(default=1)

Expand All @@ -25,7 +27,7 @@ def patch_os_environment_variables(mocker):

def test_init_with_environment_variables(mocker):
"""...then the env vars should be used to set the attributes"""
table = BigQueryMockTable()
table = MockTestTable()
assert table.settings.google_application_credentials == "example.json"


Expand All @@ -37,7 +39,7 @@ def test_init_with_missing_configs(mocker):
clear=True,
)
with pytest.raises(ValidationError):
BigQueryMockTable()
MockTestTable()


def test_get_results(mocker):
Expand All @@ -55,7 +57,7 @@ def test_get_results(mocker):
query_job_instance = mock_client.query.return_value
query_job_instance.result.return_value = mock_query_job_result

result = BigQueryMockTable.from_inputs(query="SELECT 1", input_data={"foo.bar": MockTestTable(data=[])})
result = BigQueryMockTable.from_mocks(query="SELECT 1", input_data=[MockTestTable(data=[])])

# Assert the result matches the expected mock result
result.assert_equal(mock_query_job_result)
Loading