# DJ Runbook - Creating and Linking Nodes

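This notebook walks through a series of basic requests against a running DJ server: creating source, dimension, transform, metric, and cube nodes; registering a catalog; attaching tables to the source nodes; linking columns to dimension nodes; and generating SQL for metrics.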

In [1]:
import requests

# Location of the DJ server; change these three values to target another instance.
DJ_PROTOCOL, DJ_HOST, DJ_PORT = "http", "localhost", 8000

# Base URL that every endpoint path in the cells below is appended to.
DJ_URL = f"{DJ_PROTOCOL}://{DJ_HOST}:{DJ_PORT}"

## Create some source nodes.

In [2]:
# Column name -> type for the `revenue_source` source node.
revenue_source_column_types = {
    "payment_id": "INT",
    "payment_type": "INT",
    "payment_amount": "FLOAT",
    "customer_id": "INT",
    "account_type": "STR",
}

# Full node definition; each column is sent as {"type": <type>} keyed by its name.
revenue_source_spec = {
    "columns": {
        column: {"type": dtype}
        for column, dtype in revenue_source_column_types.items()
    },
    "description": "A source table for revenue data",
    "mode": "published",
    "name": "revenue_source",
    "type": "source",
}

# POST the definition to the nodes endpoint and display the decoded JSON reply.
response = requests.post(f"{DJ_URL}/nodes/", json=revenue_source_spec)
response.json()

{'node_revision_id': 13,
 'node_id': 13,
 'type': 'source',
 'name': 'revenue_source',
 'display_name': 'Revenue Source',
 'version': '1',
 'description': 'A source table for revenue data',
 'query': None,
 'availability': None,
 'columns': [{'name': 'payment_id', 'type': 'INT'},
  {'name': 'payment_type', 'type': 'INT'},
  {'name': 'payment_amount', 'type': 'FLOAT'},
  {'name': 'customer_id', 'type': 'INT'},
  {'name': 'account_type', 'type': 'STR'}],
 'tables': [],
 'updated_at': '2023-02-13T03:50:06.679701+00:00',
 'created_at': '2023-02-13T03:50:06.677898+00:00'}

In [3]:
# Column name -> type for the `account_type_table` source node.
account_type_table_column_types = {
    "id": "INT",
    "account_type_name": "STR",
    "account_type_classification": "INT",
    "preferred_payment_method": "INT",
}

# Full node definition; each column is sent as {"type": <type>} keyed by its name.
account_type_table_spec = {
    "columns": {
        column: {"type": dtype}
        for column, dtype in account_type_table_column_types.items()
    },
    "description": "A source table for account type data",
    "mode": "published",
    "name": "account_type_table",
    "type": "source",
}

# POST the definition to the nodes endpoint and display the decoded JSON reply.
response = requests.post(f"{DJ_URL}/nodes/", json=account_type_table_spec)
response.json()

{'node_revision_id': 14,
 'node_id': 14,
 'type': 'source',
 'name': 'account_type_table',
 'display_name': 'Account Type Table',
 'version': '1',
 'description': 'A source table for account type data',
 'query': None,
 'availability': None,
 'columns': [{'name': 'id', 'type': 'INT'},
  {'name': 'account_type_name', 'type': 'STR'},
  {'name': 'account_type_classification', 'type': 'INT'},
  {'name': 'preferred_payment_method', 'type': 'INT'}],
 'tables': [],
 'updated_at': '2023-02-13T03:50:06.902134+00:00',
 'created_at': '2023-02-13T03:50:06.900526+00:00'}

In [4]:
# Column name -> type for the `payment_type_table` source node.
payment_type_table_column_types = {
    "id": "INT",
    "payment_type_name": "STR",
    "payment_type_classification": "INT",
}

# Full node definition; each column is sent as {"type": <type>} keyed by its name.
payment_type_table_spec = {
    "columns": {
        column: {"type": dtype}
        for column, dtype in payment_type_table_column_types.items()
    },
    "description": "A source table for different types of payments",
    "mode": "published",
    "name": "payment_type_table",
    "type": "source",
}

# POST the definition to the nodes endpoint and display the decoded JSON reply.
response = requests.post(f"{DJ_URL}/nodes/", json=payment_type_table_spec)
response.json()

{'node_revision_id': 15,
 'node_id': 15,
 'type': 'source',
 'name': 'payment_type_table',
 'display_name': 'Payment Type Table',
 'version': '1',
 'description': 'A source table for different types of payments',
 'query': None,
 'availability': None,
 'columns': [{'name': 'id', 'type': 'INT'},
  {'name': 'payment_type_name', 'type': 'STR'},
  {'name': 'payment_type_classification', 'type': 'INT'}],
 'tables': [],
 'updated_at': '2023-02-13T03:50:07.075401+00:00',
 'created_at': '2023-02-13T03:50:07.074081+00:00'}

In [5]:
# Column name -> type for the `customers_table` source node.
customers_table_column_types = {
    "customer_id": "INT",
    "first_name": "STR",
    "last_name": "STR",
}

# Full node definition; each column is sent as {"type": <type>} keyed by its name.
customers_table_spec = {
    "columns": {
        column: {"type": dtype}
        for column, dtype in customers_table_column_types.items()
    },
    "description": "A source table for customer data",
    "mode": "published",
    "name": "customers_table",
    "type": "source",
}

# POST the definition to the nodes endpoint and display the decoded JSON reply.
response = requests.post(f"{DJ_URL}/nodes/", json=customers_table_spec)
response.json()

{'node_revision_id': 16,
 'node_id': 16,
 'type': 'source',
 'name': 'customers_table',
 'display_name': 'Customers Table',
 'version': '1',
 'description': 'A source table for customer data',
 'query': None,
 'availability': None,
 'columns': [{'name': 'customer_id', 'type': 'INT'},
  {'name': 'first_name', 'type': 'STR'},
  {'name': 'last_name', 'type': 'STR'}],
 'tables': [],
 'updated_at': '2023-02-13T03:50:07.232772+00:00',
 'created_at': '2023-02-13T03:50:07.231517+00:00'}

In [6]:
# Column name -> type for the `default_payment_account_table` source node.
default_payment_account_column_types = {
    "customer_id": "INT",
    "account_type": "STR",
    "default_payment_type": "INT",
}

# Full node definition; each column is sent as {"type": <type>} keyed by its name.
default_payment_account_spec = {
    "columns": {
        column: {"type": dtype}
        for column, dtype in default_payment_account_column_types.items()
    },
    "description": "A source table for customer default payment preference",
    "mode": "published",
    "name": "default_payment_account_table",
    "type": "source",
}

# POST the definition to the nodes endpoint and display the decoded JSON reply.
response = requests.post(f"{DJ_URL}/nodes/", json=default_payment_account_spec)
response.json()

{'node_revision_id': 17,
 'node_id': 17,
 'type': 'source',
 'name': 'default_payment_account_table',
 'display_name': 'Default Payment Account Table',
 'version': '1',
 'description': 'A source table for customer default payment preference',
 'query': None,
 'availability': None,
 'columns': [{'name': 'customer_id', 'type': 'INT'},
  {'name': 'account_type', 'type': 'STR'},
  {'name': 'default_payment_type', 'type': 'INT'}],
 'tables': [],
 'updated_at': '2023-02-13T03:50:07.387441+00:00',
 'created_at': '2023-02-13T03:50:07.386160+00:00'}

## Create some dimension nodes.

In [7]:
# Query that defines the `payment_type` dimension on top of `payment_type_table`.
payment_type_query = (
    "SELECT id, payment_type_name, payment_type_classification "
    "FROM payment_type_table"
)
payment_type_spec = {
    "description": "Payment type dimensions",
    "query": payment_type_query,
    "mode": "published",
    "name": "payment_type",
    "type": "dimension",
}

# Submit the dimension definition and show the server's JSON reply.
response = requests.post(f"{DJ_URL}/nodes/", json=payment_type_spec)
response.json()

{'node_revision_id': 18,
 'node_id': 18,
 'type': 'dimension',
 'name': 'payment_type',
 'display_name': 'Payment Type',
 'version': '1',
 'description': 'Payment type dimensions',
 'query': 'SELECT id, payment_type_name, payment_type_classification FROM payment_type_table',
 'availability': None,
 'columns': [{'name': 'id', 'type': 'INT'},
  {'name': 'payment_type_name', 'type': 'STR'},
  {'name': 'payment_type_classification', 'type': 'INT'}],
 'tables': [],
 'updated_at': '2023-02-13T03:50:07.543238+00:00',
 'created_at': '2023-02-13T03:50:07.542399+00:00'}

In [8]:
# Query that defines the `customers` dimension on top of `customers_table`.
customers_query = (
    "SELECT customer_id, first_name, last_name "
    "FROM customers_table"
)
customers_spec = {
    "description": "Customer dimension",
    "query": customers_query,
    "mode": "published",
    "name": "customers",
    "type": "dimension",
}

# Submit the dimension definition and show the server's JSON reply.
response = requests.post(f"{DJ_URL}/nodes/", json=customers_spec)
response.json()

{'node_revision_id': 19,
 'node_id': 19,
 'type': 'dimension',
 'name': 'customers',
 'display_name': 'Customers',
 'version': '1',
 'description': 'Customer dimension',
 'query': 'SELECT customer_id, first_name, last_name FROM customers_table',
 'availability': None,
 'columns': [{'name': 'customer_id', 'type': 'INT'},
  {'name': 'first_name', 'type': 'STR'},
  {'name': 'last_name', 'type': 'STR'}],
 'tables': [],
 'updated_at': '2023-02-13T03:50:07.739831+00:00',
 'created_at': '2023-02-13T03:50:07.739152+00:00'}

In [9]:
# Query that defines the `account_type` dimension on top of `account_type_table`.
account_type_query = (
    "SELECT id, account_type_name, account_type_classification "
    "FROM account_type_table"
)
account_type_spec = {
    "description": "Account type dimension",
    "query": account_type_query,
    "mode": "published",
    "name": "account_type",
    "type": "dimension",
}

# Submit the dimension definition and show the server's JSON reply.
response = requests.post(f"{DJ_URL}/nodes/", json=account_type_spec)
response.json()

{'node_revision_id': 20,
 'node_id': 20,
 'type': 'dimension',
 'name': 'account_type',
 'display_name': 'Account Type',
 'version': '1',
 'description': 'Account type dimension',
 'query': 'SELECT id, account_type_name, account_type_classification FROM account_type_table',
 'availability': None,
 'columns': [{'name': 'id', 'type': 'INT'},
  {'name': 'account_type_name', 'type': 'STR'},
  {'name': 'account_type_classification', 'type': 'INT'}],
 'tables': [],
 'updated_at': '2023-02-13T03:50:07.921304+00:00',
 'created_at': '2023-02-13T03:50:07.920718+00:00'}

## Create some transform nodes.

In [10]:
# Transform query: rows of `revenue_source` joined to the `payment_type`
# dimension, kept only when payment_amount exceeds 1000000.
large_payments_query = (
    "SELECT payment_id, payment_amount, customer_id, account_type "
    "FROM revenue_source "
    "LEFT JOIN payment_type on payment_type = payment_type.id "
    "WHERE payment_amount > 1000000"
)
large_payments_spec = {
    "description": "Revenue data filtered to large payments only",
    "query": large_payments_query,
    "mode": "published",
    "name": "large_revenue_payments_only",
    "type": "transform",
}

# Submit the transform definition and show the server's JSON reply.
response = requests.post(f"{DJ_URL}/nodes/", json=large_payments_spec)
response.json()

{'node_revision_id': 21,
 'node_id': 21,
 'type': 'transform',
 'name': 'large_revenue_payments_only',
 'display_name': 'Large Revenue Payments Only',
 'version': '1',
 'description': 'Revenue data filtered to large payments only',
 'query': 'SELECT payment_id, payment_amount, customer_id, account_type FROM revenue_source LEFT JOIN payment_type on payment_type = payment_type.id WHERE payment_amount > 1000000',
 'availability': None,
 'columns': [{'name': 'payment_id', 'type': 'INT'},
  {'name': 'payment_amount', 'type': 'FLOAT'},
  {'name': 'customer_id', 'type': 'INT'},
  {'name': 'account_type', 'type': 'STR'}],
 'tables': [],
 'updated_at': '2023-02-13T03:50:08.105057+00:00',
 'created_at': '2023-02-13T03:50:08.104387+00:00'}

In [11]:
# Transform query: rows of `revenue_source` with payment_amount above 1000000
# and account_type equal to 'BUSINESS'.
large_business_payments_query = (
    "SELECT payment_id, payment_amount, customer_id, account_type "
    "FROM revenue_source "
    "WHERE payment_amount > 1000000 AND account_type = 'BUSINESS'"
)
large_business_payments_spec = {
    "description": "Revenue data filtered to large business payments only",
    "query": large_business_payments_query,
    "mode": "published",
    "name": "large_revenue_payments_and_business_only",
    "type": "transform",
}

# Submit the transform definition and show the server's JSON reply.
response = requests.post(f"{DJ_URL}/nodes/", json=large_business_payments_spec)
response.json()

{'node_revision_id': 22,
 'node_id': 22,
 'type': 'transform',
 'name': 'large_revenue_payments_and_business_only',
 'display_name': 'Large Revenue Payments And Business Only',
 'version': '1',
 'description': 'Revenue data filtered to large business payments only',
 'query': "SELECT payment_id, payment_amount, customer_id, account_type FROM revenue_source WHERE payment_amount > 1000000 AND account_type = 'BUSINESS'",
 'availability': None,
 'columns': [{'name': 'payment_id', 'type': 'INT'},
  {'name': 'payment_amount', 'type': 'FLOAT'},
  {'name': 'customer_id', 'type': 'INT'},
  {'name': 'account_type', 'type': 'STR'}],
 'tables': [],
 'updated_at': '2023-02-13T03:50:08.337469+00:00',
 'created_at': '2023-02-13T03:50:08.336711+00:00'}

## Create some metric nodes.

In [12]:
# Metric query: sum of payment_amount over `revenue_source`.
total_revenue_query = "SELECT sum(payment_amount) as total_revenue FROM revenue_source"
total_revenue_spec = {
    "description": "Total company revenue",
    "query": total_revenue_query,
    "mode": "published",
    "name": "total_revenue",
    "type": "metric",
}

# Submit the metric definition and show the server's JSON reply.
response = requests.post(f"{DJ_URL}/nodes/", json=total_revenue_spec)
response.json()

{'node_revision_id': 23,
 'node_id': 23,
 'type': 'metric',
 'name': 'total_revenue',
 'display_name': 'Total Revenue',
 'version': '1',
 'description': 'Total company revenue',
 'query': 'SELECT sum(payment_amount) as total_revenue FROM revenue_source',
 'availability': None,
 'columns': [{'name': 'total_revenue', 'type': 'FLOAT'}],
 'tables': [],
 'updated_at': '2023-02-13T03:50:08.529165+00:00',
 'created_at': '2023-02-13T03:50:08.528590+00:00'}

In [13]:
# Metric query: count of ids in the `account_type` dimension.
account_type_count_query = "SELECT count(id) as num_accounts FROM account_type"
account_type_count_spec = {
    "description": "Total number of account types",
    "query": account_type_count_query,
    "mode": "published",
    "name": "number_of_account_types",
    "type": "metric",
}

# Submit the metric definition and show the server's JSON reply.
response = requests.post(f"{DJ_URL}/nodes/", json=account_type_count_spec)
response.json()

{'node_revision_id': 24,
 'node_id': 24,
 'type': 'metric',
 'name': 'number_of_account_types',
 'display_name': 'Number Of Account Types',
 'version': '1',
 'description': 'Total number of account types',
 'query': 'SELECT count(id) as num_accounts FROM account_type',
 'availability': None,
 'columns': [{'name': 'num_accounts', 'type': 'INT'}],
 'tables': [],
 'updated_at': '2023-02-13T03:50:08.712647+00:00',
 'created_at': '2023-02-13T03:50:08.712059+00:00'}

## Create some cube nodes.

In [14]:
# Nodes combined by this cube: the `total_revenue` metric plus the
# `payment_type` and `customers` dimensions created in earlier cells.
revenue_cube_elements = ["total_revenue", "payment_type", "customers"]
revenue_cube_spec = {
    "cube_elements": revenue_cube_elements,
    "description": "A cube of Total Revenue grouped by customer and payment type",
    "mode": "published",
    "name": "total_revenue_by_customer_payment_type",
    "type": "cube",
}

# Submit the cube definition and show the server's JSON reply.
response = requests.post(f"{DJ_URL}/nodes/", json=revenue_cube_spec)
response.json()

{'node_revision_id': 25,
 'node_id': 25,
 'type': 'cube',
 'name': 'total_revenue_by_customer_payment_type',
 'display_name': 'Total Revenue By Customer Payment Type',
 'version': '1',
 'description': 'A cube of Total Revenue grouped by customer and payment type',
 'query': None,
 'availability': None,
 'columns': [],
 'tables': [],
 'updated_at': '2023-02-13T03:50:08.898922+00:00',
 'created_at': '2023-02-13T03:50:08.892201+00:00'}

In [15]:
# Nodes combined by this cube: the `number_of_account_types` metric plus the
# `account_type` dimension created in earlier cells.
accounts_cube_elements = ["number_of_account_types", "account_type"]
accounts_cube_spec = {
    "cube_elements": accounts_cube_elements,
    "description": "A cube of number of accounts grouped by account type",
    "mode": "published",
    "name": "number_of_accounts_by_account_type",
    "type": "cube",
}

# Submit the cube definition and show the server's JSON reply.
response = requests.post(f"{DJ_URL}/nodes/", json=accounts_cube_spec)
response.json()

{'node_revision_id': 26,
 'node_id': 26,
 'type': 'cube',
 'name': 'number_of_accounts_by_account_type',
 'display_name': 'Number Of Accounts By Account Type',
 'version': '1',
 'description': 'A cube of number of accounts grouped by account type',
 'query': None,
 'availability': None,
 'columns': [],
 'tables': [],
 'updated_at': '2023-02-13T03:50:09.058746+00:00',
 'created_at': '2023-02-13T03:50:09.052833+00:00'}

## Add a catalog

In [16]:
# Register a catalog named "test"; the table-link requests further down
# refer to it through their "catalog_name" field.
catalog_spec = {"name": "test"}
response = requests.post(f"{DJ_URL}/catalogs/", json=catalog_spec)
response.json()

{'name': 'test', 'engines': []}

## Add tables to nodes

In [17]:
# (column name, type) pairs of the `accounting.revenue` table, in order.
revenue_table_columns = [
    ("payment_id", "INT"),
    ("payment_type", "INT"),
    ("payment_amount", "FLOAT"),
    ("customer_id", "INT"),
    ("account_type", "STR"),
]

# Table definition to attach to the `revenue_source` node.
revenue_table_spec = {
    "database_name": "postgres",
    "catalog_name": "test",
    "cost": 1.0,
    "schema": "accounting",
    "table": "revenue",
    "columns": [
        {"name": column, "type": dtype} for column, dtype in revenue_table_columns
    ],
}

# POST to the node's table endpoint and display the decoded JSON reply.
response = requests.post(
    f"{DJ_URL}/nodes/revenue_source/table/", json=revenue_table_spec
)
response.json()

{'message': 'Table revenue has been successfully linked to node revenue_source'}

In [18]:
# (column name, type) pairs of the `accounting.account_type` table, in order.
account_type_table_columns = [
    ("id", "INT"),
    ("account_type_name", "STR"),
    ("account_type_classification", "INT"),
    ("preferred_payment_method", "INT"),
]

# Table definition to attach to the `account_type_table` node.
account_type_table_link_spec = {
    "database_name": "postgres",
    "catalog_name": "test",
    "cost": 1.0,
    "schema": "accounting",
    "table": "account_type",
    "columns": [
        {"name": column, "type": dtype}
        for column, dtype in account_type_table_columns
    ],
}

# POST to the node's table endpoint and display the decoded JSON reply.
response = requests.post(
    f"{DJ_URL}/nodes/account_type_table/table/", json=account_type_table_link_spec
)
response.json()

{'message': 'Table account_type has been successfully linked to node account_type_table'}

In [19]:
# (column name, type) pairs of the `accounting.payment_type` table, in order.
payment_type_table_columns = [
    ("id", "INT"),
    ("payment_type_name", "STR"),
    ("payment_type_classification", "INT"),
]

# Table definition to attach to the `payment_type_table` node.
payment_type_table_link_spec = {
    "database_name": "postgres",
    "catalog_name": "test",
    "cost": 1.0,
    "schema": "accounting",
    "table": "payment_type",
    "columns": [
        {"name": column, "type": dtype}
        for column, dtype in payment_type_table_columns
    ],
}

# POST to the node's table endpoint and display the decoded JSON reply.
response = requests.post(
    f"{DJ_URL}/nodes/payment_type_table/table/", json=payment_type_table_link_spec
)
response.json()

{'message': 'Table payment_type has been successfully linked to node payment_type_table'}

In [20]:
# (column name, type) pairs of the `accounting.customers` table, in order.
customers_table_columns = [
    ("customer_id", "INT"),
    ("first_name", "STR"),
    ("last_name", "STR"),
]

# Table definition to attach to the `customers_table` node.
customers_table_link_spec = {
    "database_name": "postgres",
    "catalog_name": "test",
    "cost": 1.0,
    "schema": "accounting",
    "table": "customers",
    "columns": [
        {"name": column, "type": dtype}
        for column, dtype in customers_table_columns
    ],
}

# POST to the node's table endpoint and display the decoded JSON reply.
response = requests.post(
    f"{DJ_URL}/nodes/customers_table/table/", json=customers_table_link_spec
)
response.json()

{'message': 'Table customers has been successfully linked to node customers_table'}

In [21]:
# (column name, type) pairs of the `accounting.default_payment_account` table, in order.
default_payment_account_columns = [
    ("customer_id", "INT"),
    ("account_type", "STR"),
    ("default_payment_type", "INT"),
]

# Table definition to attach to the `default_payment_account_table` node.
default_payment_account_link_spec = {
    "database_name": "postgres",
    "catalog_name": "test",
    "cost": 1.0,
    "schema": "accounting",
    "table": "default_payment_account",
    "columns": [
        {"name": column, "type": dtype}
        for column, dtype in default_payment_account_columns
    ],
}

# POST to the node's table endpoint and display the decoded JSON reply.
response = requests.post(
    f"{DJ_URL}/nodes/default_payment_account_table/table/",
    json=default_payment_account_link_spec,
)
response.json()

{'message': 'Table default_payment_account has been successfully linked to node default_payment_account_table'}

## Label Foreign Keys With Dimension Nodes

In [22]:
# Link column `payment_type` on `revenue_source` to the `payment_type` dimension node.
payment_type_link_url = (
    f"{DJ_URL}/nodes/revenue_source/columns/payment_type/?dimension=payment_type"
)
response = requests.post(payment_type_link_url)
response.json()

{'message': 'Dimension node payment_type has been successfully linked to column payment_type on node revenue_source'}

In [23]:
# Link column `customer_id` on `revenue_source` to the `customers` dimension node.
customer_link_url = (
    f"{DJ_URL}/nodes/revenue_source/columns/customer_id/?dimension=customers"
)
response = requests.post(customer_link_url)
response.json()

{'message': 'Dimension node customers has been successfully linked to column customer_id on node revenue_source'}

In [24]:
# Link column `account_type` on `default_payment_account_table` to the
# `account_type` dimension node.
account_type_link_url = (
    f"{DJ_URL}/nodes/default_payment_account_table/columns/account_type/?dimension=account_type"
)
response = requests.post(account_type_link_url)
response.json()

{'message': 'Dimension node account_type has been successfully linked to column account_type on node default_payment_account_table'}

In [25]:
# Link column `default_payment_type` on `default_payment_account_table` to the
# `payment_type` dimension node.
default_payment_type_link_url = (
    f"{DJ_URL}/nodes/default_payment_account_table/columns/default_payment_type/?dimension=payment_type"
)
response = requests.post(default_payment_type_link_url)
response.json()

{'message': 'Dimension node payment_type has been successfully linked to column default_payment_type on node default_payment_account_table'}

## Generate SQL For Metrics and Dimensions

In [26]:
# Ask the server for the SQL of the `total_revenue` metric with
# `payment_type.id` as the requested dimension.
# `response` stays a requests.Response here, as in every other cell, instead of
# being rebound to the decoded dict.
response = requests.get(
    f"{DJ_URL}/metrics/total_revenue/sql/?dimensions=payment_type.id"
)

# Surface the server's own error text when the request is rejected; without
# this check a failure would show up only as an opaque KeyError on "sql".
if not response.ok:
    raise RuntimeError(
        f"SQL generation failed with HTTP {response.status_code}: {response.text}"
    )

# print() rather than a bare expression so the multi-line SQL renders with real
# line breaks instead of as an escaped repr.
print(response.json()["sql"])

SELECT  sum(test.accounting.revenue.payment_amount) AS total_revenue,
	_payment_type.id 
 FROM test.accounting.revenue
LEFT JOIN (SELECT  test.accounting.payment_type.id,
	test.accounting.payment_type.payment_type_name,
	test.accounting.payment_type.payment_type_classification 
 FROM test.accounting.payment_type
 
) AS _payment_type
        ON test.accounting.revenue.payment_type = _payment_type.id 
 GROUP BY  _payment_type.id


In [27]:
# Ask the server for the SQL of the `basic.num_users` metric with
# `basic.transform.country_agg.country` as the requested dimension.
# NOTE(review): no cell visible in this notebook creates `basic.num_users` or
# `basic.transform.country_agg`, so this request presumably relies on nodes
# that already exist on the server. TODO: confirm, or add the cells that
# create them so the notebook runs against a fresh server.
response = requests.get(
    f"{DJ_URL}/metrics/basic.num_users/sql/?dimensions=basic.transform.country_agg.country"
)

# Surface the server's own error text (e.g. if the nodes above are missing)
# rather than failing with an opaque KeyError on "sql".
if not response.ok:
    raise RuntimeError(
        f"SQL generation failed with HTTP {response.status_code}: {response.text}"
    )

# print() rather than a bare expression so the multi-line SQL renders with real
# line breaks instead of as an escaped repr.
print(response.json()["sql"])

SELECT  SUM(basic_DOT_transform_DOT_country_agg.num_users),
	basic_DOT_transform_DOT_country_agg.country 
 FROM (SELECT  basic.dim_users.country,
	COUNT(DISTINCT basic.dim_users.id) AS num_users 
 FROM basic.dim_users
 
 GROUP BY  1) AS basic_DOT_transform_DOT_country_agg
 
 GROUP BY  basic_DOT_transform_DOT_country_agg.country
