# Model Driven Software Engineering for Data Warehousing — Part 2: Data warehoused

> todo:QUOTE.
> 
> — todo:PERSON

This article is the second part of a series of articles in which I want to give an overview of how I think Model Driven Software Engineering (MDSE) can be used for data warehousing. 
This article is about todo:DESCRIPTION.
Last but not least, the topics described in this article are explained using an example.

todo:INTRODUCTION

## Generating Data Warehouse Artifacts

Now, we also want to start to work on our actual data warehouse.
At Nippur, the starting point for that is to create historical storage of our data.
We call this the *Historical Data Archive* (HDA).

To do this, we first make sure we can actually store the data in HDA tables: we transform our previous database model into an HDA database model and generate the DDL code from it.

In [1]:
from typing import Tuple
def transform_database_model_into_hda_model_and_mappings(database_model: dict) -> Tuple[dict, dict]:
    """
    Extends an existing database model to an HDA model.
    Also creates a model that describes the column mappings between the database and the HDA.

    Every schema ``<name>`` gets an HDA counterpart called ``<name>_hda``.
    Every table keeps its name and columns, is prefixed with an
    ``hda_registration_utc`` column of type ``datetime2``, and gets a primary key
    named ``<table>_pkey`` consisting of the original key columns followed by
    ``hda_registration_utc``.
    In the mappings every source column is mapped onto the equally named HDA
    column, and ``hda_registration_utc`` is filled by the expression ``GETUTCDATE()``.

    The provided model is not modified. Column definitions are (shallow) copied
    into the HDA model, so changing a column of the HDA model afterwards does
    not change the provided model.

    Parameters
    ----------
    - database_model
      the model that needs to be transformed.

    Returns
    -------
    A tuple (hda_model, mappings_model), where
    - hda_model contains the database model for the HDA, and
    - mappings_model describes the mappings between the provided database model and the HDA model.

    Raises
    ------
    - KeyError
      when a schema, table or column lacks one of the keys read here
      ("schemas", "name", "tables", "columns", "primary_key_constraint", "column_names").
    """
    registration_column_name = "hda_registration_utc"

    hda_schemas = []
    schema_mappings = []

    for schema in database_model["schemas"]:
        hda_schema_name = f"{schema['name']}_hda"
        hda_tables = []
        table_mappings = []

        for table in schema["tables"]:
            # Copy the column definitions: sharing the dicts with the source model
            # would let a later change to the HDA model leak into the source model.
            copied_columns = [dict(column) for column in table["columns"]]
            source_key_column_names = table["primary_key_constraint"]["column_names"]

            hda_tables.append({
                "name": table["name"],
                "columns": [{
                    "name": registration_column_name,
                    "type": "datetime2"
                }] + copied_columns,
                "primary_key_constraint": {
                    "name": f"{table['name']}_pkey",
                    # The registration timestamp makes every stored version of a row unique.
                    "column_names": [*source_key_column_names, registration_column_name],
                }
            })

            column_mappings = [{
                "expression": "GETUTCDATE()",
                "target": registration_column_name
            }]
            column_mappings.extend(
                {"source": column["name"], "target": column["name"]}
                for column in table["columns"]
            )

            table_mappings.append({
                "source": {
                    "schema": schema["name"],
                    "table": table["name"]
                },
                "target": {
                    "schema": hda_schema_name,
                    "table": table["name"]
                },
                "column_mappings": column_mappings
            })

        hda_schemas.append({
            "name": hda_schema_name,
            "tables": hda_tables
        })
        schema_mappings.append({
            "table_mappings": table_mappings
        })

    hda_model = {
        "schemas": hda_schemas
    }
    hda_mappings = {
        "schema_mappings": schema_mappings
    }
    return hda_model, hda_mappings

# Template (Jinja-style syntax) for the ETL script that loads the HDA.
# It expects a mappings model with `schema_mappings` -> `table_mappings` ->
# `column_mappings` and emits one `INSERT INTO <target> (...) SELECT ... FROM <source>`
# statement per table mapping.
# Per column mapping the SELECT list contains:
# - the `expression` verbatim when the mapping has an 'expression' key,
# - otherwise the bracketed `source` column when it has a 'source' key,
# - otherwise NULL.
# `loop.index == 1` selects the first item, so only later items get a leading comma.
etl_template_text = """
{% for schema_mapping in schema_mappings %}
{% for table_mapping in schema_mapping.table_mappings %}
INSERT INTO [{{ table_mapping.target.schema }}].[{{ table_mapping.target.table }}] (
{% for column_mapping in table_mapping.column_mappings -%}    
{{ '  ' if loop.index == 1 else ', ' }}[{{ column_mapping.target }}]
{% endfor -%}
)
SELECT
{% for column_mapping in table_mapping.column_mappings -%}   
{{ '  ' if loop.index == 1 else ', ' }}{% if 'expression' in column_mapping %}{{ column_mapping.expression }}{% elif 'source' in column_mapping %}[{{ column_mapping.source }}]{% else %}NULL{% endif %}
{% endfor -%}
FROM
  [{{ table_mapping.source.schema }}].[{{ table_mapping.source.table }}]
;
{% endfor -%}
{% endfor %}
"""

# Template (Jinja-style syntax) for the point-in-time (PIT) functions on top of the HDA.
# It expects an HDA database model (`schemas` -> `tables` -> `columns`, plus
# `primary_key_constraint.column_names` per table) and emits, per HDA schema, a
# `<schema>_pub` schema with one inline table-valued function `<table>_pit` per table.
# `schema.name[:-4]` strips the last four characters of the schema name, i.e. the
# `_hda` suffix of the HDA schemas.
#
# For a given @timestamp_utc each function returns, per source row, the most recent
# version registered before that timestamp. The NOT EXISTS subquery therefore has to
# - compare only versions of the same source row (equal on every primary key column
#   except `hda_registration_utc`), and
# - ignore versions that were registered at or after @timestamp_utc.
# Without these two conditions only rows carrying the table-wide latest registration
# timestamp would be returned.
pit_template_text = """
{% for schema in schemas %}
GO
CREATE SCHEMA [{{ schema.name[:-4] }}_pub]
GO

{% for table in schema.tables %}
CREATE OR ALTER FUNCTION [{{ schema.name[:-4] }}_pub].[{{ table.name }}_pit] (
  @timestamp_utc datetime2
)
RETURNS TABLE
AS
  RETURN
    SELECT
    {% for column in table.columns -%}
    {{ '  ' if loop.index == 1 else ', ' }}[t1].[{{ column.name }}]
    {% endfor -%}
    FROM
      [{{ schema.name }}].[{{ table.name }}] [t1]
    WHERE
      [t1].[hda_registration_utc] < @timestamp_utc
    AND NOT EXISTS (
      SELECT
        NULL
      FROM
        [{{ schema.name }}].[{{ table.name }}] [t2]
      WHERE
        [t2].[hda_registration_utc] > [t1].[hda_registration_utc]
      AND [t2].[hda_registration_utc] < @timestamp_utc
      {% for column_name in table.primary_key_constraint.column_names if column_name != 'hda_registration_utc' -%}
      AND [t2].[{{ column_name }}] = [t1].[{{ column_name }}]
      {% endfor -%}
    )
;
GO
{% endfor %}

{% endfor %}
"""

def generate_etl_code(mapping_model: dict) -> str:
    """
    Generates the ETL code for a mappings model.

    Passes ``etl_template_text`` together with ``mapping_model`` to
    ``generate_code`` and returns its result.
    """
    etl_code = generate_code(etl_template_text, mapping_model)
    return etl_code
    
def generate_pit_code(hda_model: dict) -> str:
    """
    Generates the point-in-time (PIT) code for an HDA database model.

    Passes ``pit_template_text`` together with ``hda_model`` to
    ``generate_code`` and returns its result.
    """
    pit_code = generate_code(pit_template_text, hda_model)
    return pit_code

# Derive the HDA model and the mappings from the orders database model.
# NOTE: `orders_database_model` is not defined in this cell; presumably it comes
# from an earlier cell/part of the series and has to exist before this runs.
hda_database_model, hda_mapping_model = transform_database_model_into_hda_model_and_mappings(
    orders_database_model
)

# Generate and show, in this order: the DDL, the point-in-time functions and the ETL code.
hda_ddl_code = generate_ddl_code(hda_database_model)
visualzize_sql_code(hda_ddl_code)

hda_pit_code = generate_pit_code(hda_database_model)
visualzize_sql_code(hda_pit_code)

hda_etl_code = generate_etl_code(hda_mapping_model)
visualzize_sql_code(hda_etl_code)

NameError: name 'orders_database_model' is not defined

In [None]:
# Template (Jinja-style syntax) for the lineage documentation in Markdown.
# It expects a mappings model with `schema_mappings` -> `table_mappings` ->
# `column_mappings` and emits one section with a two-column table
# (target column, source) per table mapping.
# Per column mapping the source cell contains:
# - the `expression` as inline code when the mapping has an 'expression' key,
# - otherwise the fully qualified source column when it has a 'source' key,
# - otherwise `NULL` as inline code.
# The `{%-` / `-%}` markers strip whitespace around the row loop, so the rows
# follow the table header line without blank lines in between.
markdown_template_text = """
# Lineage Documentation

{% for schema_mapping in schema_mappings %}
{% for table_mapping in schema_mapping.table_mappings %}
## [{{ table_mapping.target.schema }}].[{{ table_mapping.target.table }}]

| target column | source |
| :----- | :----- | 
{%- for column_mapping in table_mapping.column_mappings %}
| {{ column_mapping.target }} | {% if 'expression' in column_mapping %}`{{ column_mapping.expression }}`{% elif 'source' in column_mapping %}[{{ table_mapping.source.schema }}].[{{table_mapping.source.table  }}].[{{ column_mapping.source }}]{% else %}`NULL`{% endif %} |
{%- endfor -%}
{% endfor %}
{% endfor %}
"""

def generate_lineage_code(mapping_model: dict) -> str:
    """
    Generates the lineage documentation (Markdown) for a mappings model.

    Passes ``markdown_template_text`` together with ``mapping_model`` to
    ``generate_code`` and returns its result.
    """
    lineage_code = generate_code(markdown_template_text, mapping_model)
    return lineage_code

from src.article_utils import visualzize_markdown_code
# Generate the lineage documentation for the HDA mappings and show it.
lineage_markdown = generate_lineage_code(hda_mapping_model)
visualzize_markdown_code(lineage_markdown)