In [1]:
# default_exp dictionary

# Dictionary Page Generation

<br>

### Imports

In [116]:
import json
import numpy as np
import pandas as pd

from frictionless import Package

from jinja2 import Template

In [56]:
from IPython.display import JSON, Markdown

In [65]:
# Path to the data package descriptor, relative to this notebook
datapackage_json_fp = '../data/dictionary/datapackage.json'

# Load the descriptor as a tabular data package and pick out its `ids` resource
package = Package(datapackage_json_fp, profile='tabular-data-package')
ids_resource = package.get_resource('ids')

# Materialise the resource as a DataFrame; `package`, `ids_resource` and `df_ids` are read by later cells
df_ids = ids_resource.to_pandas()

# Preview the first three rows
df_ids.head(3)

Unnamed: 0_level_0,gppd_idnr,esail_id,name,sett_bmu_id,ngc_bmu_id,4c_offshore_id,windpowernet_id,wikidata_id,wikipedia_id,power_technology_id,eutl_id,eic_id,cfd_id
osuked_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
10000,,MARK,Rothes Bio-Plant CHP,"[E_MARK-1, E_MARK-2]","[MARK-1, MARK-2]",,,,,,,[48W000000MARK-1D],
10001,"[GBR1000377, GBR1000369]",DIDC,Didcot,"[T_DIDC1, T_DIDC2, T_DIDC4, T_DIDC3, T_DIDC1G,...","[DIDC1, DIDC2, DIDC4, DIDC3, DIDC1G, DIDC2G, D...",,,,,,[97165],"[48W00000DIDC01G1, 48W00000DIDC02GZ, 48W00000D...",
10002,"[GBR1000374, GBR1000375]",ABTH,Aberthaw B,"[T_ABTH7, T_ABTH8, T_ABTH9, T_ABTH7G, T_ABTH8G...","[ABTH7, ABTH8, ABTH9, ABTH7G, ABTH8G, ABTH9G]",,,,,,[97175],"[48W0000000ABTH7Y, 48W0000000ABTH8W, 48W000000...",


In [53]:
# TODO: the metadata table below still needs to include the file format

def construct_metadata_table_str(package):
    """Build a markdown table summarising the package-level metadata.

    Parameters
    ----------
    package
        Object exposing `version`, `contributors`, `keywords` and `licenses`
        attributes (here a frictionless `Package`). Contributor and licence
        entries are expected to be dict-like.

    Returns
    -------
    str
        Markdown table with an `Attribute` index column and a `Value(s)` column.
    """
    def describe_contributor(contributor):
        # `role` is optional on a contributor entry; the Data Package spec
        # defines 'contributor' as the default role.
        role = contributor.get('role', 'contributor')
        return f'{contributor["title"]} ({role.capitalize()})'

    def describe_licence(licence):
        # A licence entry may carry only one of `name`/`path`, so only emit a
        # markdown link when there is a target to point at.
        path = licence.get('path')
        label = licence.get('name') or licence.get('title') or path
        return f'[{label}]({path})' if path else str(label)

    s_metadata = pd.Series({
        'Version': package.version,
        'Contributors': ', '.join(describe_contributor(contributor) for contributor in (package.contributors or [])),
        'Key words': ', '.join(package.keywords or []),
        'Licences': ', '.join(describe_licence(licence) for licence in (package.licenses or [])),
    })

    # These two names become the header row of the rendered table
    s_metadata.index.name = 'Attribute'
    s_metadata.name = 'Value(s)'

    md_str = s_metadata.to_markdown()

    return md_str

In [59]:
# Build the package metadata table from the loaded package
metadata_table_str = construct_metadata_table_str(package)

# Render the markdown string so the table can be checked visually
Markdown(metadata_table_str)

| Attribute    | Value(s)                                                  |
|:-------------|:----------------------------------------------------------|
| Version      | 1.0.0                                                     |
| Contributors | Ayrton Bourn (Author)                                     |
| Key words    | power plants, ids                                         |
| Licences     | [CC-BY-4.0](https://creativecommons.org/licenses/by/4.0/) |

In [57]:
#exports
def construct_field_desc_table_str(package, resource='ids'):
    """Return a markdown table mapping each field title of `resource` to its description.

    `package` must expose `get_resource(resource).schema.fields`, where every
    field can be subscripted with 'title' and 'description'.
    """
    schema_fields = package.get_resource(resource).schema.fields

    descriptions_by_title = {}
    for schema_field in schema_fields:
        descriptions_by_title[schema_field['title']] = schema_field['description']

    # Series name and index name become the two column headers of the table
    field_descs = pd.Series(descriptions_by_title)
    field_descs.name = 'Description'
    field_descs.index.name = 'Field'

    return field_descs.to_markdown()

In [58]:
# Build the field description table for the default `ids` resource
field_desc_table_str = construct_field_desc_table_str(package)

# Render the markdown string so the table can be checked visually
Markdown(field_desc_table_str)

| Field                | Description                                                                                                                                                                                                                                        |
|:---------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| OSUKED ID            | The identifier used to refer to power plants in the Subak data cooperative                                                                                                                                                                         |
| GPPD ID              | The identifier used in the global power plant database                                                                                                                                                                                             |
| ESAIL ID             | The identifier used by the Energy Systems and Artificial Intelligence Lab at UCL                                                                                                                                                                   |
| Common Name          | The name used to refer to the power plant site                                                                                                                                                                                                     |
| Settlement BMU ID    | The Balancing Mechanism Unit identifier used for settlement purposes by Elexon                                                                                                                                                                     |
| National Grid BMU ID | The Balancing Mechanism Unit identifier used by the National Grid                                                                                                                                                                                  |
| 4C-Offshore ID       | The identifier used by 4C-Offshore                                                                                                                                                                                                                 |
| WindPowerNet ID      | The identifier used by the WindPower.Net                                                                                                                                                                                                           |
| Wikidata ID          | The identifier used by Wikidata                                                                                                                                                                                                                    |
| Wikipedia ID         | The identifier used by Wikipedia                                                                                                                                                                                                                   |
| Power-Technology ID  | The identifier used by Power-Technology                                                                                                                                                                                                            |
| EUTL ID              | The identifier used in the European (Emissions Trading Scheme) Transaction Logs                                                                                                                                                                    |
| EIC ID               | The Energy Identification Code or EIC is a 16-character code used in Europe to uniquely identify entities and objects related to the electricity and gas sector. The EIC code is used for: Transmission System Operators, Market Participants etc. |
| CfD ID               | The identifier used by the Low Carbon Contracts Company to uniquely identify Contracts for Difference projects                                                                                                                                     |

In [110]:
#exports
def get_dp_field_to_url_format_str(datapackage_json_fp):
    """Map field name -> `url_format` for the `ids` fields that declare one.

    The data package at `datapackage_json_fp` is loaded on every call; fields
    without a `url_format` key are left out of the returned dict.
    """
    dp = Package(datapackage_json_fp, profile='tabular-data-package')
    schema_fields = dp.get_resource('ids')['schema']['fields']

    url_formats_by_field = {}
    for schema_field in schema_fields:
        if 'url_format' not in schema_field.keys():
            continue
        url_formats_by_field[schema_field['name']] = schema_field['url_format']

    return url_formats_by_field

def get_dp_field_to_title(datapackage_json_fp):
    """Map field name -> human-readable title for every field of the `ids` resource.

    The data package at `datapackage_json_fp` is loaded on every call.
    """
    dp = Package(datapackage_json_fp, profile='tabular-data-package')
    schema_fields = dp.get_resource('ids')['schema']['fields']

    return dict(
        (schema_field['name'], schema_field['title'])
        for schema_field in schema_fields
    )

def format_id_values(id_values, id_type, id_field_to_url_format_str):
    """Turn each id in `id_values` into a display string.

    When `id_type` has an entry in `id_field_to_url_format_str`, each id becomes
    a markdown link whose target is that entry formatted with `value=<id>`;
    otherwise each id is simply converted with `str`.
    """
    if id_type not in id_field_to_url_format_str.keys():
        return [str(raw_id) for raw_id in id_values]

    link_template = id_field_to_url_format_str[id_type]

    return [
        f'[{raw_id}]({link_template.format(value=raw_id)})'
        for raw_id in id_values
    ]

def construct_linked_idxs(df_ids_clean):
    """Return one markdown link per row, labelled with the row's index value.

    The link points at the plant's page under `objects/`, addressed by its
    'Common Name' (spaces replaced with `%20`), or by the index value itself
    when the name is the '-' placeholder.
    """
    linked_idxs = []

    for idx, name in df_ids_clean['Common Name'].items():
        slug = idx if name == '-' else name.replace(" ", "%20")
        linked_idxs.append(f'[{idx}](https://osuked.github.io/Power-Station-Dictionary/objects/{slug})')

    return linked_idxs

def construct_linked_ids_table_str(package, datapackage_json_fp):
    """Build the markdown table of every plant's identifiers, hyperlinked where possible.

    Parameters
    ----------
    package
        Loaded data package; its `ids` resource supplies the rows via
        `get_resource('ids').to_pandas()`.
    datapackage_json_fp
        Path to the data package descriptor, used to look up each field's
        title and (where declared) its `url_format`.

    Returns
    -------
    str
        Markdown table indexed by linked OSUKED IDs with one column per
        remaining field title. Missing values are shown as '-'.
    """
    id_field_to_url_format_str = get_dp_field_to_url_format_str(datapackage_json_fp)
    id_field_to_title = get_dp_field_to_title(datapackage_json_fp)

    # Take the ids from the package that was passed in rather than from a
    # notebook-level `df_ids` variable, so the function also works when it is
    # imported from a module where that variable does not exist.
    df_ids = package.get_resource('ids').to_pandas()

    def format_cell(id_type, id_values):
        # Array fields: format every element, then join into one cell
        if isinstance(id_values, list):
            return ', '.join(format_id_values(id_values, id_type, id_field_to_url_format_str))

        # Missing scalars may arrive as None or NaN; neither should become a link
        if pd.api.types.is_scalar(id_values) and pd.isna(id_values):
            return '-'

        if id_type in id_field_to_url_format_str:
            return f'[{id_values}]({id_field_to_url_format_str[id_type].format(value=id_values)})'

        return id_values

    # Build all rows first and construct the frame once, instead of assigning row by row
    records = [
        {
            id_field_to_title[id_type]: format_cell(id_type, id_values)
            for id_type, id_values
            in row.items()
        }
        for _, row
        in df_ids.iterrows()
    ]

    df_ids_clean = pd.DataFrame(records, index=df_ids.index, columns=list(id_field_to_title.values()))

    # The OSUKED ID is carried by the index, so its (empty) column is dropped
    # and the index is replaced with linked ids.
    df_ids_clean = df_ids_clean.drop(columns='OSUKED ID')
    df_ids_clean.index = construct_linked_idxs(df_ids_clean)
    df_ids_clean.index.name = 'OSUKED ID'

    return df_ids_clean.to_markdown()

In [113]:
# Build the markdown table of linked ids
linked_ids_table_str = construct_linked_ids_table_str(package, datapackage_json_fp)

# Rendering is left disabled here, presumably because the full table is large (TODO confirm)
# Markdown(linked_ids_table_str)

In [119]:
def populate_and_save_template(template_fp, save_fp, render_kwargs):
    """Render the template at `template_fp` with `render_kwargs` and write it to `save_fp`.

    Parameters
    ----------
    template_fp
        Path of the template file, read as UTF-8.
    save_fp
        Path the rendered text is written to as UTF-8 (overwritten if present).
    render_kwargs
        Mapping of template variable names to values, passed to `Template.render`.

    Returns
    -------
    None

    Raises
    ------
    OSError
        If either file cannot be opened, read or written.
    """
    # Read with the same explicit encoding used for writing, and close the handle promptly
    with open(template_fp, encoding='utf-8') as f:
        template_str = f.read()

    rendered_str = Template(template_str).render(**render_kwargs)

    # Write errors propagate unchanged; no handler is needed just to re-raise them
    with open(save_fp, 'w', encoding='utf-8') as f:
        f.write(rendered_str)

    return None

In [120]:
# Template to fill in, and where the rendered dictionary page is written
template_fp = '../templates/dictionary_page.md'
save_fp = '../docs/dictionary.md'

# Values substituted into the template: package title/description plus the three generated tables
render_kwargs = dict(
    title=package.title,
    description=package.description,
    metadata_table=construct_metadata_table_str(package),
    field_desc_table=construct_field_desc_table_str(package),
    linked_ids_table=construct_linked_ids_table_str(package, datapackage_json_fp),
)

populate_and_save_template(template_fp, save_fp, render_kwargs)

In [62]:
# Display the schema of the `ids` resource using IPython's JSON display object
JSON(ids_resource.schema)

<IPython.core.display.JSON object>

In [35]:
# Show the loaded package descriptor (displayed because it is the cell's last expression)
package

{'profile': 'tabular-data-package',
 'resources': [{'name': 'ids',
   'path': 'ids.csv',
   'profile': 'tabular-data-resource',
   'schema': {'fields': [{'name': 'osuked_id',
      'required': True,
      'type': 'integer',
      'format': 'default',
      'description': 'The identifier used to refer to power plants in the Subak data cooperative',
      'title': 'OSUKED ID',
      'hierarchy': 'root'},
     {'name': 'gppd_idnr',
      'type': 'array',
      'format': ', ',
      'array_item': {'type': 'string'},
      'title': 'GPPD ID',
      'description': 'The identifier used in the global power plant database',
      'hierarchy': 'equivalent/child'},
     {'name': 'esail_id',
      'type': 'string',
      'format': 'default',
      'description': 'The identifier used by the Energy Systems and Artificial Intelligence Lab at UCL',
      'title': 'ESAIL ID',
      'hierarchy': 'equivalent'},
     {'name': 'name',
      'type': 'string',
      'format': 'default',
      'title': 'Commo