In [1]:
# default_exp population

In [2]:
#exports
import json
import numpy as np
import pandas as pd
from frictionless import Package

from powerdict import extraction

import os
from tqdm import tqdm
from warnings import warn

from jinja2 import Template

In [3]:
from IPython.display import JSON, Markdown

<br>

### Data Formatting

In [4]:
# Read the site data JSON from the intermediate data directory and preview it
# using `extraction.get_dict_head`
site_data_fp = '../data/intermediate/site_data.json'

with open(site_data_fp, 'r') as f:
    site_data = json.load(f)
    
JSON(extraction.get_dict_head(site_data))

<IPython.core.display.JSON object>

In [5]:
#exports
def get_dp_field_to_url_format_str(datapackage_json_fp):
    """Maps `ids` resource field names to their `url_format` strings.

    Opens the datapackage at `datapackage_json_fp` (as a tabular data
    package), reads the schema of its `ids` resource and keeps only the
    fields that declare a `url_format` key.

    Returns a dictionary of `{field name: url format string}`.
    """
    dp = Package(datapackage_json_fp, profile='tabular-data-package')
    schema_fields = dp.get_resource('ids')['schema']['fields']

    field_to_url_format = {}

    for schema_field in schema_fields:
        # fields without a `url_format` have no linkable page and are skipped
        if 'url_format' in schema_field.keys():
            field_to_url_format[schema_field['name']] = schema_field['url_format']

    return field_to_url_format

In [6]:
# Example: collect the URL format strings declared in the dictionary datapackage
datapackage_json_fp = '../data/dictionary/datapackage.json'

id_field_to_url_format_str = get_dp_field_to_url_format_str(datapackage_json_fp)

id_field_to_url_format_str

{'4c_offshore_id': 'https://www.4coffshore.com/windfarms/united-kingdom/{value}.html',
 'windpowernet_id': 'https://www.thewindpower.net/{value}.php',
 'wikidata_id': 'https://www.wikidata.org/wiki/{value}',
 'wikipedia_id': 'https://en.wikipedia.org/wiki/{value}',
 'power_technology_id': 'https://www.power-technology.com/projects/{value}'}

In [7]:
#exports
def get_dp_field_to_title(datapackage_json_fp):
    """Maps every `ids` resource field name to its human-readable `title`.

    Opens the datapackage at `datapackage_json_fp` (as a tabular data
    package) and reads the schema of its `ids` resource. Every field is
    expected to carry a `title`; a field without one raises `KeyError`.

    Returns a dictionary of `{field name: title}`.
    """
    dp = Package(datapackage_json_fp, profile='tabular-data-package')
    schema_fields = dp.get_resource('ids')['schema']['fields']

    field_to_title = dict(
        (schema_field['name'], schema_field['title'])
        for schema_field in schema_fields
    )

    return field_to_title

In [8]:
# Example: field name -> title mapping, reusing `datapackage_json_fp` from the earlier cell
id_field_to_title = get_dp_field_to_title(datapackage_json_fp)

id_field_to_title

{'dictionary_id': 'Dictionary ID',
 'gppd_idnr': 'GPPD ID',
 'esail_id': 'ESAIL ID',
 'name': 'Common Name',
 'sett_bmu_id': 'Settlement BMU ID',
 'ngc_bmu_id': 'National Grid BMU ID',
 '4c_offshore_id': '4C-Offshore ID',
 'windpowernet_id': 'WindPowerNet ID',
 'wikidata_id': 'Wikidata ID',
 'wikipedia_id': 'Wikipedia ID',
 'power_technology_id': 'Power-Technology ID',
 'eutl_id': 'EUTL ID',
 'eic_id': 'EIC ID',
 'cfd_id': 'CfD ID',
 'jrc_id': 'JRC ID',
 'iaea_id': 'IAEA ID',
 'old_repd_id': 'REPD ID (Old)',
 'new_repd_id': 'REPD ID (New)'}

In [9]:
#exports
def format_id_values(id_values, id_type, id_field_to_url_format_str):
    """Renders a collection of ID values as strings, linking them where possible.

    When `id_type` has an entry in `id_field_to_url_format_str` each value
    becomes a markdown link of the form `[value](url)`, with the url built
    by filling the `{value}` placeholder of the format string. Otherwise
    each value is simply converted with `str`.

    Returns a list with one string per element of `id_values`.
    """
    if id_type not in id_field_to_url_format_str.keys():
        return [str(value) for value in id_values]

    url_template = id_field_to_url_format_str[id_type]
    linked_values = []

    for value in id_values:
        linked_values.append(f'[{value}]({url_template.format(value=value)})')

    return linked_values

In [10]:
# Example: format two Power-Technology IDs as markdown links
id_type = 'power_technology_id'
id_values = ['hornsea-project-one-north-sea', 'hornsea-project-two-north-sea']
# NOTE: `url_format_str` is not passed to `format_id_values` below, which looks
# the format up in `id_field_to_url_format_str` instead
url_format_str = 'https://www.power-technology.com/projects/{value}'

id_values_strs = format_id_values(id_values, id_type, id_field_to_url_format_str)

id_values_strs

['[hornsea-project-one-north-sea](https://www.power-technology.com/projects/hornsea-project-one-north-sea)',
 '[hornsea-project-two-north-sea](https://www.power-technology.com/projects/hornsea-project-two-north-sea)']

In [11]:
#exports
def single_site_data_to_ids_df(single_site_data, root_id, datapackage_json_fp, root_id_type='dictionary_id'):
    """Builds a table of all identifiers linked to a single site.

    Parameters:
        single_site_data: dictionary for one site, it must contain an
            `id_hierarchies` key mapping a hierarchy level (`parent`,
            `child` or `equivalent`) to a `{id type: id value(s)}` dictionary
        root_id: identifier of the site itself, shown in the `Root` row
        datapackage_json_fp: path to the dictionary datapackage, used to look
            up the field titles and url formats
        root_id_type: field name of the root identifier

    Returns:
        A dataframe indexed by (`Relationship`, `ID Type`) with a single
        `ID(s)` column. List values are joined with `, ` and any ID type
        with a url format is rendered as a markdown link.

    Raises:
        KeyError: if a hierarchy level or ID type is not recognised
    """
    id_field_to_url_format_str = get_dp_field_to_url_format_str(datapackage_json_fp)
    id_field_to_title = get_dp_field_to_title(datapackage_json_fp)

    hierarchy_level_to_relationship = {
        'parent': 'Parent',
        'child': 'Related',
        'equivalent': 'Equivalent'
    }

    def format_ids(id_type, id_values):
        # lists are formatted element-wise then joined into a single cell
        if isinstance(id_values, list):
            return ', '.join(format_id_values(id_values, id_type, id_field_to_url_format_str))

        # a scalar with a known url format becomes a single markdown link
        if id_type in id_field_to_url_format_str.keys():
            return format_id_values([id_values], id_type, id_field_to_url_format_str)[0]

        # scalars without a url format are passed through untouched
        return id_values

    # rows are collected and the frame is built once, `DataFrame.append`
    # was removed in pandas 2.0
    records = [{'Relationship': 'Root', 'ID Type': id_field_to_title[root_id_type], 'ID(s)': root_id}]

    for hierarchy_level, ids in single_site_data['id_hierarchies'].items():
        if len(ids) == 0:
            continue

        relationship = hierarchy_level_to_relationship[hierarchy_level]

        records += [
            {
                'Relationship': relationship,
                'ID Type': id_field_to_title[id_type],
                'ID(s)': format_ids(id_type, id_values)
            }
            for id_type, id_values
            in ids.items()
        ]

    # the root row is always present so the index can be set unconditionally
    df_site_ids = pd.DataFrame(records).set_index(['Relationship', 'ID Type'])

    return df_site_ids

def single_site_data_to_ids_md_str(single_site_data, root_id, datapackage_json_fp):
    """Renders the identifiers of a single site as a markdown section.

    The identifiers table from `single_site_data_to_ids_df` is flattened
    (its index moved back into columns) and converted to a markdown table,
    which is placed under an `### Identifiers` heading.

    Returns the markdown string.
    """
    ids_md_table = (
        single_site_data_to_ids_df(single_site_data, root_id, datapackage_json_fp)
        .reset_index()
        .to_markdown(index=False)
    )

    return '### Identifiers\n\n' + ids_md_table

In [12]:
# Example: render the identifiers section for the site stored under key '10004'
root_id = '10004'
single_site_data = site_data[root_id]

site_ids_md_str = single_site_data_to_ids_md_str(single_site_data, root_id, datapackage_json_fp)

Markdown(site_ids_md_str)

### Identifiers

| Relationship   | ID Type              | ID(s)                                                                                                  |
|:---------------|:---------------------|:-------------------------------------------------------------------------------------------------------|
| Root           | Dictionary ID        | 10004                                                                                                  |
| Related        | GPPD ID              | GBR0000174, GBR1000112                                                                                 |
| Related        | Settlement BMU ID    | T_DRAXX-1, T_DRAXX-2, T_DRAXX-3, T_DRAXX-4, T_DRAXX-5, T_DRAXX-6, T_DRAXX-10G, T_DRAXX-12G, T_DRAXX-9G |
| Related        | National Grid BMU ID | DRAXX-1, DRAXX-2, DRAXX-3, DRAXX-4, DRAXX-5, DRAXX-6, DRAXX-10G, DRAXX-12G, DRAXX-9G                   |
| Related        | EIC ID               | 48W00000DRAXX-56, 48W00000DRAXX-64, 48W000DRAXX-10G9, 48W000DRAXX-12G3, 48W0000DRAXX-9GR               |
| Equivalent     | ESAIL ID             | DRAXX                                                                                                  |
| Equivalent     | Common Name          | Drax                                                                                                   |
| Equivalent     | EUTL ID              | 96842                                                                                                  |
| Equivalent     | CfD ID               | INV-DRX-001                                                                                            |

In [13]:
#exports
def filter_dict(dict_, keys_to_select):
    """Returns a new dictionary holding only `keys_to_select` from `dict_`.

    Keys are kept in the order given by `keys_to_select`; a key missing
    from `dict_` raises `KeyError`.
    """
    selected = {}

    for key in keys_to_select:
        selected[key] = dict_[key]

    return selected

def get_datapackage_url_to_alt_indexes(single_site_data):
    """Maps each linked datapackage url to its list of alternative indexes.

    Only the first entry of each dataset's `related_resources` is inspected.
    Datasets whose first resource has no `alt_indexes` key map to an empty
    list, and site data without a `datasets` key gives an empty dictionary.
    """
    if 'datasets' not in single_site_data.keys():
        return {}

    url_to_alt_indexes = {}

    for url, dataset_ref in single_site_data['datasets'].items():
        first_resource = dataset_ref['related_resources'][0]

        if 'alt_indexes' in first_resource.keys():
            # copied into a fresh list so the site data is not shared with the result
            url_to_alt_indexes[url] = list(first_resource['alt_indexes'])
        else:
            url_to_alt_indexes[url] = []

    return url_to_alt_indexes

def get_datapackage_url_to_attributes(single_site_data):
    """Groups the attributes of a site by the datapackage they came from.

    Each attribute is reduced to its `attribute`, `value` and `id` keys and
    appended to the list stored under its `source` url. Site data without
    an `attributes` key gives an empty dictionary.
    """
    url_to_attributes = {}

    if 'attributes' not in single_site_data.keys():
        return url_to_attributes

    for attr in single_site_data['attributes']:
        source_url = attr['source']
        attribute_values = filter_dict(attr, ['attribute', 'value', 'id'])

        # first attribute seen for a source creates its list
        url_to_attributes.setdefault(source_url, []).append(attribute_values)

    return url_to_attributes

In [14]:
# Example: build both per-datapackage lookups for the example site and preview the attributes
datapackage_url_to_alt_indexes = get_datapackage_url_to_alt_indexes(single_site_data)
datapackage_url_to_attrs = get_datapackage_url_to_attributes(single_site_data)

JSON(datapackage_url_to_attrs)

<IPython.core.display.JSON object>

In [15]:
#exports
def get_attrs_df_index_cols(df_attrs):
    """Lists the columns of `df_attrs` that should form the pivot index.

    `attribute` always comes first, followed by every remaining column in
    its existing order except `id` and the integer column `0`.
    """
    excluded = ('attribute', 'id', 0)
    index_cols = ['attribute']

    for col in df_attrs.columns:
        if col not in excluded:
            index_cols.append(col)

    return index_cols

def set_multi_index_names(df, names, capitalise=True):
    """Sets the index level names of `df`, optionally capitalising them.

    Parameters:
        df: dataframe whose index has as many levels as there are `names`
        names: list of strings to use as the index level names
        capitalise: when truthy each name is passed through `str.capitalize`
            (first character upper-cased, the rest lower-cased)

    Returns:
        The same dataframe object; note that its index names are modified
        in place, which makes the function convenient to use with `.pipe`.
    """
    if capitalise:
        names = [name.capitalize() for name in names]

    df.index.names = names

    return df

def create_multi_index_attrs_df(attributes, alt_indexes):
    """Pivots attributes that carry alternative indexes into a wide table.

    Parameters:
        attributes: list of dictionaries with `attribute`, `value` and `id`
            keys; each `value` is expanded into columns with `pd.Series`
            (presumably a mapping of alt-index value to measurement, to be
            confirmed against the extraction step)
        alt_indexes: names of the alternative index levels, e.g. `['year']`

    Returns:
        df_attrs: dataframe indexed by the attribute and alt index levels
            (names capitalised) with one column per ID, or a single `value`
            column when there is only one ID
        attr_ids: list of the IDs found, taken before any column renaming
    """
    df_long = (
        pd.DataFrame(attributes)
        .set_index(['attribute', 'id'])
        ['value']
        .apply(pd.Series)
        # NOTE(review): duplicates are judged on the values only, so two IDs
        # with identical values are collapsed into one - confirm this is intended
        .drop_duplicates()
        .stack()
        .reset_index()
    )

    df_attrs = (
        df_long
        # keyword arguments are required, `DataFrame.pivot` stopped accepting
        # positional arguments in pandas 2.0
        .pivot(index=get_attrs_df_index_cols(df_long), columns='id', values=0)
        .pipe(set_multi_index_names, ['attribute']+alt_indexes)
    )

    attr_ids = list(df_attrs.columns)

    if df_attrs.shape[1] == 1:
        # replacing the columns also discards the `id` columns name
        df_attrs.columns = ['value']

    return df_attrs, attr_ids

In [16]:
# Example: the annual-output dataset is passed through the multi-index path
# together with its alternative indexes
datapackage_url = 'https://raw.githubusercontent.com/OSUKED/Dictionary-Datasets/main/datasets/annual-output/datapackage.json'

alt_indexes = datapackage_url_to_alt_indexes[datapackage_url]
attributes = datapackage_url_to_attrs[datapackage_url]

df_attrs, attr_ids = create_multi_index_attrs_df(attributes, alt_indexes)

df_attrs

Unnamed: 0_level_0,id,DRAXX-1,DRAXX-10G,DRAXX-12G,DRAXX-2,DRAXX-3,DRAXX-4,DRAXX-5,DRAXX-6,DRAXX-9G
Attribute,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Annual Output (MWh),2016,5014662.678,42.712,23.04,4591242.682,3270868.966,2832039.082,2138424.28,2088242.122,27.692
Annual Output (MWh),2017,4036655.434,135.212,81.496,4165307.984,4754720.16,2515965.612,3020489.48,1814090.702,134.016
Annual Output (MWh),2018,5272782.114,194.752,200.656,3775143.036,3765275.156,1827026.434,1671123.542,1979873.838,123.504
Annual Output (MWh),2019,4759282.278,178.772,139.348,2833499.662,3585051.598,2607930.56,416402.33,297244.178,44.04
Annual Output (MWh),2020,5016991.292,218.848,242.96,3925674.326,2675650.946,2844261.942,838405.282,839832.468,59.72


In [17]:
#exports
def create_single_index_attrs_df(attributes):
    """Builds an attribute table for datasets without alternative indexes.

    Parameters:
        attributes: non-empty list of dictionaries with `attribute`,
            `value` and `id` keys

    Returns:
        df_attrs: dataframe indexed by `attribute`. With several IDs there
            is one column per ID (missing combinations are NaN), with a
            single ID there is one column named `Value`
        attr_ids: list of the unique IDs in order of first appearance
    """
    df_attrs = pd.DataFrame(attributes)
    attr_ids = list(df_attrs['id'].unique())

    if len(attr_ids) > 1:
        # keyword arguments are required, `DataFrame.pivot` stopped accepting
        # positional arguments in pandas 2.0
        df_attrs = df_attrs.pivot(index='attribute', columns='id', values='value')
    else:
        df_attrs = (
            df_attrs
            .set_index('attribute')
            .drop(columns='id')
            .rename(columns={'value': 'Value'})
        )

    return df_attrs, attr_ids

In [18]:
# Example: the global-power-plant-database dataset is passed through the single-index path
datapackage_url = 'https://raw.githubusercontent.com/OSUKED/Dictionary-Datasets/main/datasets/global-power-plant-database/datapackage.json'

# NOTE: `alt_indexes` is looked up here but is not passed to `create_single_index_attrs_df`
alt_indexes = datapackage_url_to_alt_indexes[datapackage_url]
attributes = datapackage_url_to_attrs[datapackage_url]

df_attrs, attr_ids = create_single_index_attrs_df(attributes)

df_attrs

id,GBR0000174,GBR1000112
attribute,Unnamed: 1_level_1,Unnamed: 2_level_1
Estimated Annual Generation in 2017,3007.6,348.0
Geolocation Source,UK Renewable Energy Planning Database,CARMA
Installed Capacity (MW),1980.0,75.0
Latitude,53.7356,53.7364
Longitude,-0.9911,-0.9981
Owner,Drax Power,Drax Power Ltd
PLATTS-WEPP ID,1023594.0,
Primary Fuel Type,Coal,Gas
Secondary Fuel Type,Biomass,
Source,UK Renewable Energy Planning Database,Department for Business Energy & Industrial St...


In [19]:
#exports
from frictionless.types.array import type_to_class
from frictionless.field import Field

def construct_attr_to_field_schema(single_site_data):
    """Maps each attribute name of a site to its `field_schema`.

    When the same attribute name appears several times the schema of the
    last occurrence wins. Requires an `attributes` key in `single_site_data`.
    """
    attr_to_field_schema = {}

    for attr in single_site_data['attributes']:
        attr_to_field_schema[attr['attribute']] = attr['field_schema']

    return attr_to_field_schema

def idx_to_attr_name(idx):
    """Extracts the attribute name from a dataframe row label.

    Multi-index rows are labelled with tuples whose first element is the
    attribute name; any other label is returned unchanged.
    """
    return idx[0] if isinstance(idx, tuple) else idx

def get_field_class(attr, attr_to_field_schema):
    """Instantiates the frictionless type class matching an attribute's schema.

    The schema stored under `attr` is wrapped in a frictionless `Field`,
    its `type` is looked up in `type_to_class` and the resulting class is
    instantiated with the field.

    Raises `AssertionError` when `attr` has no entry in `attr_to_field_schema`.
    """
    known_attrs = attr_to_field_schema.keys()
    assert attr in known_attrs, f'`{attr}` was not one of the keys provided: {", ".join(attr_to_field_schema.keys())}'

    field = Field(attr_to_field_schema[attr])
    type_class = type_to_class[field['type']]

    return type_class(field)

def format_attribute_value_types(df_attributes, attr_to_field_schema):
    """Converts the cells of an attributes table using each attribute's field schema.

    Rows whose attribute name has an entry in `attr_to_field_schema` have
    every cell replaced by the result of the matching field class's
    `read_cell`; other rows are left as they are.

    The dataframe is modified in place and also returned.
    """
    for row_label, row in df_attributes.iterrows():
        attr = idx_to_attr_name(row_label)

        if attr not in attr_to_field_schema.keys():
            continue

        field_class = get_field_class(attr, attr_to_field_schema)

        for column, raw_value in row.items():
            df_attributes.loc[row_label, column] = field_class.read_cell(raw_value)

    return df_attributes

In [20]:
# Example: apply the field schemas of the example site to the `df_attrs` table built above
attr_to_field_schema = construct_attr_to_field_schema(single_site_data)

df_attrs = df_attrs.pipe(format_attribute_value_types, attr_to_field_schema)

df_attrs

id,GBR0000174,GBR1000112
attribute,Unnamed: 1_level_1,Unnamed: 2_level_1
Estimated Annual Generation in 2017,3007.6,348.0
Geolocation Source,UK Renewable Energy Planning Database,CARMA
Installed Capacity (MW),1980.0,75.0
Latitude,53.7356,53.7364
Longitude,-0.9911,-0.9981
Owner,Drax Power,Drax Power Ltd
PLATTS-WEPP ID,1023594.0,
Primary Fuel Type,Coal,Gas
Secondary Fuel Type,Biomass,
Source,UK Renewable Energy Planning Database,Department for Business Energy & Industrial St...


In [21]:
#exports
def extract_datapackage_url_to_dict_id_type(single_site_data):
    """Maps each linked datapackage url to the dictionary field it was matched on.

    The field is read from `dictionary_pk_field` of the first entry in the
    dataset's `related_resources`. Requires a `datasets` key.
    """
    url_to_dict_id_type = {}

    for url, dataset in single_site_data['datasets'].items():
        url_to_dict_id_type[url] = dataset['related_resources'][0]['dictionary_pk_field']

    return url_to_dict_id_type

def extract_datapackage_url_to_ids(single_site_data):
    """Collects, for every source datapackage, the IDs its attributes refer to.

    IDs are listed in the order the attributes appear and are not
    de-duplicated.

    Raises `AssertionError` when `single_site_data` has no `attributes` key.
    """
    assert 'attributes' in single_site_data.keys(), '`single_site_data` must contain an attributes key'

    url_to_ids = {}

    for attr in single_site_data['attributes']:
        # the first attribute of a source creates its list
        url_to_ids.setdefault(attr['source'], []).append(attr['id'])

    return url_to_ids

def extract_combined_attrs_df(single_site_data, attr_to_field_schema):
    """Combines the attributes from every linked dataset into one long table.

    Parameters:
        single_site_data: dictionary for one site, it must contain a
            `datasets` key and is expected to contain `attributes`
        attr_to_field_schema: mapping of attribute name to field schema,
            used to convert the attribute values

    Returns:
        A dataframe with one row per attribute value. Column names are
        lower-cased and include `value`, `datapackage` (the source url) and
        `id_type` (the dictionary field the dataset was matched on), plus
        whatever index levels each dataset table had. An empty dataframe is
        returned when the site has no attributes.
    """
    datapackage_url_to_alt_indexes = get_datapackage_url_to_alt_indexes(single_site_data)
    datapackage_url_to_dict_id_type = extract_datapackage_url_to_dict_id_type(single_site_data)

    # frames are collected and concatenated once at the end, `DataFrame.append`
    # was removed in pandas 2.0 and re-copied the data on every iteration
    stacked_frames = []

    for datapackage_url, attrs in get_datapackage_url_to_attributes(single_site_data).items():
        alt_indexes = datapackage_url_to_alt_indexes[datapackage_url]

        if len(alt_indexes) > 0:
            df_attrs, attr_ids = create_multi_index_attrs_df(attrs, alt_indexes)
        else:
            df_attrs, attr_ids = create_single_index_attrs_df(attrs)

        # single-ID tables come back with a generic value column, it is relabelled
        # with the ID so that stacking yields an `id` column like the other tables
        # (`str` guards against non-string column labels)
        if (df_attrs.columns.size == 1) and (str(df_attrs.columns[0]).lower() == 'value'):
            assert len(attr_ids) == 1, f'Expected to have only one ID, instead got: {", ".join(map(str, attr_ids))}'
            df_attrs.columns = pd.Index([attr_ids[0]], name='id')

        df_attrs = format_attribute_value_types(df_attrs, attr_to_field_schema)

        df_stacked_attrs = df_attrs.stack().reset_index().rename(columns={0: 'value'})
        df_stacked_attrs.columns = df_stacked_attrs.columns.str.lower()
        df_stacked_attrs['datapackage'] = datapackage_url
        df_stacked_attrs['id_type'] = datapackage_url_to_dict_id_type[datapackage_url]

        stacked_frames.append(df_stacked_attrs)

    # `pd.concat` raises on an empty list
    if len(stacked_frames) == 0:
        return pd.DataFrame()

    df_combined_attrs = pd.concat(stacked_frames, ignore_index=True)

    return df_combined_attrs

In [22]:
# Example: long-format attributes for the example site, only the first rows are shown
df_combined_attrs = extract_combined_attrs_df(single_site_data, attr_to_field_schema)

df_combined_attrs.head()

Unnamed: 0,attribute,id,value,datapackage,id_type,year,financial_year
0,Fuel Type,DRAXX-1,BIOMASS,https://raw.githubusercontent.com/OSUKED/Dicti...,ngc_bmu_id,,
1,Fuel Type,DRAXX-10G,OCGT,https://raw.githubusercontent.com/OSUKED/Dicti...,ngc_bmu_id,,
2,Fuel Type,DRAXX-12G,OCGT,https://raw.githubusercontent.com/OSUKED/Dicti...,ngc_bmu_id,,
3,Fuel Type,DRAXX-2,BIOMASS,https://raw.githubusercontent.com/OSUKED/Dicti...,ngc_bmu_id,,
4,Fuel Type,DRAXX-3,BIOMASS,https://raw.githubusercontent.com/OSUKED/Dicti...,ngc_bmu_id,,


In [23]:
#exports
def get_datapackage_url_to_attrs_md_str(single_site_data):
    """Renders the attributes of each linked dataset as a markdown table.

    For every source datapackage the attributes are shaped into a table
    (multi-index when the dataset declares alternative indexes, single
    index otherwise), converted using the site's field schemas, cast to
    strings and written out with `to_markdown`.

    Returns a dictionary of `{datapackage url: markdown table string}`.
    """
    attr_to_field_schema = construct_attr_to_field_schema(single_site_data)
    url_to_alt_indexes = get_datapackage_url_to_alt_indexes(single_site_data)
    url_to_attributes = get_datapackage_url_to_attributes(single_site_data)

    url_to_md_str = {}

    for url, dataset_attributes in url_to_attributes.items():
        dataset_alt_indexes = url_to_alt_indexes[url]

        # the returned IDs are not needed for rendering
        if len(dataset_alt_indexes) > 0:
            df_attrs, _ = create_multi_index_attrs_df(dataset_attributes, dataset_alt_indexes)
        else:
            df_attrs, _ = create_single_index_attrs_df(dataset_attributes)

        df_typed = format_attribute_value_types(df_attrs, attr_to_field_schema)
        df_printable = df_typed.reset_index().astype(str)

        url_to_md_str[url] = df_printable.to_markdown(index=False, floatfmt='.2f')

    return url_to_md_str

In [24]:
# Example: markdown attribute tables for the example site, keyed by datapackage url
datapackage_url_to_md_str = get_datapackage_url_to_attrs_md_str(single_site_data)

JSON(datapackage_url_to_md_str)

<IPython.core.display.JSON object>

In [25]:
#exports
def clean_dp_name(dp_name):
    """Turns a hyphenated datapackage name into a title-cased display name."""
    spaced_name = dp_name.replace('-', ' ')

    return spaced_name.title()

def construct_dataset_md_str(dataset_metadata, dataset_attributes, dataset_page_url):
    """Builds the markdown block describing one linked dataset.

    The block contains a linked heading (the cleaned datapackage name
    pointing at `dataset_page_url`), the datapackage description when one
    is present, a sentence naming the dictionary and dataset fields that
    were matched (taken from the first related resource) and finally
    `dataset_attributes`, the pre-rendered attributes table.
    """
    title = clean_dp_name(dataset_metadata['datapackage_name'])

    first_resource = dataset_metadata['related_resources'][0]
    dictionary_column_match = first_resource['dictionary_pk_field']
    dataset_column_match = first_resource['external_fk_field']

    # datasets without a description leave an empty line in its place
    description = dataset_metadata['datapackage_description'] if 'datapackage_description' in dataset_metadata.keys() else ''

    dataset_str = f"""##### <a href="{dataset_page_url}">{title}</a>

{description}

The \"{dictionary_column_match}\" dictionary field was matched to the \"{dataset_column_match}\" field in this dataset.

{dataset_attributes}\n"""

    return dataset_str

def single_site_data_to_datasets_md_str(single_site_data):
    """Renders the `Linked Datasets` markdown section for a single site.

    Every dataset in `single_site_data['datasets']` is rendered with
    `construct_dataset_md_str`, keyed by its `datapackage_json_url` (a url
    that repeats keeps only its last rendering), and the blocks are joined
    with `<br><br>` separators under a `### Linked Datasets` heading.

    Raises `KeyError` when a dataset has no rendered attributes table.
    """
    url_to_attrs_md = get_datapackage_url_to_attrs_md_str(single_site_data)
    url_to_dataset_md = {}

    for metadata in single_site_data['datasets'].values():
        url = metadata['datapackage_json_url']
        page_url = f'https://osuked.github.io/Power-Station-Dictionary/datasets/{metadata["datapackage_name"]}'

        url_to_dataset_md[url] = construct_dataset_md_str(metadata, url_to_attrs_md[url], page_url)

    dataset_blocks = list(url_to_dataset_md.values())

    return '### Linked Datasets\n' + '\n<br><br>\n'.join(dataset_blocks)

In [26]:
# Example: render the linked datasets section for the example site
datasets_md_str = single_site_data_to_datasets_md_str(single_site_data)

Markdown(datasets_md_str)

### Linked Datasets
##### <a href="https://osuked.github.io/Power-Station-Dictionary/datasets/bmu-fuel-types">Bmu Fuel Types</a>



The "ngc_bmu_id" dictionary field was matched to the "NGC_BMU_ID" field in this dataset.

| attribute   | DRAXX-1   | DRAXX-10G   | DRAXX-12G   | DRAXX-2   | DRAXX-3   | DRAXX-4   | DRAXX-5   | DRAXX-6   | DRAXX-9G   |
|:------------|:----------|:------------|:------------|:----------|:----------|:----------|:----------|:----------|:-----------|
| Fuel Type   | BIOMASS   | OCGT        | OCGT        | BIOMASS   | BIOMASS   | BIOMASS   | COAL      | COAL      | OCGT       |

<br><br>
##### <a href="https://osuked.github.io/Power-Station-Dictionary/datasets/plant-locations">Plant Locations</a>



The "dictionary_id" dictionary field was matched to the "dictionary_id" field in this dataset.

| attribute   |   Value |
|:------------|--------:|
| Longitude   |   -0.63 |
| Latitude    |   53.75 |

<br><br>
##### <a href="https://osuked.github.io/Power-Station-Dictionary/datasets/global-power-plant-database">Global Power Plant Database</a>



The "gppd_idnr" dictionary field was matched to the "gppd_idnr" field in this dataset.

| attribute                           | GBR0000174                                                               | GBR1000112                                                                     |
|:------------------------------------|:-------------------------------------------------------------------------|:-------------------------------------------------------------------------------|
| Estimated Annual Generation in 2017 | 3007.6                                                                   | 348.0                                                                          |
| Geolocation Source                  | UK Renewable Energy Planning Database                                    | CARMA                                                                          |
| Installed Capacity (MW)             | 1980.0                                                                   | 75.0                                                                           |
| Latitude                            | 53.7356                                                                  | 53.7364                                                                        |
| Longitude                           | -0.9911                                                                  | -0.9981                                                                        |
| Owner                               | Drax Power                                                               | Drax Power Ltd                                                                 |
| PLATTS-WEPP ID                      | 1023594.0                                                                | NaN                                                                            |
| Primary Fuel Type                   | Coal                                                                     | Gas                                                                            |
| Secondary Fuel Type                 | Biomass                                                                  | None                                                                           |
| Source                              | UK Renewable Energy Planning Database                                    | Department for Business Energy & Industrial Strategy                           |
| URL                                 | https://www.gov.uk/government/collections/renewable-energy-planning-data | https://www.gov.uk/government/collections/digest-of-uk-energy-statistics-dukes |

<br><br>
##### <a href="https://osuked.github.io/Power-Station-Dictionary/datasets/verified-emissions">Verified Emissions</a>



The "eutl_id" dictionary field was matched to the "account_id" field in this dataset.

| Attribute              |   Year |       value |
|:-----------------------|-------:|------------:|
| CO2 Emissions (Tonnes) |   2005 | 20771624.00 |
| CO2 Emissions (Tonnes) |   2006 | 22764847.00 |
| CO2 Emissions (Tonnes) |   2007 | 22160413.00 |
| CO2 Emissions (Tonnes) |   2008 | 22299778.00 |
| CO2 Emissions (Tonnes) |   2009 | 19851702.00 |
| CO2 Emissions (Tonnes) |   2010 | 22392487.00 |
| CO2 Emissions (Tonnes) |   2011 | 21465607.00 |
| CO2 Emissions (Tonnes) |   2012 | 22694684.00 |
| CO2 Emissions (Tonnes) |   2013 | 20317580.00 |
| CO2 Emissions (Tonnes) |   2014 | 16581565.00 |
| CO2 Emissions (Tonnes) |   2015 | 13173987.00 |
| CO2 Emissions (Tonnes) |   2016 |  6171178.00 |
| CO2 Emissions (Tonnes) |   2017 |  6215220.00 |
| CO2 Emissions (Tonnes) |   2018 |  4138782.00 |
| CO2 Emissions (Tonnes) |   2019 |   725751.00 |
| CO2 Emissions (Tonnes) |   2020 |  1527003.00 |

<br><br>
##### <a href="https://osuked.github.io/Power-Station-Dictionary/datasets/annual-output">Annual Output</a>



The "ngc_bmu_id" dictionary field was matched to the "ngc_bmu_id" field in this dataset.

| Attribute           |   Year |    DRAXX-1 |   DRAXX-10G |   DRAXX-12G |    DRAXX-2 |    DRAXX-3 |    DRAXX-4 |    DRAXX-5 |    DRAXX-6 |   DRAXX-9G |
|:--------------------|-------:|-----------:|------------:|------------:|-----------:|-----------:|-----------:|-----------:|-----------:|-----------:|
| Annual Output (MWh) |   2016 | 5014662.68 |       42.71 |       23.04 | 4591242.68 | 3270868.97 | 2832039.08 | 2138424.28 | 2088242.12 |      27.69 |
| Annual Output (MWh) |   2017 | 4036655.43 |      135.21 |       81.50 | 4165307.98 | 4754720.16 | 2515965.61 | 3020489.48 | 1814090.70 |     134.02 |
| Annual Output (MWh) |   2018 | 5272782.11 |      194.75 |      200.66 | 3775143.04 | 3765275.16 | 1827026.43 | 1671123.54 | 1979873.84 |     123.50 |
| Annual Output (MWh) |   2019 | 4759282.28 |      178.77 |      139.35 | 2833499.66 | 3585051.60 | 2607930.56 |  416402.33 |  297244.18 |      44.04 |
| Annual Output (MWh) |   2020 | 5016991.29 |      218.85 |      242.96 | 3925674.33 | 2675650.95 | 2844261.94 |  838405.28 |  839832.47 |      59.72 |

<br><br>
##### <a href="https://osuked.github.io/Power-Station-Dictionary/datasets/capture-prices">Capture Prices</a>



The "ngc_bmu_id" dictionary field was matched to the "ngc_bmu_id" field in this dataset.

| Attribute             |   Year |   DRAXX-1 |   DRAXX-10G |   DRAXX-12G |   DRAXX-2 |   DRAXX-3 |   DRAXX-4 |   DRAXX-5 |   DRAXX-6 |   DRAXX-9G |
|:----------------------|-------:|----------:|------------:|------------:|----------:|----------:|----------:|----------:|----------:|-----------:|
| Capture Price (£/MWh) |   2016 |     38.24 |       30.70 |       37.35 |     38.40 |     38.08 |     42.34 |     46.29 |     44.78 |      42.06 |
| Capture Price (£/MWh) |   2017 |     44.73 |       41.33 |       38.36 |     45.73 |     45.25 |     49.83 |     48.69 |     51.00 |      40.24 |
| Capture Price (£/MWh) |   2018 |     57.12 |      151.90 |      147.49 |     58.01 |     57.63 |     60.85 |     63.24 |     61.93 |     152.00 |
| Capture Price (£/MWh) |   2019 |     41.90 |       70.81 |       62.47 |     43.84 |     42.11 |     42.06 |     55.16 |     58.25 |      65.30 |
| Capture Price (£/MWh) |   2020 |     34.40 |      152.91 |      112.53 |     37.10 |     34.47 |     36.38 |     37.70 |     46.35 |      27.31 |

<br><br>
##### <a href="https://osuked.github.io/Power-Station-Dictionary/datasets/cfd-contract-portfolio-status">Cfd Contract Portfolio Status</a>



The "cfd_id" dictionary field was matched to the "CFD_ID" field in this dataset.

| attribute                               | Value                             |
|:----------------------------------------|:----------------------------------|
| Name of CfD Unit                        | Drax 3rd conversion unit (unit 1) |
| Allocation Round                        | Investment Contract               |
| Technology Type                         | Biomass Conversion                |
| Transmission or Distribution Connection | Transmission                      |
| Status                                  | Post-Start Date                   |
| Estimated Start Date                    | None                              |
| Maximum Contract Capacity (MW)          | 645.0                             |

<br><br>
##### <a href="https://osuked.github.io/Power-Station-Dictionary/datasets/cfd-strike-prices">Cfd Strike Prices</a>



The "cfd_id" dictionary field was matched to the "cfd_id" field in this dataset.

| Attribute              |   Financial_year |   value |
|:-----------------------|-----------------:|--------:|
| Strike Price (GBP/MWh) |             2016 |  105.88 |
| Strike Price (GBP/MWh) |             2017 |  108.02 |
| Strike Price (GBP/MWh) |             2018 |  111.29 |
| Strike Price (GBP/MWh) |             2019 |  113.65 |
| Strike Price (GBP/MWh) |             2020 |  116.49 |
| Strike Price (GBP/MWh) |             2021 |  118.54 |


<br>

### Downloads Section

In [27]:
#exports
def construct_downloads_md_str(object_id):
    """Builds the `Downloads` markdown section for an object.

    The section holds a one-row table whose `Attributes` entry links to
    the object's csv under `object_attrs/` on the project site.
    """
    download_records = [
        {'File': 'Attributes', 'Filepath': f'[{object_id}.csv](https://osuked.github.io/Power-Station-Dictionary/object_attrs/{object_id}.csv)'}
    ]
    file_md_table = pd.DataFrame(download_records).to_markdown(index=False)

    return f"""### Downloads\n

{file_md_table}\n"""

In [28]:
# Example: downloads section for object 10001
object_id = 10001

downloads_str = construct_downloads_md_str(object_id)

Markdown(downloads_str)

### Downloads


| File       | Filepath                                                                              |
|:-----------|:--------------------------------------------------------------------------------------|
| Attributes | [10001.csv](https://osuked.github.io/Power-Station-Dictionary/object_attrs/10001.csv) |


In [35]:
#exports
def construct_contributors_str(object_id):
    """Builds the `Contribute` markdown section for an object.

    The section explains how to submit missing data and ends with two
    button-styled links to Google forms, each pre-filled with `object_id`.
    """
    return f"""### Contribute

We need your help! If you know of any data associated with this power plant which is currently missing then please add it using the relevant Google form which can be accessed with the buttons below.  If you are adding an ID from a linkage which is already known you need to only complete the *Add New Link* form, if the link type is not currently in the dictionary you will need to use the *Add New Link Type* form.\n\nThank You!

[Add New Link](https://docs.google.com/forms/d/e/1FAIpQLSc5jRsQ7NgiLLXbwo9PUdwTQyuqbRwThltG56-o6NVSe7E_nw/viewform?usp=pp_url&entry.251912331={object_id}){{ .md-button }}\n
[Add New Link Type](https://docs.google.com/forms/d/e/1FAIpQLSdQfLmfOR0Vw4Z7gDQAIhBbqIifd1RuSFPKmDQpROhOqjo7ew/viewform?usp=pp_url&entry.2141539628={object_id}){{ .md-button }}"""

In [36]:
# Example: contribute section, reusing `object_id` from the downloads example
contributors_str = construct_contributors_str(object_id)

Markdown(contributors_str)

### Contribute

We need your help! If you know of any data associated with this power plant which is currently missing then please add it using the relevant Google form which can be accessed with the buttons below.  If you are adding an ID from a linkage which is already known you need to only complete the *Add New Link* form, if the link type is not currently in the dictionary you will need to use the *Add New Link Type* form.

Thank You!

[Add New Link](https://docs.google.com/forms/d/e/1FAIpQLSc5jRsQ7NgiLLXbwo9PUdwTQyuqbRwThltG56-o6NVSe7E_nw/viewform?usp=pp_url&entry.251912331=10001){ .md-button }

[Add New Link Type](https://docs.google.com/forms/d/e/1FAIpQLSdQfLmfOR0Vw4Z7gDQAIhBbqIifd1RuSFPKmDQpROhOqjo7ew/viewform?usp=pp_url&entry.2141539628=10001){ .md-button }

<br>

### Populating the Templates

In [39]:
#exports
def extract_name_from_single_site_data(single_site_data):
    """Returns the first `name` found in the site's ID hierarchies.

    The hierarchy levels are searched in their stored order; `None` is
    returned when no level carries a `name`.
    """
    hierarchy_ids = single_site_data['id_hierarchies'].values()
    candidate_names = [ids['name'] for ids in hierarchy_ids if 'name' in ids.keys()]

    return candidate_names[0] if len(candidate_names) > 0 else None

def single_site_data_to_md_str(single_site_data, root_id, datapackage_json_fp):
    """
    Assembles the full markdown body for a single object's page.

    The page is made up of four sections, in order: the ids section, the
    datasets section, the downloads section and the contributors section.
    """
    page_sections = [
        single_site_data_to_ids_md_str(single_site_data, root_id, datapackage_json_fp),
        single_site_data_to_datasets_md_str(single_site_data),
        construct_downloads_md_str(root_id),
        construct_contributors_str(root_id),
    ]

    # Consecutive sections are separated by a blank line and an HTML line break
    return '\n\n<br>\n'.join(page_sections)

def populate_and_save_template(template_fp, save_fp, render_kwargs):
    """
    Renders a Jinja2 template file and writes the rendered text to disk.

    Parameters:
        template_fp: Filepath of the Jinja2 template to render (read as UTF-8)
        save_fp: Filepath the rendered text is written to (UTF-8 encoded)
        render_kwargs: Mapping of template variable names to values, unpacked
            into `Template.render`

    Returns:
        None

    Any error raised while reading, rendering or writing propagates unchanged.
    """
    # Read through a context manager so the template handle is always closed,
    # and with the same encoding that is used for the output file
    with open(template_fp, 'r', encoding='utf-8') as f:
        template_str = f.read()

    rendered_str = Template(template_str).render(**render_kwargs)

    # No try/except around the write: the previous `except e as exc` referenced
    # an undefined name, so a failed write surfaced as a NameError instead of
    # the real error
    with open(save_fp, 'w', encoding='utf-8') as f:
        f.write(rendered_str)

    return None

def clean_object_ids_to_names(object_ids_to_names):
    """
    Orders an id -> name mapping for display.

    Names that start with a letter come first (sorted), followed by all
    remaining names (sorted), e.g. those starting with a digit.

    Parameters:
        object_ids_to_names: dict mapping object ids to their names (strings)

    Returns:
        A new dict containing every id -> name pair in display order. Ids that
        share a name are all retained (a warning is emitted), and an empty
        name is grouped with the non-alphabetic names.
    """
    # Sort the (id, name) pairs directly instead of inverting the mapping,
    # inverting would silently drop all but one id for any duplicated name
    sorted_items = sorted(object_ids_to_names.items(), key=lambda item: item[1])

    names = [name for _, name in sorted_items]
    if len(set(names)) != len(names):
        warn('Some object names are shared by more than one object id')

    # `name[:1]` rather than `name[0]` so an empty name does not raise an IndexError
    alpha_items = [(object_id, name) for object_id, name in sorted_items if name[:1].isalpha()]
    other_items = [(object_id, name) for object_id, name in sorted_items if not name[:1].isalpha()]

    return dict(alpha_items + other_items)

def get_object_ids_to_names(
    site_data: dict,
    datapackage_json_fp,
    use_name_as_suffix: bool=False,
    object_template_fp: str='templates/objects_page.md',
    docs_fp: str='docs'
):
    """
    Writes the page and attribute CSV for every site that has attributes and
    returns the mapping from dictionary id to site name.

    For each site in `site_data` with an `attributes` key this:
      * saves its combined attributes to `{docs_fp}/object_attrs/{dictionary_id}.csv`
      * renders `object_template_fp` to `{docs_fp}/objects/{name or dictionary_id}.md`

    The combined attributes of all sites are also saved to
    `{docs_fp}/object_attrs/dictionary_attributes.csv`.

    Parameters:
        site_data: dict mapping dictionary ids to the data for a single site
        datapackage_json_fp: Filepath to the dictionary datapackage JSON
        use_name_as_suffix: Name the page file after the site name rather than
            the dictionary id
        object_template_fp: Filepath to the Jinja2 template for an object page
        docs_fp: Directory the pages and CSVs are written into

    Returns:
        dict of dictionary id -> name, ordered by `clean_object_ids_to_names`
    """
    object_ids_to_names = {}

    # Per-site frames are collected and concatenated once after the loop:
    # `DataFrame.append` was removed in pandas 2.0 and re-copies the
    # accumulated frame on every iteration
    all_sites_combined_attrs = []

    for dictionary_id, single_site_data in tqdm(site_data.items()):
        # Sites without attributes get neither a page nor a CSV
        if 'attributes' not in single_site_data:
            continue

        attr_to_field_schema = construct_attr_to_field_schema(single_site_data)
        df_combined_attrs = extract_combined_attrs_df(single_site_data, attr_to_field_schema)
        df_combined_attrs.to_csv(f'{docs_fp}/object_attrs/{dictionary_id}.csv', index=False)

        all_sites_combined_attrs.append(df_combined_attrs.assign(dictionary_id=dictionary_id))

        name = extract_name_from_single_site_data(single_site_data)

        if name is not None:
            # A '/' in the name would be read as a directory separator in the save path
            name = name.replace('/', '-').strip()
        else:
            name = dictionary_id

        object_ids_to_names[dictionary_id] = name

        if use_name_as_suffix:
            save_fp = f'{docs_fp}/objects/{name}.md'
        else:
            save_fp = f'{docs_fp}/objects/{dictionary_id}.md'

        render_kwargs = {'site_ids_md_string': single_site_data_to_md_str(single_site_data, dictionary_id, datapackage_json_fp)}
        populate_and_save_template(object_template_fp, save_fp, render_kwargs)

    object_ids_to_names = clean_object_ids_to_names(object_ids_to_names)

    # `pd.concat` raises on an empty list, so fall back to an empty frame
    if all_sites_combined_attrs:
        df_all_sites_combined_attrs = pd.concat(all_sites_combined_attrs)
    else:
        df_all_sites_combined_attrs = pd.DataFrame()

    df_all_sites_combined_attrs.to_csv(f'{docs_fp}/object_attrs/dictionary_attributes.csv', index=False)

    return object_ids_to_names

In [36]:
# Write the object pages and attribute CSVs under ../docs (paths are relative
# to this notebook) and keep the resulting id -> name mapping
object_ids_to_names = get_object_ids_to_names(
    site_data, 
    datapackage_json_fp,
    object_template_fp='../templates/objects_page.md',
    docs_fp='../docs'
)

100%|████████████████████████████████████████████████████████████████████████████████| 277/277 [01:05<00:00,  4.26it/s]


In [44]:
#exports
def construct_object_docs(
    datapackage_fp,
    site_data: str='data/intermediate/site_data.json',
    mkdocs_template_fp: str='templates/mkdocs.yml',
    object_template_fp: str='templates/objects_page.md',
    save_fp: str='mkdocs.yml',
    docs_fp: str='docs'
):
    """
    Builds the object pages and then renders the mkdocs config that lists them.

    `site_data` may be a filepath to the site data JSON, in which case it is
    loaded first; any non-string value is passed through as the site data
    itself. The id -> name mapping returned by `get_object_ids_to_names` is
    handed to the `mkdocs_template_fp` template as `object_ids_to_names` and
    the rendered result is written to `save_fp`.
    """
    # A string is treated as a path to the JSON file rather than the data itself
    if isinstance(site_data, str):
        with open(site_data, 'r') as site_data_file:
            site_data = json.load(site_data_file)

    populate_and_save_template(
        mkdocs_template_fp,
        save_fp,
        {
            'object_ids_to_names': get_object_ids_to_names(
                site_data,
                datapackage_fp,
                object_template_fp=object_template_fp,
                docs_fp=docs_fp,
            )
        },
    )

In [45]:
# Run the full docs build from this notebook: object pages and CSVs go under
# ../docs and the rendered mkdocs config is written to ../mkdocs.yml
construct_object_docs(
    datapackage_json_fp,
    site_data='../data/intermediate/site_data.json',
    mkdocs_template_fp='../templates/mkdocs.yml',
    object_template_fp='../templates/objects_page.md',
    save_fp='../mkdocs.yml',
    docs_fp='../docs'
)

100%|████████████████████████████████████████████████████████████████████████████████| 277/277 [00:52<00:00,  5.31it/s]


In [37]:
#hide
# Import only the exporter instead of star-importing the whole module, so the
# origin of `notebook2script` is explicit and the namespace stays clean
from nbdev.export import notebook2script
notebook2script()

Converted 00-documentation.ipynb.
Converted 01-dictionary-page.ipynb.
Converted 02-attribute extraction.ipynb.
Converted 03-page-population.ipynb.
Converted 04-cli.ipynb.
Converted 05-carbon-intensity.ipynb.
Converted 06-cfd-capture-price-comparison.ipynb.
Converted 07-dataset-pages.ipynb.
Converted 08-papers.ipynb.
Converted 09-id-submission.ipynb.
