In [1]:
import os
from typing import Optional, Any

import pandas as pd
import requests
from tqdm import tqdm

from powerdict import db

In [2]:
# Open a client on the dictionary database stored under ../tests/data
db_client = db.get_db_client(
    database_name='../tests/data/dictionary.db'
)

db_client

<powerdict.db.DbClient at 0x105e96790>

In [3]:
import logging

class ApiClient:
    """Small HTTP client for the API endpoints used in this notebook.

    Wraps the login, DB table schema, data package and asset link-ID
    endpoints. Every method raises `requests.HTTPError` on a non-2xx response
    and otherwise returns the decoded JSON body.

    Args:
        username: If given, `login` is called immediately with `password`.
        password: Password sent alongside `username` when logging in.
        base_url: Root URL of the API; a trailing slash is ignored.
        timeout: Seconds passed to every `requests` call. `None` (the default)
            waits indefinitely, which is the `requests` default behaviour.
    """

    def __init__(
            self,
            username: Optional[str] = None,
            password: Optional[str] = None,
            base_url: str = 'http://127.0.0.1:8000',
            timeout: Optional[float] = None
        ):
        # Strip a trailing slash so the f-string paths below never contain '//'
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        # Always defined so that calling an endpoint before `login` does not
        # fail with an AttributeError
        self.auth_header: Optional[str] = None

        if username is not None:
            self.login(username, password)

    def _headers(self) -> dict[str, str]:
        """Return the request headers, including `Authorization` once logged in."""
        if self.auth_header is None:
            return {}
        return {'Authorization': self.auth_header}

    def login(self, username: str, password: str):
        """Log in and store the resulting `Authorization` header value.

        The credentials are sent as query parameters of a POST to
        `/authentication/login`. They are passed via `params` so that special
        characters (`&`, `#`, spaces, ...) are URL-encoded rather than
        corrupting the query string.

        Raises:
            requests.HTTPError: If the API rejects the login.
        """
        r = requests.post(
            f'{self.base_url}/authentication/login',
            params={'username': username, 'password': password},
            timeout=self.timeout
        )
        r.raise_for_status()
        r_json = r.json()
        # e.g. token_type 'bearer' -> 'Bearer <access_token>'
        self.auth_header = f"{r_json['token_type'].title()} {r_json['access_token']}"

    def get_db_table_schema(self, table_name: str) -> Any:
        """GET `/frictionless/db-schema/{table_name}` and return the JSON body.

        Raises:
            requests.HTTPError: On a non-2xx response.
        """
        r = requests.get(
            f'{self.base_url}/frictionless/db-schema/{table_name}',
            headers=self._headers(),
            timeout=self.timeout
        )
        r.raise_for_status()
        return r.json()

    def post_data_package_fp(self, data_package_fp: str) -> Any:
        """POST to `/frictionless/data-packages` with `data_package_fp` as a query parameter.

        Raises:
            requests.HTTPError: On a non-2xx response.
        """
        r = requests.post(
            f'{self.base_url}/frictionless/data-packages',
            params={'data_package_fp': data_package_fp},
            headers=self._headers(),
            timeout=self.timeout
        )
        r.raise_for_status()
        return r.json()

    def post_asset_link_ids(
            self,
            osuked_id: int,
            linked_ids: dict[str, list[Any]]
        ) -> Any:
        """POST `linked_ids` as JSON to `/dictionary/asset/link-ids/{osuked_id}`.

        On failure the response body is logged as a warning before the error
        is re-raised.

        Args:
            osuked_id: Identifier placed in the URL path.
            linked_ids: Mapping of link type to the list of IDs to link.

        Raises:
            requests.HTTPError: On a non-2xx response.
        """
        # `json=` serialises the payload and sets the Content-Type header itself
        r = requests.post(
            f'{self.base_url}/dictionary/asset/link-ids/{osuked_id}',
            headers=self._headers(),
            json=linked_ids,
            timeout=self.timeout
        )

        try:
            r.raise_for_status()
        except requests.HTTPError:
            # The error body is not guaranteed to be JSON; fall back to the raw
            # text so that a decode failure never masks the HTTP error
            try:
                error_detail = r.json()
            except ValueError:
                error_detail = r.text
            logging.warning(error_detail)
            raise

        return r.json()

In [4]:
# Read the credentials from the environment when set, so real secrets never
# need to be committed in the notebook; the fallbacks are the values this
# notebook originally hard-coded.
api_client = ApiClient(
    os.environ.get('POWERDICT_API_USERNAME', 'tester'),
    os.environ.get('POWERDICT_API_PASSWORD', 'password')
)

api_client

<__main__.ApiClient at 0x148225890>

<br>

### Uploading Data Packages

We'll start by uploading the data packages to the API, one `datapackage.json` per directory listed below.

In [5]:
# Data package directories, given relative to this notebook
fps = [
    '../data/linked-datapackages/plant-locations',
    '../data/linked-datapackages/wind-farms',
    '../data/linked-datapackages/global-power-plant-database',
    '../data/linked-datapackages/nuclear-power-plants',
    '../data/linked-datapackages/cfd-contract-portfolio-status',
    '../data/linked-datapackages/renewable-energy-planning-database',
    '../data/linked-datapackages/bmu-fuel-types',
    '../data/linked-datapackages/jrc-hydro-power-plants-database',

    # the following require handling of time-series indexes
    # '../data/linked-datapackages/cfd-strike-prices',
    # '../data/linked-datapackages/annual-output',
    # '../data/linked-datapackages/load-factors',
    # '../data/linked-datapackages/carbon-intensity',
    # '../data/linked-datapackages/verified-emissions',
    # '../data/linked-datapackages/capture-prices'
]

for fp in tqdm(fps):
    # Send the path without its leading '../'. `removeprefix` drops exactly
    # that prefix, whereas `lstrip('../')` strips ANY run of leading '.' and
    # '/' characters and would mangle a path such as '../.cache/pkg'.
    api_client.post_data_package_fp(fp.removeprefix('../') + '/datapackage.json')

100%|██████████| 8/8 [00:13<00:00,  1.69s/it]


<br>

We can then retrieve their DB table schemas using the API client

In [6]:
# Display the schema returned for the detailed BMU fuel types table
api_client.get_db_table_schema(
    'dict__source_detailed_bmu_fuel_types'
)

{'title': 'DetailedBmuFuelTypesTable',
 'type': 'object',
 'properties': {'ngc_bmu_id': {'title': 'NGC BMU ID',
   'description': 'The Balancing Mechanism Unit identifier used by the National Grid Company',
   'type': 'string'},
  'fuel_type': {'title': 'Fuel Type',
   'description': 'The fuel type consumed by the specified BMU',
   'type': 'string'},
  'comments': {'title': 'Comments', 'type': 'string'}}}

In [7]:
# TODO: migrate the DB to PostgreSQL (🐘)
# TODO: then get the API working with Docker
# TODO: then deploy to ECR/ECS

In [8]:
# Load the ID table and index it by `dictionary_id`
df_ids = (
    pd.read_csv('../data/dictionary/ids.csv')
    .set_index('dictionary_id')
)

df_ids.head(3)

Unnamed: 0_level_0,gppd_idnr,esail_id,name,sett_bmu_id,ngc_bmu_id,4c_offshore_id,windpowernet_id,wikidata_id,wikipedia_id,power_technology_id,eutl_id,eic_id,cfd_id,jrc_id,iaea_id,old_repd_id,new_repd_id,crown_estate_id
dictionary_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
10000,,MARK,Rothes Bio-Plant CHP,"E_MARK-1, E_MARK-2","MARK-1, MARK-2",,,,,,,48W000000MARK-1D,,,,,,
10001,"GBR1000377, GBR1000369",DIDC,Didcot,"T_DIDC1, T_DIDC2, T_DIDC4, T_DIDC3, T_DIDC1G, ...","DIDC1, DIDC2, DIDC4, DIDC3, DIDC1G, DIDC2G, DI...",,,Q3298465,Didcot_power_stations,,97165.0,"48W00000DIDC01G1, 48W00000DIDC02GZ, 48W00000DI...",,,,,,
10002,"GBR1000374, GBR1000375",ABTH,Aberthaw B,"T_ABTH7, T_ABTH8, T_ABTH9, T_ABTH7G, T_ABTH8G,...","ABTH7, ABTH8, ABTH9, ABTH7G, ABTH8G, ABTH9G",,,Q4667192,Aberthaw_power_stations,,97175.0,"48W0000000ABTH7Y, 48W0000000ABTH8W, 48W0000000...",,,,,,


In [10]:
# Load the common names table and index it by `dictionary_id`
df_names = (
    pd.read_csv('../data/linked-datapackages/common-names/common-names.csv')
    .set_index('dictionary_id')
)

df_names.head(3)

Unnamed: 0_level_0,common_name
dictionary_id,Unnamed: 1_level_1
10000,Rothes Bio-Plant CHP
10001,Didcot
10002,Aberthaw B


In [11]:
# Maps each ID column of `df_ids` to the link type name sent to the API
id_col_to_link_type = {
    'gppd_idnr': 'gppd',
    'sett_bmu_id': 'bmu',
    'ngc_bmu_id': 'ngc_bmu',
    'eutl_id': 'eutl',
    'cfd_id': 'cfd',
    'jrc_id': 'jrc',
    'iaea_id': 'iaea',
    'old_repd_id': 'old_repd',
    'new_repd_id': 'repd',

    # IDs without linked tables (for now)
    # '4c_offshore_id': '',
    # 'windpowernet_id': '',
    # 'wikidata_id': '',
    # 'wikipedia_id': '',
    # 'power_technology_id': '',
    # 'eic_id': '',
    # 'crown_estate_id': ''
}

# A plain list is an unambiguous column selector for `.loc`
id_cols = list(id_col_to_link_type)

for osuked_id, common_name in tqdm(df_ids['name'].items(), total=df_ids.shape[0]):
    # Build ONE mapping per asset covering every populated ID column.
    # Previously the dict was re-created inside the per-column loop, so only
    # the last non-null column's IDs were ever posted for each asset.
    linked_ids = {
        # Multiple IDs are stored in a single cell, separated by ', '
        id_col_to_link_type[id_col]: linked_ids_str.split(', ')
        for id_col, linked_ids_str in df_ids.loc[osuked_id, id_cols].dropna().items()
    }

    # Register the asset first, then attach its linked IDs
    db_client.create_record({'osuked_id': osuked_id, 'common_name': common_name}, 'dict__register')
    api_client.post_asset_link_ids(osuked_id, linked_ids)

100%|██████████| 277/277 [00:03<00:00, 70.08it/s]
