In [7]:
import pytz
import datetime
from tqdm import tqdm
import pandas as pd
from functools import lru_cache
from pydantic import parse_obj_as

from powerdict import schemas, db
from powerdict.crawler import bmrs

In [8]:
# Obtain a database client for the dictionary database at '../data/dictionary.db'
# (a relative path, so presumably resolved against the notebook's working directory).
db_client = db.get_db_client(database_name='../data/dictionary.db')
# Ask the client to create its tables before any records are written.
# NOTE(review): presumably a no-op for tables that already exist - confirm in powerdict.db.
db_client.create_tables()

# Bare expression so the notebook displays the client's repr as the cell output.
db_client

<powerdict.db.DbClient at 0x2931683d0>

In [9]:
# Instantiate the BMRS crawler with its default constructor arguments.
bmrs_crawler = bmrs.BmrsCrawler()

# Bare expression so the notebook displays the crawler's repr as the cell output.
bmrs_crawler

<powerdict.crawler.bmrs.BmrsCrawler at 0x292f8ad10>

In [10]:
# Smoke-test window for the crawler: start and end are the same timezone-naive timestamp
# (2022-07-10 01:00), so the request covers a single point in time.
start_date = datetime.datetime(2022, 7, 10, 1)
end_date = datetime.datetime(2022, 7, 10, 1) # N.b. both the start and end dates are inclusive

# Fetch one batch of 'PN' records for the window.
# NOTE(review): 'PN' presumably stands for Physical Notifications - confirm against the BMRS crawler.
physical_ts_batch = bmrs_crawler.get_physical_ts_batch('PN', start_date, end_date)

# Preview the first rows of the batch as a DataFrame.
pd.DataFrame(physical_ts_batch).head()

In [None]:
from typing import Optional
from sqlmodel import SQLModel

def get_settlement_start_date_period(
    db_client: db.DbClient,
    table_schema: SQLModel,
    default_settlement_date: str = '2022-07-10',
    default_settlement_period: int = 2
) -> tuple[str, int]:
    """
    Return the most recent (settlementDate, settlementPeriod) pair held in a table.

    The table named by ``table_schema.__tablename__`` is sorted by settlement
    date and then settlement period, both descending, and only the first row
    is requested.

    Args:
        db_client: Client whose ``run_query`` executes the SQL and hands the
            results object to ``results_func``.
        table_schema: Table model providing ``__tablename__``.
        default_settlement_date: Settlement date returned when the query
            yields no row.
        default_settlement_period: Settlement period returned when the query
            yields no row.

    Returns:
        Whatever ``results.first()`` produced when it is not ``None`` (expected
        to be a two-item date/period row), otherwise the two defaults.
    """
    latest_row_sql = f'SELECT settlementDate, settlementPeriod FROM {table_schema.__tablename__} ORDER BY settlementDate DESC, settlementPeriod DESC LIMIT 1'

    def _take_first(rows):
        # Reduce the results object to its first row (None when there are no rows).
        return rows.first()

    latest_row = db_client.run_query(latest_row_sql, results_func=_take_first)

    # Nothing stored yet: fall back to the caller-supplied defaults.
    if latest_row is None:
        return default_settlement_date, default_settlement_period

    return latest_row
    
def download_physical_data(
    db_client: db.DbClient,
    bmrs_crawler: bmrs.BmrsCrawler,
    start_date: Optional[datetime.datetime] = None,
    end_date: Optional[datetime.datetime] = None,
    run_async: bool = True
) -> None:
    """
    Download 'PN' physical data in day-long batches and store each batch.

    The [start_date, end_date] range is split into windows that begin one day
    apart; every window except the last ends 30 minutes before the next one
    starts, and the last ends at ``end_date``. Each window is fetched with
    ``bmrs_crawler.get_physical_ts_batch``, parsed into
    ``db.BmrsPhysicalDataTable`` records and written with
    ``db_client.create_records``.

    Args:
        db_client: Client used to look up the latest stored settlement period
            and to write the new records.
        bmrs_crawler: Crawler providing ``get_physical_ts_batch`` and the
            ``df_settlement_calendar`` lookup table.
        start_date: First timestamp to request. When ``None`` it is taken as
            30 minutes after the calendar entry for the latest settlement
            date/period found in the table (or that function's defaults when
            the table is empty).
        end_date: Last timestamp to request. When ``None`` the current time is
            used, expressed with the same timezone awareness as ``start_date``.
        run_async: Passed through unchanged to ``get_physical_ts_batch``.

    Raises:
        IndexError: If the latest stored settlement date/period has no entry
            in ``bmrs_crawler.df_settlement_calendar``.
    """
    if start_date is None:
        latest_settlement_date, latest_settlement_period = get_settlement_start_date_period(db_client, db.BmrsPhysicalDataTable)
        # `@name` inside the query string resolves to the local variables above.
        matching_periods = bmrs_crawler.df_settlement_calendar.query(
            'datetime_local.dt.strftime("%Y-%m-%d") == @latest_settlement_date and sp == @latest_settlement_period'
        ).index

        if len(matching_periods) == 0:
            # Same exception type as the bare `.index[0]` would raise, but with a usable message.
            raise IndexError(
                f'No settlement calendar entry found for settlement date {latest_settlement_date!r} '
                f'and settlement period {latest_settlement_period!r}'
            )

        # Resume from the half-hour after the latest stored period.
        start_date = matching_periods[0] + pd.Timedelta(minutes=30)

    if end_date is None:
        now_utc = pd.Timestamp.now(tz=pytz.utc)
        start_tz = pd.Timestamp(start_date).tz

        # pd.date_range raises a TypeError when its endpoints mix naive and aware
        # values or carry different timezones, so express "now" like start_date.
        # NOTE(review): a naive start_date is assumed to be in UTC here - confirm.
        if start_tz is None:
            end_date = now_utc.tz_localize(None)
        else:
            end_date = now_utc.tz_convert(start_tz)

    # Batches begin one day apart; each ends 30 minutes before the next begins
    # (the crawler treats both ends as inclusive) and the last ends at end_date.
    start_dates = pd.date_range(start_date, end_date, freq='1D')
    end_dates = (start_dates[1:] - pd.Timedelta(minutes=30)).append(pd.DatetimeIndex([end_date]))

    # zip truncates to the shorter sequence, so an empty start_dates (start after end) yields no batches.
    start_end_date_combos = list(zip(start_dates, end_dates))

    for batch_start_date, batch_end_date in tqdm(start_end_date_combos):
        bmrs_physical_data = bmrs_crawler.get_physical_ts_batch('PN', batch_start_date, batch_end_date, run_async=run_async)
        db_records = parse_obj_as(list[db.BmrsPhysicalDataTable], bmrs_physical_data)
        db_client.create_records(db_records, db.BmrsPhysicalDataTable.__tablename__)

In [None]:
# Run the download with no explicit start/end, so it resumes from the latest
# settlement period already stored and continues up to the current time.
# run_async=False is passed straight through to the crawler.
# NOTE: a `while True` / `try` / bare `except:` retry loop was sketched around this
# call and left disabled. If it is reinstated, catch specific exceptions and cap the
# number of attempts - a bare `except:` also swallows KeyboardInterrupt, which makes
# the loop impossible to stop from the notebook.
download_physical_data(db_client, bmrs_crawler, run_async=False)

  0%|          | 0/74 [00:00<?, ?it/s]Exception in thread Thread-5:
Traceback (most recent call last):
  File "/opt/homebrew/Cellar/python@3.11/3.11.3/Frameworks/Python.framework/Versions/3.11/lib/python3.11/threading.py", line 1038, in _bootstrap_inner
    self.run()
  File "/Users/ayrton/Documents/GitHub/Power-Station-Dictionary/powerdict/crawler/bmrs.py", line 122, in run
    self.result = asyncio.run(self.func(*self.args, **self.kwargs))
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/Cellar/python@3.11/3.11.3/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/runners.py", line 190, in run
    return runner.run(main)
           ^^^^^^^^^^^^^^^^
  File "/opt/homebrew/Cellar/python@3.11/3.11.3/Frameworks/Python.framework/Versions/3.11/lib/python3.11/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/Cellar/python@3.11/3.11.3/Fr

TypeError: 'NoneType' object is not iterable