# Testing Each Data Stream

<br>

<b>Data Streams:</b>

* Generation by fuel type 
* Forecast demand (TODO: still needs to be added)
* Forecast wind and solar
* Realised wind and solar 
* Imbalance price and volume
* Predicted demand and imbalance volumes
* Imbalance market bids and offers 
* Accepted bids and offers 


<br>

### Imports

In [1]:
import pandas as pd
import numpy as np
from datetime import timedelta, datetime, date
import time

import seaborn as sns
import matplotlib.pyplot as plt
    
import BMRS_caller as BMRS
import warnings

from os import listdir
from os.path import isfile, join

<br>

### Report Grabber Orchestrator

In [2]:
## Per-report configuration consumed by `clean_df` and `orchestrator`.
## Keys are the report codes handed to the BMRS caller; each value may hold:
##   'name'            - human-readable description (not read by the code in this notebook)
##   'pre_filter_cols' - {column: values to keep}, applied before any reshaping
##   'long_2_wide'     - column names used to pivot a long table into a wide one
##   'final_cols'      - columns kept in the cleaned output
##   'extra_kwargs'    - additional keyword arguments forwarded to the caller
##
## TODO: could also add in default kwargs
## TODO: could combine with stream_url_dict
reports_2_grab = {
    ## Generation by fuel type
    'FUELHH': {
        'name': 'Fuel Half-Hour',
        'final_cols': [
            'ccgt', 'oil', 'coal', 'nuclear', 'wind',
            'ps', 'npshyd', 'ocgt', 'other', 'intfr',
            'intirl', 'intned', 'intew', 'biomass', 'intnem',
        ],
    },

    ## Forecast wind and solar
    'B1440': {
        'name': 'Day-Ahead Generation forecasts for Wind and Solar',
        'pre_filter_cols': {
            ## both capitalisations are listed so either spelling is kept
            'processType': ['Day Ahead', 'Day ahead'],
        },
        'long_2_wide': {
            'index': 'local_datetime',
            'column': 'powerSystemResourceType',
            'values': 'quantity',
        },
    },

    ## Realised wind and solar
    'B1630': {
        'name': 'Actual Or Estimated Wind and Solar Power Generation',
        'long_2_wide': {
            'index': 'local_datetime',
            'column': 'powerSystemResourceType',
            'values': 'quantity',
        },
    },

    ## Imbalance price
    'B1770': {
        'name': 'Imbalance Prices',
        'long_2_wide': {
            'index': 'local_datetime',
            'column': 'priceCategory',
            'values': 'imbalancePriceAmountGBP',
        },
    },

    ## Imbalance volume
    'B1780': {
        'name': 'Aggregated Imbalance Volumes',
        'final_cols': ['imbalanceQuantityMAW'],
    },

    ## Predicted margin and imbalance
    'MELIMBALNGC': {
        'name': 'Forecast Day and Day Ahead Margin and Imbalance',
        'final_cols': ['nationalBoundaryIdentifier', 'margin', 'imbalanceValue'],
        'extra_kwargs': {
            'zone': '*',
        },
    },
}

In [9]:
def clean_df(df, report_dict):
    """Filter, reshape and trim a raw report dataframe according to its config.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw report table. Must contain every column named in `report_dict`,
        plus 'local_datetime' when no 'long_2_wide' entry is given.
    report_dict : dict
        Configuration for one report. Recognised (all optional) keys:
        'pre_filter_cols' : {column: list of values to keep}
        'long_2_wide'     : {'index': ..., 'column': ..., 'values': ...} naming the
                            columns used to pivot the table from long to wide
        'final_cols'      : list of columns to keep in the result

    Returns
    -------
    pandas.DataFrame
        Indexed by the 'long_2_wide' index column when pivoting, otherwise
        by 'local_datetime'. The input dataframe is not modified.

    Warns
    -----
    UserWarning
        When rows sharing the same (index, column) pair are found before
        pivoting; only the first occurrence of each pair is kept.
    """
    ## Pre-filter
    for col, vals_keep in report_dict.get('pre_filter_cols', {}).items():
        df = df[df[col].isin(vals_keep)].reset_index(drop=True)

    ## Reformatting Dataframe
    if 'long_2_wide' in report_dict:
        long_2_wide_dict = report_dict['long_2_wide']
        index_col = long_2_wide_dict['index']
        column_col = long_2_wide_dict['column']
        values_col = long_2_wide_dict['values']

        ## Removing duplicates - pivot cannot handle repeated (index, column) pairs
        s_bool_duplicate = df.duplicated(subset=[index_col, column_col])

        if s_bool_duplicate.any():
            ## Report against the configured index column rather than a hard-coded name
            duplicate_dates = df.loc[s_bool_duplicate, index_col].unique()
            df = df[~s_bool_duplicate]
            warnings.warn(f'Duplicates were found for {duplicate_dates}')

        ## Keyword arguments are required: pandas 2.0 made index/columns/values keyword-only
        df = df.pivot(index=index_col, columns=column_col, values=values_col)
    else:
        df = df.set_index('local_datetime')

    ## Filtering columns
    if 'final_cols' in report_dict:
        df = df[report_dict['final_cols']]

    return df

In [5]:
def orchestrator(reports_2_grab, API_key, start_date, end_date):
    """Download, clean and save each configured report that is not yet on disk.

    For every report code in `reports_2_grab`, the file
    'data/<report>_<start_date.year>.csv' is looked up in a listing of the
    'data' directory taken once at the start. Reports already present are
    skipped; the rest are fetched through `BMRS.Caller`, passed through
    `clean_df` and written to that path. A one-line status is printed per report.

    Parameters
    ----------
    reports_2_grab : dict
        Maps report code -> report configuration (see `clean_df`); an optional
        'extra_kwargs' entry is forwarded to the caller's `call` method.
    API_key : str
        Key handed to `BMRS.Caller`.
    start_date, end_date
        Passed unchanged to `Caller.call`; `start_date.year` names the output file.

    Returns
    -------
    None
    """
    already_saved = listdir('data')

    for stream, stream_spec in reports_2_grab.items():
        year = start_date.year
        csv_name = f'{stream}_{year}.csv'

        ## Nothing to do when this report/year has been saved previously
        if csv_name in already_saved:
            print(f'{stream} already complete for {year}')
            continue

        t_begin = time.time()

        ## Initialise caller for the data stream
        stream_caller = BMRS.Caller(API_key, stream)

        ## Grab the report and parse into a dataframe (extra kwargs default to none)
        call_kwargs = stream_spec.get('extra_kwargs', {})
        df_raw = stream_caller.call(start_date=start_date, end_date=end_date, **call_kwargs)

        ## Clean the dataframe and persist it
        df_clean = clean_df(df_raw, stream_spec)
        df_clean.to_csv(f'data/{csv_name}')

        ## Calculating elapsed time as whole minutes plus leftover whole seconds
        elapsed_sec = time.time() - t_begin
        mins = int(elapsed_sec / 60)
        secs = int(elapsed_sec - 60 * mins)

        ## Summary print
        print(f'{stream} complete for {year}, time: {mins} minutes and {secs} seconds')

    return

<br>

### Test Call

In [6]:
## Single-report smoke test: fetch one report directly and run it through clean_df.
API_key = 'api_key'  # placeholder value - NOTE(review): supply a real key outside the notebook
report = 'B1630'
## NOTE(review): start_date is a `date` while end_date is a `datetime` -
## confirm that BMRS.Caller.call accepts the mixed types.
start_date = date(2018, 11, 1)
end_date = datetime(2018, 12, 31, 23, 30)

Caller = BMRS.Caller(API_key, report)
df_raw = Caller.call(start_date=start_date, end_date=end_date)#, **reports_2_grab[report]['extra_kwargs'])
df_clean = clean_df(df_raw, reports_2_grab[report])

## Last expression of the cell, so the first rows are rendered as the cell output
df_clean.head()



powerSystemResourceType,"""Solar""","""Wind Offshore""","""Wind Onshore"""
local_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-11-01 00:00:00+00:00,0,2333.279,1452.511
2018-11-01 00:30:00+00:00,0,1844.153,1449.257
2018-11-01 01:00:00+00:00,0,1577.876,1351.243
2018-11-01 01:30:00+00:00,0,1535.398,1362.091
2018-11-01 02:00:00+00:00,0,1381.419,1219.353


In [7]:
df_clean.tail()

powerSystemResourceType,"""Solar""","""Wind Offshore""","""Wind Onshore"""
local_datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-12-31 21:30:00+00:00,0,2952.542,6360.033
2018-12-31 22:00:00+00:00,0,3032.464,6466.579
2018-12-31 22:30:00+00:00,0,3008.399,6214.767
2018-12-31 23:00:00+00:00,0,3085.224,6268.369
2018-12-31 23:30:00+00:00,0,3177.29,6196.479


<br>

### Orchestration Call Test

In [12]:
## Full run: one orchestrator pass per calendar year.
API_key = 'api_key'  # placeholder value - NOTE(review): supply a real key outside the notebook

## range(2018, 2019) yields only 2018; widen the range to collect more years
for year in range(2018, 2019):
    start_date = date(year, 1, 1)
    end_date = datetime(year, 12, 31, 23, 30)

    ## Reports whose yearly CSV is already in 'data' are skipped by the orchestrator
    orchestrator(reports_2_grab, API_key, start_date, end_date)

FUELHH already complete for 2018
B1440 already complete for 2018
B1630 already complete for 2018
B1770 complete for 2018, time: 56 minutes and 7 seconds


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  sort=sort)


B1780 complete for 2018, time: 61 minutes and 51 seconds
MELIMBALNGC complete for 2018, time: 5 minutes and 17 seconds


In [None]:
## TODO: test automatic reindexing and the behaviour when the data contains NaNs