In [1]:
import json
import os
from datetime import datetime
from urllib.error import URLError, HTTPError
from urllib.request import urlopen

import numpy as np
import pandas as pd
from dateutil.parser import parse
from pandas.tseries.offsets import *

In [2]:
def pickDataset(tok, startdate, enddate, offsetdays, serieslist, client_factory=None):
    '''
    Assemble dataset of demand data per balancing authority for desired date range.

    Parameters:
    - tok : token obtained by registering with EIA
    - startdate : first hour of the range (string or datetime)
    - enddate : end of the range before the offset is applied (string or datetime)
    - offsetdays : number of calendar days trimmed from enddate, so that the
      most recent (not yet stabilized) hours are left out
    - serieslist : list of demand series names provided by EIA
    - client_factory : optional callable(tok, [series]) returning an object
      with a GetData() method; defaults to EIAgov. Lets the download be
      replaced, e.g. by a cache or a stub.

    Returns:
    - Dataframe indexed hourly from startdate to enddate - offsetdays, with one
      column per BA series name. Reported timestamps are converted to UTC and
      made timezone-naive; hours without a reported value are NaN.

    '''
    if client_factory is None:
        client_factory = EIAgov

    # pd.Timestamp() lets enddate be a string as well as a datetime.
    last_hour = pd.Timestamp(enddate) - pd.DateOffset(days=offsetdays)
    timespan = pd.date_range(startdate, last_hour, freq='h')

    frames = []
    # Iterate the parameter, not a notebook-level global list.
    for BA in serieslist:
        print(BA)
        raw = client_factory(tok, [BA]).GetData()

        # Normalize to naive UTC so the labels are comparable with timespan.
        stamps = pd.DatetimeIndex(pd.to_datetime(raw['Date'], utc=True)).tz_localize(None)
        values = raw.drop(columns=['Date'])
        values.index = stamps

        # Reindexing fills in missing hours and drops anything outside the
        # requested range (an outer join would keep those rows).
        frames.append(values.reindex(timespan))

    if not frames:
        return pd.DataFrame(index=timespan)

    # All frames share the same index, so one concat is enough.
    return pd.concat(frames, axis=1)
    

'''
Class EIAgov copied from https://quantcorner.wordpress.com/2014/11/18/downloading-eias-data-with-python/ on 8/13/2018
'''
class EIAgov(object):
    '''
    Small client that downloads series from the EIA series API and returns
    them as a pandas dataframe.
    '''

    def __init__(self, token, series):
        '''
        Purpose:
        Initialise the EIAgov class by requesting:
        - EIA token
        - id code(s) of the series to be downloaded

        Parameters:
        - token: string
        - series: string or list of strings
        '''
        self.token = token
        self.series = series

    def Raw(self, ser):
        '''
        Download one series and return the decoded JSON payload.

        Parameters:
        - ser: string, series id (upper-cased before it is sent)

        Returns:
        - the parsed JSON response, or None when the request fails with an
          HTTPError/URLError (the error is printed, not raised)
        '''
        # NOTE(review): the API key travels in the query string of a plain-http
        # URL; consider https if the endpoint supports it.
        url = 'http://api.eia.gov/series/?api_key=' + self.token + '&series_id=' + ser.upper()

        try:
            # The context manager closes the connection even if read() fails.
            with urlopen(url) as response:
                raw_byte = response.read()
            # 'utf-8-sig' drops a leading byte-order mark if one is present.
            return json.loads(str(raw_byte, 'utf-8-sig'))

        except HTTPError as e:
            print('HTTP error type.')
            print('Error code: ', e.code)

        except URLError as e:
            print('URL type error.')
            print('Reason: ', e.reason)

        return None

    def _series_rows(self, ser):
        '''Return the list of [date, value] rows for one series, or raise RuntimeError.'''
        payload = self.Raw(ser)
        try:
            return payload['series'][0]['data']
        except (TypeError, KeyError, IndexError):
            # Raw() returned None, or the response carries no series data.
            raise RuntimeError('No data returned by EIA for series ' + repr(ser))

    def GetData(self):
        '''
        Download every requested series and assemble them into one dataframe.

        Returns:
        - Dataframe with a 'Date' column taken from the first series and one
          column per series id. Further series are matched to those dates by
          date value; a date a series does not report is left empty.

        Raises:
        - RuntimeError when a series cannot be downloaded or its response
          contains no data.
        '''
        # A bare string would otherwise be indexed character by character.
        names = [self.series] if isinstance(self.series, str) else list(self.series)
        if not names:
            return pd.DataFrame(columns=['Date'])

        df = None
        for name in names:
            # Each series is requested exactly once.
            rows = self._series_rows(name)
            if df is None:
                df = pd.DataFrame({'Date': [row[0] for row in rows]})
                df[name] = [row[1] for row in rows]
            else:
                # Align on the date itself: series may differ in length or order.
                by_date = {row[0]: row[1] for row in rows}
                df[name] = [by_date.get(day) for day in df['Date']]

        return df



In [3]:
# The EIA API key is a credential, so it is read from the environment rather
# than stored in the notebook. A key that was committed earlier should be
# revoked and replaced.
token = os.environ.get('EIA_API_KEY')
if not token:
    raise RuntimeError('Set the EIA_API_KEY environment variable to your EIA API key.')

offset = 3    # days trimmed from the end of the range (see pickDataset)
start = '2015-07-01 07:00:00'
# The series are hourly UTC, so take "now" in UTC (timezone-naive) rather than
# in the local timezone of the machine running the notebook.
end = pd.Timestamp.now(tz='UTC').tz_localize(None)

demand_list = ['EBA.AVA-ALL.D.H',
               'EBA.AZPS-ALL.D.H',
               'EBA.BANC-ALL.D.H',
               'EBA.BPAT-ALL.D.H',
               'EBA.CHPD-ALL.D.H',
               'EBA.CISO-ALL.D.H',
               'EBA.DOPD-ALL.D.H',
               'EBA.EPE-ALL.D.H',
               'EBA.GCPD-ALL.D.H',
               'EBA.IID-ALL.D.H',
               'EBA.IPCO-ALL.D.H',
               'EBA.LDWP-ALL.D.H',
               'EBA.NEVP-ALL.D.H',
               'EBA.NWMT-ALL.D.H',
               'EBA.PACE-ALL.D.H',
               'EBA.PACW-ALL.D.H',
               'EBA.PGE-ALL.D.H',
               'EBA.PSCO-ALL.D.H',
               'EBA.PSEI-ALL.D.H',
               'EBA.SRP-ALL.D.H',
               'EBA.SCL-ALL.D.H',
               'EBA.TEPC-ALL.D.H',
               'EBA.TIDC-ALL.D.H',
               'EBA.TPWR-ALL.D.H',
               'EBA.WALC-ALL.D.H',
               'EBA.WACM-ALL.D.H',
               'EBA.WAUW-ALL.D.H']


this = pickDataset(token, start, end, offset, demand_list)

this.head()

EBA.AVA-ALL.D.H
EBA.AZPS-ALL.D.H
EBA.BANC-ALL.D.H
EBA.BPAT-ALL.D.H
EBA.CHPD-ALL.D.H
EBA.CISO-ALL.D.H
EBA.DOPD-ALL.D.H
EBA.EPE-ALL.D.H
EBA.GCPD-ALL.D.H
EBA.IID-ALL.D.H
EBA.IPCO-ALL.D.H
EBA.LDWP-ALL.D.H
EBA.NEVP-ALL.D.H
EBA.NWMT-ALL.D.H
EBA.PACE-ALL.D.H
EBA.PACW-ALL.D.H
EBA.PGE-ALL.D.H
EBA.PSCO-ALL.D.H
EBA.PSEI-ALL.D.H
EBA.SRP-ALL.D.H
EBA.SCL-ALL.D.H
EBA.TEPC-ALL.D.H
EBA.TIDC-ALL.D.H
EBA.TPWR-ALL.D.H
EBA.WALC-ALL.D.H
EBA.WACM-ALL.D.H
EBA.WAUW-ALL.D.H


Unnamed: 0,EBA.AVA-ALL.D.H,EBA.AZPS-ALL.D.H,EBA.BANC-ALL.D.H,EBA.BPAT-ALL.D.H,EBA.CHPD-ALL.D.H,EBA.CISO-ALL.D.H,EBA.DOPD-ALL.D.H,EBA.EPE-ALL.D.H,EBA.GCPD-ALL.D.H,EBA.IID-ALL.D.H,...,EBA.PSCO-ALL.D.H,EBA.PSEI-ALL.D.H,EBA.SRP-ALL.D.H,EBA.SCL-ALL.D.H,EBA.TEPC-ALL.D.H,EBA.TIDC-ALL.D.H,EBA.TPWR-ALL.D.H,EBA.WALC-ALL.D.H,EBA.WACM-ALL.D.H,EBA.WAUW-ALL.D.H
2015-07-01 07:00:00,,,,,,,,,,,...,4875.0,,,,,,,,,
2015-07-01 08:00:00,1192.0,4478.0,2513.0,,434.0,31486.0,177.0,925.0,663.0,505.0,...,4618.0,3066.0,4103.0,873.0,1605.0,408.0,468.0,1119.0,,
2015-07-01 09:00:00,1108.0,4227.0,2275.0,,422.0,28989.0,169.0,856.0,654.0,482.0,...,4427.0,2865.0,3897.0,833.0,1537.0,380.0,441.0,1018.0,,
2015-07-01 10:00:00,1058.0,4016.0,2104.0,,416.0,27416.0,161.0,839.0,646.0,474.0,...,4344.0,2735.0,3694.0,802.0,1487.0,357.0,420.0,1039.0,,
2015-07-01 11:00:00,1024.0,3879.0,1988.0,,413.0,26388.0,160.0,827.0,642.0,450.0,...,4374.0,2710.0,3590.0,796.0,1470.0,342.0,417.0,1019.0,,


In [4]:
# First five rows of the assembled demand table.
this.head(n=5)

Unnamed: 0,EBA.AVA-ALL.D.H,EBA.AZPS-ALL.D.H,EBA.BANC-ALL.D.H,EBA.BPAT-ALL.D.H,EBA.CHPD-ALL.D.H,EBA.CISO-ALL.D.H,EBA.DOPD-ALL.D.H,EBA.EPE-ALL.D.H,EBA.GCPD-ALL.D.H,EBA.IID-ALL.D.H,...,EBA.PSCO-ALL.D.H,EBA.PSEI-ALL.D.H,EBA.SRP-ALL.D.H,EBA.SCL-ALL.D.H,EBA.TEPC-ALL.D.H,EBA.TIDC-ALL.D.H,EBA.TPWR-ALL.D.H,EBA.WALC-ALL.D.H,EBA.WACM-ALL.D.H,EBA.WAUW-ALL.D.H
2015-07-01 07:00:00,,,,,,,,,,,...,4875.0,,,,,,,,,
2015-07-01 08:00:00,1192.0,4478.0,2513.0,,434.0,31486.0,177.0,925.0,663.0,505.0,...,4618.0,3066.0,4103.0,873.0,1605.0,408.0,468.0,1119.0,,
2015-07-01 09:00:00,1108.0,4227.0,2275.0,,422.0,28989.0,169.0,856.0,654.0,482.0,...,4427.0,2865.0,3897.0,833.0,1537.0,380.0,441.0,1018.0,,
2015-07-01 10:00:00,1058.0,4016.0,2104.0,,416.0,27416.0,161.0,839.0,646.0,474.0,...,4344.0,2735.0,3694.0,802.0,1487.0,357.0,420.0,1039.0,,
2015-07-01 11:00:00,1024.0,3879.0,1988.0,,413.0,26388.0,160.0,827.0,642.0,450.0,...,4374.0,2710.0,3590.0,796.0,1470.0,342.0,417.0,1019.0,,


In [5]:
# Number of missing hourly values per series.
missing_per_series = this.isna().sum()
print(missing_per_series)


EBA.AVA-ALL.D.H      939
EBA.AZPS-ALL.D.H     652
EBA.BANC-ALL.D.H     744
EBA.BPAT-ALL.D.H    1976
EBA.CHPD-ALL.D.H     455
EBA.CISO-ALL.D.H     455
EBA.DOPD-ALL.D.H    2195
EBA.EPE-ALL.D.H      401
EBA.GCPD-ALL.D.H     432
EBA.IID-ALL.D.H      832
EBA.IPCO-ALL.D.H     534
EBA.LDWP-ALL.D.H    1019
EBA.NEVP-ALL.D.H    1314
EBA.NWMT-ALL.D.H     938
EBA.PACE-ALL.D.H    3144
EBA.PACW-ALL.D.H    4273
EBA.PGE-ALL.D.H      915
EBA.PSCO-ALL.D.H     820
EBA.PSEI-ALL.D.H     721
EBA.SRP-ALL.D.H     1483
EBA.SCL-ALL.D.H      535
EBA.TEPC-ALL.D.H    1701
EBA.TIDC-ALL.D.H     525
EBA.TPWR-ALL.D.H     482
EBA.WALC-ALL.D.H     673
EBA.WACM-ALL.D.H    1249
EBA.WAUW-ALL.D.H    4758
dtype: int64


In [6]:
# Last five rows of the assembled demand table.
this.tail(n=5)

Unnamed: 0,EBA.AVA-ALL.D.H,EBA.AZPS-ALL.D.H,EBA.BANC-ALL.D.H,EBA.BPAT-ALL.D.H,EBA.CHPD-ALL.D.H,EBA.CISO-ALL.D.H,EBA.DOPD-ALL.D.H,EBA.EPE-ALL.D.H,EBA.GCPD-ALL.D.H,EBA.IID-ALL.D.H,...,EBA.PSCO-ALL.D.H,EBA.PSEI-ALL.D.H,EBA.SRP-ALL.D.H,EBA.SCL-ALL.D.H,EBA.TEPC-ALL.D.H,EBA.TIDC-ALL.D.H,EBA.TPWR-ALL.D.H,EBA.WALC-ALL.D.H,EBA.WACM-ALL.D.H,EBA.WAUW-ALL.D.H
2018-08-24 18:00:00,,4324.0,2021.0,,178.0,30099.0,,,662.0,,...,5721.0,3445.0,4604.0,1099.0,,344.0,517.0,1131.0,3323.0,106.0
2018-08-24 19:00:00,,4613.0,2064.0,,186.0,30339.0,,,668.0,,...,6001.0,3450.0,4899.0,1105.0,,352.0,531.0,1188.0,3436.0,105.0
2018-08-24 20:00:00,,4858.0,2153.0,,188.0,30870.0,,,678.0,,...,6322.0,3443.0,5208.0,1105.0,,365.0,529.0,1228.0,3521.0,107.0
2018-08-24 21:00:00,,5176.0,2262.0,,190.0,31867.0,,,686.0,,...,6690.0,3490.0,5444.0,1109.0,,381.0,522.0,1273.0,3611.0,115.0
2018-08-24 22:00:00,,5383.0,2369.0,,192.0,33046.0,,,688.0,,...,6965.0,3464.0,2551.0,1100.0,,400.0,97.0,1312.0,0.0,114.0


In [7]:
# Number of series (columns) in the table.
this.shape[1]

27

In [8]:
# Summary statistics per series, with the quartiles spelled out.
this.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,EBA.AVA-ALL.D.H,EBA.AZPS-ALL.D.H,EBA.BANC-ALL.D.H,EBA.BPAT-ALL.D.H,EBA.CHPD-ALL.D.H,EBA.CISO-ALL.D.H,EBA.DOPD-ALL.D.H,EBA.EPE-ALL.D.H,EBA.GCPD-ALL.D.H,EBA.IID-ALL.D.H,...,EBA.PSCO-ALL.D.H,EBA.PSEI-ALL.D.H,EBA.SRP-ALL.D.H,EBA.SCL-ALL.D.H,EBA.TEPC-ALL.D.H,EBA.TIDC-ALL.D.H,EBA.TPWR-ALL.D.H,EBA.WALC-ALL.D.H,EBA.WACM-ALL.D.H,EBA.WAUW-ALL.D.H
count,26677.0,26964.0,26872.0,25640.0,27161.0,27161.0,25421.0,27215.0,27184.0,26784.0,...,26796.0,26895.0,26133.0,27081.0,25915.0,27091.0,27134.0,26943.0,26367.0,22858.0
mean,1356.557176,3743.307,2036.064007,6297.84688,237.56316,26590.825006,176.793242,977.354216,568.029061,416.949335,...,4992.416032,3406.317531,3424.513106,1109.974225,1683.767934,306.841792,555.165512,1154.468656,2942.012402,92.841981
std,466.526379,14463.85,537.319708,990.477522,102.682058,5177.315353,44.66731,283.870627,82.013924,183.212869,...,928.463177,640.583056,1847.61661,218.045182,689.672251,87.260654,112.219163,3981.858373,381.421052,19.093169
min,748.0,2023.0,0.0,2600.0,-85.0,18068.0,0.0,-7342.0,362.0,-29.0,...,0.0,679.0,-755.0,0.0,890.0,0.0,-12.0,-92392.0,-933.0,0.0
25%,1148.0,2805.0,1696.0,5620.0,159.0,22840.0,143.0,787.0,507.0,281.0,...,4381.75,2958.0,2643.0,974.0,1399.0,250.0,480.0,886.0,2691.0,79.0
50%,1327.0,3275.0,1905.0,6214.0,201.0,25620.0,167.0,901.0,558.0,357.0,...,4890.0,3379.0,3042.0,1109.0,1562.0,284.0,540.0,1016.0,2889.0,90.0
75%,1558.0,4155.25,2151.25,6886.0,293.0,28766.0,205.0,1108.0,619.0,515.0,...,5423.0,3823.0,3935.0,1236.0,1843.0,337.0,625.0,1184.0,3162.0,105.0
max,58854.0,1680538.0,4763.0,11827.0,591.0,49899.0,397.0,5150.0,982.0,1081.0,...,60576.0,5504.0,247000.0,11583.0,66155.0,653.0,998.0,401364.0,14528.0,168.0


In [9]:
# Earliest timestamp in the table; the vectorized Index.min() avoids iterating
# every timestamp in Python.
this.index.min()

Timestamp('2015-07-01 07:00:00', freq='H')

In [10]:
# Latest timestamp in the table; the vectorized Index.max() avoids iterating
# every timestamp in Python.
this.index.max()

Timestamp('2018-08-24 22:00:00', freq='H')