In [1]:
import json
import numpy as np
import pandas as pd
from urllib.error import URLError, HTTPError
from urllib.request import urlopen

from datetime import datetime
from dateutil.parser import parse

In [2]:
'''
Code copied from https://quantcorner.wordpress.com/2014/11/18/downloading-eias-data-with-python/ on 8/13/2018
'''

class EIAgov(object):
    '''
    Small client for the EIA series endpoint: downloads one or more series
    and assembles them into a single pandas DataFrame keyed by date.
    '''

    def __init__(self, token, series):
        '''
        Purpose:
        Initialise the EIAgov class by requesting:
        - EIA token
        - id code(s) of the series to be downloaded

        Parameters:
        - token: string
        - series: string or list of strings
        '''
        self.token = token
        self.series = series

    def _series_ids(self):
        '''
        Return the requested series ids as a list.

        A bare string is wrapped in a one-element list so that it is not
        iterated character by character.
        '''
        if isinstance(self.series, str):
            return [self.series]
        return list(self.series)

    def Raw(self, ser):
        '''
        Purpose:
        Download one series and return the decoded JSON document.

        Parameters:
        - ser: string, id code of the series (upper-cased before the request)

        Returns:
        - the parsed JSON object, or None when the request fails with an
          HTTPError or URLError (the error is printed, not raised)
        '''
        # Construct url
        url = 'http://api.eia.gov/series/?api_key=' + self.token + '&series_id=' + ser.upper()

        try:
            # The context manager closes the connection even if read() fails.
            with urlopen(url) as response:
                raw_byte = response.read()

        # HTTPError is a subclass of URLError, so it has to be handled first.
        except HTTPError as e:
            print('HTTP error type.')
            print('Error code: ', e.code)
            return None

        except URLError as e:
            print('URL type error.')
            print('Reason: ', e.reason)
            return None

        # 'utf-8-sig' drops a leading byte-order mark if one is present.
        raw_string = str(raw_byte, 'utf-8-sig')
        return json.loads(raw_string)

    def _series_rows(self, ser):
        '''
        Return the list of [date, value] rows for one series.

        Raises:
        - RuntimeError: when Raw() returned nothing, or the response does not
          have the ['series'][0]['data'] layout this class reads.
        '''
        payload = self.Raw(ser)
        if payload is None:
            raise RuntimeError('No response received for series %r.' % (ser,))
        try:
            return payload['series'][0]['data']
        except (KeyError, IndexError, TypeError) as err:
            raise RuntimeError('Response for series %r contains no series data.' % (ser,)) from err

    @staticmethod
    def _align(dates, rows):
        '''
        Return the values of `rows` ordered like `dates`.

        When the row dates match `dates` exactly the values are taken by
        position. Otherwise each value is looked up by its date (first
        occurrence wins) and dates missing from `rows` become NaN.
        '''
        if [row[0] for row in rows] == dates:
            return [row[1] for row in rows]

        by_date = {}
        for row in rows:
            by_date.setdefault(row[0], row[1])
        missing = float('nan')
        return [by_date.get(day, missing) for day in dates]

    def GetData(self):
        '''
        Purpose:
        Download every requested series and combine them in one DataFrame.

        Returns:
        - DataFrame with a 'Date' column taken from the first series,
          followed by one column per series id. Values of the other series
          are matched on date; dates they do not contain are NaN.

        Raises:
        - ValueError: when no series id was supplied
        - RuntimeError: when a series could not be downloaded or parsed
        '''
        series_ids = self._series_ids()
        if not series_ids:
            raise ValueError('At least one series id is required.')

        # Each series is requested exactly once.
        downloaded = [(ser, self._series_rows(ser)) for ser in series_ids]

        # Deal with the date series: the first series defines the rows.
        dates = [row[0] for row in downloaded[0][1]]

        # Create dataframe
        df = pd.DataFrame({'Date': dates})

        # Deal with data
        for ser, rows in downloaded:
            df[ser] = self._align(dates, rows)

        return df



In [27]:
#Download multiple BA demand data
#To do: download all time series data per BA and do quality checks; but first check if EIA will release code they use for QC

if __name__ == '__main__':
    import os

    # The API token is read from the environment so that the secret is not
    # stored in (and shared with) the notebook.
    tok = os.environ.get('EIA_API_KEY')
    if not tok:
        raise RuntimeError('Set the EIA_API_KEY environment variable to your EIA API token.')

    # Series ids to download; demand_list0 wraps each id in its own list
    # because every series is requested through a separate EIAgov instance.
    demand_list1 = ['EBA.AVA-ALL.D.H','EBA.BANC-ALL.D.H']
    demand_list0 = [[series_id] for series_id in demand_list1]

    # One hourly DataFrame per series id.
    df={}
    for x in demand_list0:
        BA = x[0]
        d = EIAgov(tok, x)
        df[BA] = (
            d.GetData()
            .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
            .set_index('Date')
            # Put the data on a regular hourly grid; missing hours become NaN.
            .resample('H')
            .asfreq()
        )

    # Combine side by side on the shared hourly index: one column per series.
    # Concatenating along the rows would stack the frames and leave every
    # column NaN for the rows that belong to the other series.
    df_all = pd.concat([df[x] for x in demand_list1], axis=1)
        


In [28]:
# Display the dict of per-series DataFrames built in the previous cell (keys are the series ids).
df

{'EBA.AVA-ALL.D.H':                      EBA.AVA-ALL.D.H
 Date                                
 2015-07-01 08:00:00           1192.0
 2015-07-01 09:00:00           1108.0
 2015-07-01 10:00:00           1058.0
 2015-07-01 11:00:00           1024.0
 2015-07-01 12:00:00           1031.0
 2015-07-01 13:00:00              NaN
 2015-07-01 14:00:00              NaN
 2015-07-01 15:00:00              NaN
 2015-07-01 16:00:00              NaN
 2015-07-01 17:00:00              NaN
 2015-07-01 18:00:00              NaN
 2015-07-01 19:00:00              NaN
 2015-07-01 20:00:00              NaN
 2015-07-01 21:00:00              NaN
 2015-07-01 22:00:00              NaN
 2015-07-01 23:00:00              NaN
 2015-07-02 00:00:00              NaN
 2015-07-02 01:00:00           1838.0
 2015-07-02 02:00:00           1800.0
 2015-07-02 03:00:00           1761.0
 2015-07-02 04:00:00           1670.0
 2015-07-02 05:00:00           1609.0
 2015-07-02 06:00:00           1467.0
 2015-07-02 07:00:00           

In [17]:
# NOTE(review): this cell uses `df` as a single DataFrame, whereas the download cell
# above binds `df` to a dict of DataFrames. The execution count (17, lower than 27)
# suggests it ran against earlier kernel state - confirm before a Restart & Run All.
# Count the missing values per column, then preview the first rows.
print(df.isnull().sum())
df.head()

EBA.AVA-ALL.D.H    0
dtype: int64


Unnamed: 0_level_0,EBA.AVA-ALL.D.H
Date,Unnamed: 1_level_1
2018-08-23 01:00:00,1576.0
2018-08-23 00:00:00,1563.0
2018-08-22 23:00:00,1533.0
2018-08-22 22:00:00,1495.0
2018-08-22 21:00:00,1445.0


In [18]:
# Put the frame on a regular hourly grid. asfreq() does not fill anything in, so every
# hour that has no row in the original index shows up as a NaN row in the count below.
# NOTE(review): like the previous cell, this expects `df` to be a DataFrame with a
# datetime index, which is not what the download cell leaves in `df` - confirm.
df = df.resample('H').asfreq()
print(df.isnull().sum())
df.head()

EBA.AVA-ALL.D.H    899
dtype: int64


Unnamed: 0_level_0,EBA.AVA-ALL.D.H
Date,Unnamed: 1_level_1
2015-07-01 08:00:00,1192.0
2015-07-01 09:00:00,1108.0
2015-07-01 10:00:00,1058.0
2015-07-01 11:00:00,1024.0
2015-07-01 12:00:00,1031.0
