In [1]:
import pandas as pd
import datetime
import requests
import os
import plotly.graph_objects as go

In [2]:
eia_api_key = os.getenv('EIA_API_KEY')


In [19]:
# Set the URL
api_url = "https://api.eia.gov/v2/"
api_path = "electricity/rto/region-data/"
url = api_url + api_path + "data/" + "?api_key=" + eia_api_key + "&data[]=value"




In [4]:
data = requests.get(url).json()
df = pd.DataFrame(data['response']['data'])
print(df.head())


          period respondent   
0  2019-02-27T03       WACM  \
1  2019-02-27T03       WAUW   
2  2019-02-27T03        YAD   
3  2019-02-27T04       AECI   
4  2019-02-27T04       AVRN   

                                     respondent-name type          type-name   
0  Western Area Power Administration - Rocky Moun...   TI  Total interchange  \
1  Western Area Power Administration - Upper Grea...   TI  Total interchange   
2     Alcoa Power Generating, Inc. - Yadkin Division   TI  Total interchange   
3              Associated Electric Cooperative, Inc.   TI  Total interchange   
4                           Avangrid Renewables, LLC   TI  Total interchange   

    value    value-units  
0  2259.0  megawatthours  
1   -52.0  megawatthours  
2   176.0  megawatthours  
3   290.0  megawatthours  
4   654.0  megawatthours  


In [12]:
df.head

<bound method NDFrame.head of              period respondent   
0     2021-10-21T09        CAR  \
1     2021-10-21T04        CAR   
2     2021-10-20T22       ISNE   
3     2021-10-20T18       SOCO   
4     2021-10-21T16       MISO   
...             ...        ...   
4995  2021-12-09T06       PSEI   
4996  2021-12-12T07        AVA   
4997  2021-12-12T07       PSEI   
4998  2021-12-12T07       LGEE   
4999  2021-12-12T07       IPCO   

                                        respondent-name type   
0                                             Carolinas   DF  \
1                                             Carolinas   NG   
2                                       ISO New England   TI   
3               Southern Company Services, Inc. - Trans   NG   
4        Midcontinent Independent System Operator, Inc.   DF   
...                                                 ...  ...   
4995                           Puget Sound Energy, Inc.   DF   
4996                                 Avista Corpo

In [13]:
url = api_url + api_path + "data/" + "?api_key=" + eia_api_key + "&data[0]=value"
data = requests.get(url).json()
df = pd.DataFrame(data['response']['data'])
print(df.head())

          period respondent                                 respondent-name   
0  2021-10-21T09        CAR                                       Carolinas  \
1  2021-10-21T04        CAR                                       Carolinas   
2  2021-10-20T22       ISNE                                 ISO New England   
3  2021-10-20T18       SOCO         Southern Company Services, Inc. - Trans   
4  2021-10-21T16       MISO  Midcontinent Independent System Operator, Inc.   

  type                  type-name    value    value-units  
0   DF  Day-ahead demand forecast  17104.0  megawatthours  
1   NG             Net generation  18415.0  megawatthours  
2   TI          Total interchange  -2408.0  megawatthours  
3   NG             Net generation  27503.0  megawatthours  
4   DF  Day-ahead demand forecast  73049.0  megawatthours  


In [18]:
api_path = "electricity/rto/region-data/"

if api_path[-1] == "/":
    api_path = api_path[0:(len(api_path) -1)]

print(api_path)


electricity/rto/region-data


In [3]:
def day_offset(start, end, offset):
    current = [start]
    while max(current) < end:
        if(max(current) + datetime.timedelta(days= offset) < end):
            current.append(max(current) + datetime.timedelta(days= offset))
        else:
           current.append(end) 
           
    return current

def hour_offset(start, end, offset):
    current = [start]
    while max(current) < end:
        if(max(current) + datetime.timedelta(hours = offset) < end):
            current.append(max(current) + datetime.timedelta(hours = offset))
        else:
           current.append(end) 
           
    return current

In [4]:
def eia_get(api_key, 
            api_path, 
            data = "value", 
            facets = None, 
            start = None, 
            end = None, 
            length = None, 
            offset = None, 
            frequency = None):
    
    class response:
        def __init__(output, data, url, parameters):
            output.data = data
            output.url = url
            output.parameters = parameters
    
    if type(api_key) is not str:
        print("Error: The api_key argument is not a valid string")
        return
    elif len(api_key) != 40:
        print("Error: The length of the api_key is not valid, must be 40 characters")
        return
    
    if api_path[-1] != "/":
        api_path = api_path + "/"
    
    if facets is None:
        fc = ""
    else:
        fc = ""
        for i in facets.keys():
            for n in facets[i]:
                fc = fc + "&facets[" + i + "][]=" + n
    
    if start is None:
        s = ""
    else:
        if  type(start) is datetime.date:
            s = "&start=" + start.strftime("%Y-%m-%d")
        elif type(start) is datetime.datetime:
            s = "&start=" + start.strftime("%Y-%m-%dT%H")
        else:
            print("Error: The start argument is not a valid date or time object")
            return
             

    if end is None:
        e = ""
    else:
        if  type(end) is datetime.date:
            e = "&end=" + end.strftime("%Y-%m-%d")
        elif type(end) is datetime.datetime:
            e = "&end=" + end.strftime("%Y-%m-%dT%H")
        else:
            print("Error: The end argument is not a valid date or time object")
            return

    if length is None:
        l = ""
    else:
        l = "&length=" + str(length)

    if offset is None:
        o = ""
    else: 
        o = "&offset=" + str(offset)

    if frequency is None:
        fr = ""
    else:
        fr = "&frequency=" + str(fr)

    url = "https://api.eia.gov/v2/" + api_path + "?data[]=value" + fc + s + e + l + o + fr          

    
    d = requests.get(url + "&api_key=" + api_key).json()

    df = pd.DataFrame(d['response']['data'])

    parameters = {
        "api_path": api_path,
        "data" : data,
        "facets": facets, 
        "start": start, 
        "end": end, 
        "length": length, 
        "offset": offset, 
        "frequency": frequency
    }
    output = response(data = df, url = url + "&api_key=", parameters = parameters)
    return output



In [5]:
facets = {
    "respondent": ["US48", "WACM"],
    "type": "D"
}
df = eia_get(api_key = eia_api_key, 
        api_path = "electricity/rto/region-data/data", 
        data = "value", 
        facets = facets, 
        length= None, 
        start = datetime.datetime(2023, 12, 1, 0),
        end = datetime.datetime(2023, 12, 1, 1),
        offset = 0, 
        frequency = None)


print(df)

<__main__.eia_get.<locals>.response object at 0xffff7c0a6440>


In [6]:
print(df.parameters)
print(df.url)
x = df.data._append(df.data)

{'api_path': 'electricity/rto/region-data/data/', 'data': 'value', 'facets': {'respondent': ['US48', 'WACM'], 'type': 'D'}, 'start': datetime.datetime(2023, 12, 1, 0, 0), 'end': datetime.datetime(2023, 12, 1, 1, 0), 'length': None, 'offset': 0, 'frequency': None}
https://api.eia.gov/v2/electricity/rto/region-data/data/?data[]=value&facets[respondent][]=US48&facets[respondent][]=WACM&facets[type][]=D&start=2023-12-01T00&end=2023-12-01T01&offset=0&api_key=


In [7]:
def eia_backfile(start, end, offset, api_key, api_path, facets):
    
    class response:
        def __init__(output, data, parameters):
            output.data = data
            output.parameters = parameters
    
    if type(api_key) is not str:
        print("Error: The api_key argument is not a valid string")
        return
    elif len(api_key) != 40:
        print("Error: The length of the api_key is not valid, must be 40 characters")
        return
    
    if api_path[-1] != "/":
        api_path = api_path + "/"    

    if  type(start) is datetime.date:
        s = "&start=" + start.strftime("%Y-%m-%d")
    elif type(start) is datetime.datetime:
        s = "&start=" + start.strftime("%Y-%m-%dT%H")
    else:
        print("Error: The start argument is not a valid date or time object")
        return
             

    if  type(end) is datetime.date:
        e = "&end=" + end.strftime("%Y-%m-%d")
    elif type(end) is datetime.datetime:
        e = "&end=" + end.strftime("%Y-%m-%dT%H")
    else:
        print("Error: The end argument is not a valid date or time object")
        return
    
    if  type(start) is datetime.date:
        time_vec_seq = day_offset(start = start, end = end, offset = offset)
    elif  type(start) is datetime.datetime:
        time_vec_seq = hour_offset(start = start, end = end, offset = offset)

    
    for i in range(len(time_vec_seq[:-1])):
        start = time_vec_seq[i]
        end = time_vec_seq[i + 1] -  datetime.timedelta(hours = 1)
        temp = eia_get(api_key = api_key, 
                       api_path = api_path, 
                       facets= facets, 
                       start = start,
                       data = "value", 
                       end = end)
        if i == 0:
            df = temp.data
        else:
            df = df._append(temp.data)

    parameters = {
        "api_path": api_path,
        "data" : "value",
        "facets": facets, 
        "start": start, 
        "end": end, 
        "length": None, 
        "offset": offset, 
        "frequency":None
    }
    output = response(data = df, parameters = parameters)
    return output

        


In [8]:
df = eia_backfile(api_key = eia_api_key, 
        api_path = "electricity/rto/region-data/data", 
        facets = facets, 
        start = datetime.datetime(2021, 12, 1, 0),
        end = datetime.datetime(2023, 12, 1, 1),
        offset = 1000)   

df.data

Unnamed: 0,period,respondent,respondent-name,type,type-name,value,value-units
0,2021-12-03T05,US48,United States Lower 48,D,Demand,410581.0,megawatthours
1,2021-12-03T04,WACM,Western Area Power Administration - Rocky Moun...,D,Demand,2523.0,megawatthours
2,2022-01-08T20,US48,United States Lower 48,D,Demand,473921.0,megawatthours
3,2022-01-10T02,US48,United States Lower 48,D,Demand,489726.0,megawatthours
4,2021-12-15T19,US48,United States Lower 48,D,Demand,458388.0,megawatthours
...,...,...,...,...,...,...,...
1037,2023-11-18T14,US48,United States Lower 48,D,Demand,397812.0,megawatthours
1038,2023-11-18T21,WACM,Western Area Power Administration - Rocky Moun...,D,Demand,3460.0,megawatthours
1039,2023-11-20T03,US48,United States Lower 48,D,Demand,434463.0,megawatthours
1040,2023-11-18T16,WACM,Western Area Power Administration - Rocky Moun...,D,Demand,3532.0,megawatthours


In [22]:
d = df.data.sort_values(by = ["period"])
d = d[d["respondent"] == "US48"]
p = go.Figure()
p.add_trace(go.Scatter(x = d["period"], y = d["value"],
                       mode='lines',
                    name='data',
                    line=dict(color='royalblue', width=2)))
p.show()

In [27]:
api_path = "electricity/rto/region-data/data"
facets = {
    "respondent": "US48",
    "type": "D"
}

start = datetime.datetime(2023, 7, 1, 0)
end = datetime.datetime(2023, 12, 15, 0)

df = eia_backfile(api_key = eia_api_key, 
        api_path = api_path, 
        facets = facets, 
        start = start,
        end = end,
        offset = 2000)   

In [28]:
df.data.head

<bound method NDFrame.head of Empty DataFrame
Columns: []
Index: []>

In [None]:
d = df.data.sort_values(by = ["period"])
d = d[d["respondent"] == "US48"]
p = go.Figure()
p.add_trace(go.Scatter(x = d["period"], y = d["value"],
                       mode='lines',
                    name='data',
                    line=dict(color='royalblue', width=2)))

In [15]:
start=datetime.date(2023,12,1)
end=datetime.date(2023,12,2)
x = end - start
x.days


1

In [18]:
start=datetime.datetime(2023,12,1,0)
end=datetime.datetime(2023,12,2,1)
x = end - start
x.min


datetime.timedelta(days=-999999999)

In [18]:
start = datetime.date(2023,12,1)
print(start) 
current = [start]

current.append(start + datetime.timedelta(days= 2))
print(current)
max(current)

2023-12-01
[datetime.date(2023, 12, 1), datetime.date(2023, 12, 3)]


datetime.date(2023, 12, 3)

In [29]:
def day_offset(start, end, offset):
    current = [start]
    while max(current) < end:
        if(max(current) + datetime.timedelta(days= offset) <= end):
            current.append(max(current) + datetime.timedelta(days= offset))
        else:
           current.append(max(current) + end) 
           
    return current

def hour_offset(start, end, offset):
    current = [start]
    while max(current) < end:
        if(max(current) + datetime.timedelta(hours = offset) <= end):
            current.append(max(current) + datetime.timedelta(hours = offset))
        else:
           current.append(max(current) + end) 
           
    return current
        
day_offset(start = datetime.date(2023,12,1), end = datetime.date(2023,12,31), offset = 1)

hour_offset(start = datetime.datetime(2023,12,1,0), end = datetime.datetime(2023,12,20,0), offset = 12)

[datetime.datetime(2023, 12, 1, 0, 0),
 datetime.datetime(2023, 12, 1, 12, 0),
 datetime.datetime(2023, 12, 2, 0, 0),
 datetime.datetime(2023, 12, 2, 12, 0),
 datetime.datetime(2023, 12, 3, 0, 0),
 datetime.datetime(2023, 12, 3, 12, 0),
 datetime.datetime(2023, 12, 4, 0, 0),
 datetime.datetime(2023, 12, 4, 12, 0),
 datetime.datetime(2023, 12, 5, 0, 0),
 datetime.datetime(2023, 12, 5, 12, 0),
 datetime.datetime(2023, 12, 6, 0, 0),
 datetime.datetime(2023, 12, 6, 12, 0),
 datetime.datetime(2023, 12, 7, 0, 0),
 datetime.datetime(2023, 12, 7, 12, 0),
 datetime.datetime(2023, 12, 8, 0, 0),
 datetime.datetime(2023, 12, 8, 12, 0),
 datetime.datetime(2023, 12, 9, 0, 0),
 datetime.datetime(2023, 12, 9, 12, 0),
 datetime.datetime(2023, 12, 10, 0, 0),
 datetime.datetime(2023, 12, 10, 12, 0),
 datetime.datetime(2023, 12, 11, 0, 0),
 datetime.datetime(2023, 12, 11, 12, 0),
 datetime.datetime(2023, 12, 12, 0, 0),
 datetime.datetime(2023, 12, 12, 12, 0),
 datetime.datetime(2023, 12, 13, 0, 0),
 datet

In [43]:
time_vec_seq = hour_offset(start = datetime.datetime(2021,1,1,0), end = datetime.datetime(2023,12,31,23), offset = 2000)

# print(time_veq_seq)

for i in range(len(time_vec_seq[:-1])):
    start = time_vec_seq[i]
    end = time_vec_seq[i + 1] -  datetime.timedelta(hours = 1)

    print(start, end )




2021-01-01 00:00:00 2021-03-25 07:00:00
2021-03-25 08:00:00 2021-06-16 15:00:00
2021-06-16 16:00:00 2021-09-07 23:00:00
2021-09-08 00:00:00 2021-11-30 07:00:00
2021-11-30 08:00:00 2022-02-21 15:00:00
2022-02-21 16:00:00 2022-05-15 23:00:00
2022-05-16 00:00:00 2022-08-07 07:00:00
2022-08-07 08:00:00 2022-10-29 15:00:00
2022-10-29 16:00:00 2023-01-20 23:00:00
2023-01-21 00:00:00 2023-04-14 07:00:00
2023-04-14 08:00:00 2023-07-06 15:00:00
2023-07-06 16:00:00 2023-09-27 23:00:00
2023-09-28 00:00:00 2023-12-20 07:00:00
2023-12-20 08:00:00 2023-12-31 22:00:00
