In [1]:
import sys
import json
import requests

def get_api_data(api_key, start_dt_hr, end_dt_hr, subregions=('PGAE', 'SCE')):
    """Request hourly sub-BA data from the EIA v2 API and return the JSON body.

    Parameters
    ----------
    api_key : str
        EIA API key, sent as the ``api_key`` query parameter.
    start_dt_hr, end_dt_hr : str
        Values sent as the ``start`` and ``end`` query parameters
        (the caller in this notebook formats them as ``YYYY-MM-DDTHH``).
    subregions : str or iterable of str, optional
        Values for the ``facets[subba][]`` filter. Defaults to
        ``('PGAE', 'SCE')``.

    Returns
    -------
    dict
        The decoded JSON response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    eia_url = 'https://api.eia.gov/v2/electricity/rto/region-sub-ba-data/data/'

    # A bare string would otherwise be split into characters by list().
    if isinstance(subregions, str):
        subregions = [subregions]

    subset_request = {
        'api_key': api_key,
        'frequency': 'hourly',
        'data[0]': 'value',
        # A dict literal cannot hold the same key twice (the later entry
        # silently wins), so all facet values are passed as one list;
        # requests expands a list into one repeated query parameter per item.
        'facets[subba][]': list(subregions),
        'start': start_dt_hr,
        'end': end_dt_hr,
        'sort[0][column]': 'period',
        'sort[0][direction]': 'desc',
        'offset': 0,
        'length': 5000
    }

    # Without a timeout a stalled connection would block the notebook forever.
    resp = requests.get(eia_url, params=subset_request, timeout=60)
    # Fail loudly on HTTP errors instead of handing an error payload downstream.
    resp.raise_for_status()

    return resp.json()

def parse_electricity_data(json_data):
    """Build a DataFrame from an API response and rename its columns.

    Reads the records under ``json_data['response']['data']`` and renames
    the API field names to the names used in the rest of this notebook.
    Columns not listed in the mapping are left unchanged.
    """
    # API field name -> column name used downstream.
    column_names = {
        'period': 'date_hr',
        'subba-name': 'subregion_name',
        'subba': 'subregion_abbrv',
        'parent': 'parent_abbrv',
        'parent-name': 'parent_name',
        'value': 'megawatthours',
    }
    records = json_data['response']['data']
    raw_frame = pd.DataFrame(records)
    return raw_frame.rename(columns=column_names)


In [2]:
from datetime import datetime
from dateutil.relativedelta import relativedelta
import time
import pandas as pd

# Read the API key from a local file. The context manager closes the handle,
# and strip() drops any trailing newline so it is not sent as part of the key.
with open("./api_key.txt", "r") as key_file:
    api_key = key_file.read().strip()

pull_start = datetime(2025, 1, 1)
#pull_end = datetime(2025, 1, 19)
pull_end = datetime(2025, 2, 19)

# One DataFrame per weekly request; concatenated once after the loop.
pd_arr = []

# Walk the range in 7-day windows: each window runs from hour 00 of its first
# day to hour 23 of its seventh day.
# NOTE: if the range is not a whole number of weeks, the last window extends
# past pull_end.
while pull_start < pull_end:
    start_dt_hr = pull_start.strftime("%Y-%m-%dT00")
    end_dt_hr = (pull_start+relativedelta(weeks=1)-relativedelta(days=1)).strftime("%Y-%m-%dT23")
    print("fetching data from week of "+start_dt_hr)
    api_data = get_api_data(api_key, start_dt_hr, end_dt_hr)
    pd_df = parse_electricity_data(api_data)
    pd_arr.append(pd_df)
    pull_start+=relativedelta(weeks=1)
    # Pause 30 seconds between requests; no need to wait after the last one.
    if pull_start < pull_end:
        time.sleep(30)

full_electricity_df = pd.concat(pd_arr)

fetching data from week of 2025-01-01T00
fetching data from week of 2025-01-08T00
fetching data from week of 2025-01-15T00
fetching data from week of 2025-01-22T00
fetching data from week of 2025-01-29T00
fetching data from week of 2025-02-05T00
fetching data from week of 2025-02-12T00


In [3]:
# Persist the combined DataFrame as a pickle file in the working directory.
pickle_path = "cal_electricity_mwh.pickle"
full_electricity_df.to_pickle(pickle_path)