In [1]:
import os
import time

import pandas as pd
import requests

In [5]:

def fetch_eia_data(api_path, *, api_key=None, frequency=None, facets=None,
                   data_vars=None, start=None, end=None,
                   offset=0, length=5000, timeout=30):
    """Fetch one page of data from an EIA API v2 route.

    Args:
        api_path: Route below ``https://api.eia.gov/v2/``, e.g.
            ``"electricity/retail-sales"``. Leading and trailing slashes
            are ignored.
        api_key: Value sent as the ``api_key`` query parameter.
        frequency: Optional value for the ``frequency`` query parameter
            (e.g. ``"monthly"``).
        facets: Optional mapping of facet name to either a single string
            or an iterable of values; sent as ``facets[<name>][<i>]``.
        data_vars: Optional data column name, or iterable of names; sent
            as ``data[<i>]``.
        start: Optional value for the ``start`` query parameter.
        end: Optional value for the ``end`` query parameter.
        offset: Value for the ``offset`` query parameter.
        length: Value for the ``length`` query parameter.
        timeout: Seconds passed to ``requests.get`` as ``timeout`` so a
            stalled connection cannot hang the notebook forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
            The status code and response text are printed first.
    """
    base = "https://api.eia.gov/v2/"
    # strip("/") rather than rstrip: a leading slash would yield "v2//...".
    url = f"{base}{api_path.strip('/')}/data/"

    # Build params according to EIA spec
    params = {
        "api_key": api_key,
        "offset": offset,
        "length": length,
    }
    if frequency is not None:
        params["frequency"] = frequency
    if data_vars is not None:
        # A bare string would otherwise be enumerated character by character.
        if isinstance(data_vars, str):
            data_vars = [data_vars]
        # EIA expects something like data[0]=value, data[1]=other
        for i, dv in enumerate(data_vars):
            params[f"data[{i}]"] = dv
    if facets is not None:
        for facet_key, facet_vals in facets.items():
            # Accept {"stateid": "CA"} as shorthand for {"stateid": ["CA"]}.
            if isinstance(facet_vals, str):
                facet_vals = [facet_vals]
            for i, val in enumerate(facet_vals):
                params[f"facets[{facet_key}][{i}]"] = val
    if start is not None:
        params["start"] = start
    if end is not None:
        params["end"] = end

    print("Requesting:", url)
    # Never echo the API key into notebook output.
    print("Params:", {k: v for k, v in params.items() if k != "api_key"})

    resp = requests.get(url, params=params, timeout=timeout)
    try:
        resp.raise_for_status()
    except requests.HTTPError:
        print("ERROR response status:", resp.status_code)
        print("Response text:", resp.text)
        raise

    return resp.json()




In [6]:
def fetch_all_for_series(api_path, *, pause=0.2, **kwargs):
    """Fetch every page of an EIA v2 query and merge the rows.

    Pages are requested through ``fetch_eia_data`` at increasing offsets
    until a page comes back empty or with fewer than ``length`` rows.

    Args:
        api_path: Passed through to ``fetch_eia_data``.
        pause: Seconds to sleep between successive page requests.
        **kwargs: Forwarded to ``fetch_eia_data``. ``length`` is the page
            size (5000 when omitted or None) and ``offset`` is the offset
            of the first page (0 when omitted or None).

    Returns:
        pandas.DataFrame: a dataframe containing all rows from the API query
    """
    # Take offset/length out of kwargs so they are passed exactly once below;
    # leaving "offset" in kwargs would raise "multiple values for keyword".
    offset = kwargs.pop("offset", None) or 0
    length = kwargs.pop("length", None)
    if length is None:
        # Same default page size as fetch_eia_data; without it the
        # comparisons below would be against None and raise TypeError.
        length = 5000

    all_records = []
    while True:
        response_json = fetch_eia_data(api_path, offset=offset, length=length, **kwargs)
        recs = response_json["response"]["data"]
        if not recs:
            break
        all_records.extend(recs)
        # If fewer than length returned, we've reached the end so we're done.
        # NOTE(review): if the service caps a page below `length`, a capped
        # page would be mistaken for the last one - confirm the API limit.
        if len(recs) < length:
            break
        offset += length
        # sleep to slow requests
        time.sleep(pause)

    return pd.DataFrame(all_records)

In [None]:
api_path = "electricity/retail-sales"

# Use this to filter by state or sector, e.g.
# facets = {"stateid": ["CA", "WI"],
#           "sectorid": ["IND", "RES"]}
# to get only data for the industrial and residential sectors in
# California and Wisconsin. None applies no facet filter.
facets = None

# Read the key from the environment so it can never be committed with the
# notebook. Set it before starting Jupyter, e.g. `export EIA_API_KEY=...`.
API_KEY = os.environ.get("EIA_API_KEY")
if not API_KEY:
    raise RuntimeError(
        "Set the EIA_API_KEY environment variable to your EIA API key."
    )

kwargs = {
    "api_key": API_KEY,
    "frequency": "monthly",
    "data_vars": ["customers", "price", "revenue", "sales"],  # actual data variables
    "facets": facets,
    "start": "2001-01",  # start date - Jan 2001 is as far back as we can go
    "end": "2025-07",    # end date
    "length": 5000,      # rows of data per query
}


# queries the data found here: https://www.eia.gov/opendata/browser/electricity/retail-sales?frequency=monthly&data=customers;price;revenue;sales;&sortColumn=period;&sortDirection=desc;
df = fetch_all_for_series(api_path, **kwargs)

# drop the columns for stateDescription and sectorName since they are redundant
df = df.drop(columns=['stateDescription', 'sectorName'])

Requesting: https://api.eia.gov/v2/electricity/retail-sales/data/
Params: {'offset': 0, 'length': 5000, 'frequency': 'monthly', 'data[0]': 'customers', 'data[1]': 'price', 'data[2]': 'revenue', 'data[3]': 'sales', 'start': '2001-01', 'end': '2025-07'}
Requesting: https://api.eia.gov/v2/electricity/retail-sales/data/
Params: {'offset': 5000, 'length': 5000, 'frequency': 'monthly', 'data[0]': 'customers', 'data[1]': 'price', 'data[2]': 'revenue', 'data[3]': 'sales', 'start': '2001-01', 'end': '2025-07'}
Requesting: https://api.eia.gov/v2/electricity/retail-sales/data/
Params: {'offset': 10000, 'length': 5000, 'frequency': 'monthly', 'data[0]': 'customers', 'data[1]': 'price', 'data[2]': 'revenue', 'data[3]': 'sales', 'start': '2001-01', 'end': '2025-07'}
Requesting: https://api.eia.gov/v2/electricity/retail-sales/data/
Params: {'offset': 15000, 'length': 5000, 'frequency': 'monthly', 'data[0]': 'customers', 'data[1]': 'price', 'data[2]': 'revenue', 'data[3]': 'sales', 'start': '2001-01',

In [8]:
data_dir = os.path.join(os.pardir, 'datasets')
# Create the output folder on first run; to_csv does not create directories.
os.makedirs(data_dir, exist_ok=True)

index_cols = ["period", "stateid", "sectorid"]

# If this cell is re-run, df is already indexed. Flatten it first so the CSV
# (written with index=False) keeps the key columns and set_index below works.
if set(index_cols).issubset(df.index.names):
    df = df.reset_index()

df.to_csv(os.path.join(data_dir, 'state_electricity_price.csv'), index=False)

# multi-index the dataframe first by period (year-month), then by state,
# then by sector
df = df.set_index(index_cols).sort_index()

df


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,customers,price,revenue,sales,customers-units,price-units,revenue-units,sales-units
period,stateid,sectorid,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001-01,AK,ALL,,9.97,51.96404,521.03566,number of customers,cents per kilowatt-hour,million dollars,million kilowatt hours
2001-01,AK,COM,,9.7,20.2141,208.49968,number of customers,cents per kilowatt-hour,million dollars,million kilowatt hours
2001-01,AK,IND,,7.05,6.26039,88.7627,number of customers,cents per kilowatt-hour,million dollars,million kilowatt hours
2001-01,AK,OTH,,13.36,2.36091,17.66485,number of customers,cents per kilowatt-hour,million dollars,million kilowatt hours
2001-01,AK,RES,,11.22,23.12865,206.10843,number of customers,cents per kilowatt-hour,million dollars,million kilowatt hours
...,...,...,...,...,...,...,...,...,...,...
2025-07,WY,COM,60181,10.16,46.39331,456.52285,number of customers,cents per kilowatt-hour,million dollars,million kilowatt hours
2025-07,WY,IND,12067,8.93,68.35194,765.72477,number of customers,cents per kilowatt-hour,million dollars,million kilowatt hours
2025-07,WY,OTH,,,,,number of customers,cents per kilowatt-hour,million dollars,million kilowatt hours
2025-07,WY,RES,289795,14.64,34.96128,238.7893,number of customers,cents per kilowatt-hour,million dollars,million kilowatt hours
