# Electricity Generation from api.electricitymap.org
---


SECTION 1 
Collect carbon-intensity and power-breakdown data from the electricitymap API for the utility zones in the Southwest (SW) and the other US regions listed below.

In [529]:
# Dependencies and Setup

import os
from datetime import datetime, timedelta

import pandas as pd
import requests

In [530]:

# zones for electrical utilities in US
# Each zone appears exactly once: a repeated zone would be requested twice from the
# API and would add duplicate region-hour rows to the collected data.
zones = [
    "US-SW-PNM", "US-SW-EPE", "US-SW-WALC", "US-NW-PACE", "US-NW-PSCO", "US-CENT-SWPP", "US-TEX-ERCO", "US-MIDW-AECI", "US-SW-AZPS",
    "US-NW-WACM", "US-SW-SRP", "US-SW-TEPC", "US-CENT-SPA", "US-CAL-IID", "US-CAL-CISO", "US-CAL-BANC", "US-CAL-TIDC",
    "US-CAR-CPLE", "US-CAR-CPLW", "US-CAR-DUK", "US-CAR-SC", "US-CAR-SCEG", "US-CAR-YAD", "US-FLA-FMPP", "US-FLA-FPC", "US-FLA-FPL",
    "US-FLA-GVL", "US-FLA-HST", "US-FLA-JEA", "US-FLA-SEC", "US-FLA-TAL", "US-FLA-TEC", "US-MIDW-LGEE", "US-MIDW-MISO",
    "US-NE-ISNE", "US-NW-BPAT", "US-NW-CHPD", "US-NW-DOPD", "US-NW-GCPD", "US-NW-GRID", "US-NW-IPCO", "US-NW-NWMT", "US-NW-NEVP",
    "US-NW-PACW", "US-NW-PGE", "US-NW-PSEI", "US-NW-SCL", "US-NW-TPWR", "US-NW-WAUW", "US-NY-NYIS", "US-SE-SEPA", "US-SE-SOCO",
    "US-TEN-TVA",
]


In [531]:
# Short region labels used when recording the data: strip the leading "US-" and
# turn the first remaining hyphen into an underscore (e.g. "US-SW-PNM" -> "SW_PNM").
regions = [zone[3:].replace('-', '_', 1) for zone in zones]

In [532]:
# get carbon intensity history for the US utilities
# The API token is read from the environment so it is never stored in the notebook.
# Set ELECTRICITYMAP_API_TOKEN before starting the kernel; a KeyError here means it is missing.
auth_token = os.environ["ELECTRICITYMAP_API_TOKEN"]

# one history URL per zone, in the same order as `zones`
urls = [
    f'https://api.electricitymap.org/v3/carbon-intensity/history?zone={zone}'
    for zone in zones
]

# store responses in responses_dict dictionary, keyed "response_1", "response_2", ...
responses_dict = {}
for idx, url in enumerate(urls):
    # timeout keeps a stalled connection from hanging the notebook indefinitely
    response = requests.get(url, headers={"auth-token": auth_token}, timeout=30)
    # stop on HTTP errors instead of storing an error payload that fails in a later cell
    response.raise_for_status()
    responses_dict[f"response_{idx+1}"] = response.json()


In [534]:
#request power breakdown for the US utilities
# The API token is read from the environment so it is never stored in the notebook.
# Set ELECTRICITYMAP_API_TOKEN before starting the kernel; a KeyError here means it is missing.
auth_token = os.environ["ELECTRICITYMAP_API_TOKEN"]

# one history URL per zone, in the same order as `zones`
pburls = [
    f'https://api.electricitymap.org/v3/power-breakdown/history?zone={zone}'
    for zone in zones
]

#store response in power_breakdown_responses_dict dictionary, keyed "response_1", "response_2", ...
power_breakdown_responses_dict = {}
for idx, pburl in enumerate(pburls):
    # timeout keeps a stalled connection from hanging the notebook indefinitely
    response = requests.get(pburl, headers={"auth-token": auth_token}, timeout=30)
    # stop on HTTP errors instead of storing an error payload that fails in a later cell
    response.raise_for_status()
    power_breakdown_responses_dict[f"response_{idx+1}"] = response.json()


In [535]:
# Display the raw power-breakdown responses to inspect their structure.
# NOTE(review): this prints every response in full; consider showing a single entry instead.
power_breakdown_responses_dict

{'response_1': {'zone': 'US-SW-PNM',
  'history': [{'zone': 'US-SW-PNM',
    'datetime': '2025-03-07T23:00:00.000Z',
    'updatedAt': '2025-03-07T23:44:30.906Z',
    'createdAt': '2025-03-04T23:43:13.185Z',
    'powerConsumptionBreakdown': {'nuclear': 0,
     'geothermal': 0,
     'biomass': 0,
     'coal': 152,
     'wind': 1292,
     'solar': 734,
     'hydro': 0,
     'gas': 293,
     'oil': 0,
     'unknown': 0,
     'hydro discharge': 0,
     'battery discharge': 390},
    'powerProductionBreakdown': {'nuclear': None,
     'geothermal': 0,
     'biomass': None,
     'coal': 152,
     'wind': 1292,
     'solar': 734,
     'hydro': 0,
     'gas': 293,
     'oil': None,
     'unknown': None,
     'hydro discharge': None,
     'battery discharge': 390},
    'powerImportBreakdown': {},
    'powerExportBreakdown': {},
    'fossilFreePercentage': 84,
    'renewablePercentage': 84,
    'powerConsumptionTotal': 2861,
    'powerProductionTotal': 2861,
    'powerImportTotal': None,
    'powe

In [536]:
# Build one flat list per output column; every list receives one entry per
# (region, hour) record taken from the two response dictionaries.
carbon_intensity = []
dateandtime = []
isEstimated = []
nuc = []
geo = []
bio = []
coal = []
wind = []
solar = []
hydro = []
gas = []
oil = []
unknown = []
powerImport = []
powerExport = []
powerProdTotal = []
hydro_discharge = []
battery_discharge = []
Powest = []
region = []

# "powerProductionBreakdown" key -> list that collects that source's hourly value
production_lists = {
    'nuclear': nuc,
    'geothermal': geo,
    'biomass': bio,
    'coal': coal,
    'wind': wind,
    'solar': solar,
    'hydro': hydro,
    'gas': gas,
    'oil': oil,
    'unknown': unknown,
    'hydro discharge': hydro_discharge,
    'battery discharge': battery_discharge,
}

# populate each list with values from the dictionaries
for j in range(len(power_breakdown_responses_dict)):
    # j is for each region (each region is a response)
    ci_history = responses_dict[f'response_{j+1}']['history']
    pb_history = power_breakdown_responses_dict[f'response_{j+1}']['history']

    # Pair the two endpoints by timestamp rather than by list position: the requests
    # are made at different moments, so position i is not guaranteed to be the same hour.
    pb_by_datetime = {pb_hour['datetime']: pb_hour for pb_hour in pb_history}

    # Iterate over whatever hours were returned (no fixed count of 24), so a short
    # history no longer raises IndexError.
    for ci_hour in ci_history:
        pb_hour = pb_by_datetime.get(ci_hour['datetime'])
        if pb_hour is None:
            # no power-breakdown record for this hour; skip it rather than mix hours
            continue

        carbon_intensity.append(ci_hour['carbonIntensity'])
        dateandtime.append(ci_hour['datetime'])
        isEstimated.append(ci_hour['isEstimated'])

        production = pb_hour["powerProductionBreakdown"]
        for source, values in production_lists.items():
            # a source missing from the breakdown is recorded as None, like an explicit null
            values.append(production.get(source))

        powerProdTotal.append(pb_hour['powerProductionTotal'])
        powerImport.append(pb_hour['powerImportTotal'])
        powerExport.append(pb_hour['powerExportTotal'])
        Powest.append(pb_hour['isEstimated'])
        region.append(regions[j])
        

In [537]:
# Wrap the carbon-intensity fields in Series, then assemble every collected list
# into a single DataFrame with one row per region-hour.
ser_carbon_intensity = pd.Series(carbon_intensity)
ser_dateandtime = pd.Series(dateandtime)
ser_EST = pd.Series(isEstimated)

# column label -> values; insertion order fixes the column order of the frame
# NOTE(review): labels say GW; confirm the unit the API actually reports.
column_data = {
    'region': region,
    'datetime': ser_dateandtime,
    'carbonIntensity': ser_carbon_intensity,
    'isEstimated': ser_EST,
    'nuclear (GW)': nuc,
    'geothermal (GW)': geo,
    'biomass (GW)': bio,
    'coal (GW)': coal,
    'wind (GW)': wind,
    'solar (GW)': solar,
    'hydro (GW)': hydro,
    'gas (GW)': gas,
    'oil (GW)': oil,
    'unknown (GW)': unknown,
    'hydro_discharge (GW)': hydro_discharge,
    'battery_discharge (GW)': battery_discharge,
    'PowerProductionTotal (GW)': powerProdTotal,
    'PowerImportTotal (GW)': powerImport,
    'PowerExportTotal (GW)': powerExport,
    'ConsumptionEstimated': Powest,
}
df = pd.DataFrame(column_data)


SECTION 2     
PowerBreakdown data transformation

In [538]:
# Replace missing values with zero in the energy columns listed here.
# Columns not listed (e.g. 'PowerProductionTotal (GW)') keep their missing values.
zero_fill_columns = [
    'carbonIntensity',
    'nuclear (GW)',
    'geothermal (GW)',
    'biomass (GW)',
    'coal (GW)',
    'wind (GW)',
    'solar (GW)',
    'hydro (GW)',
    'gas (GW)',
    'oil (GW)',
    'unknown (GW)',
    'hydro_discharge (GW)',
    'battery_discharge (GW)',
    'PowerImportTotal (GW)',
    'PowerExportTotal (GW)',
]
df = df.fillna(dict.fromkeys(zero_fill_columns, 0))

In [539]:
# Date Time work

# Parse the timestamp strings in the 'datetime' column in one vectorised call.
# The column is addressed by name, so the result does not depend on column order.
utc_datetimes = pd.to_datetime(df['datetime'], format='%Y-%m-%dT%H:%M:%S.%fZ')

# add the times and dates to new columns in the data frame
df['UTC time'] = utc_datetimes.dt.strftime('%H:%M:%S')
df['UTC date'] = utc_datetimes.dt.strftime('%Y-%m-%d')
df['UTC DateTime'] = utc_datetimes

# plain-list copies, kept so any later cell that reads these names still works
dates = df['UTC date'].tolist()
times = df['UTC time'].tolist()
DateTime = df['UTC DateTime'].tolist()

In [540]:
#  reindex the data frame to make the UTC DateTime column the index
# set_index(..., inplace=True) returns None, so assigning its result would leave
# df_reindex empty. Rebinding df to the re-indexed frame keeps later cells that read
# df working, and the guard makes re-running this cell a no-op instead of a KeyError.
if df.index.name != "UTC DateTime":
    df = df.set_index("UTC DateTime", drop=True)
df_reindex = df

In [541]:
# remove the original string-valued 'datetime' column from the frame
df_reindex = df.drop(columns='datetime')

Section 3
Merge with previous data

In [542]:
# Load the previously saved running file, drop rows repeated for the same
# region/date/time (keeping the first), and index it by "UTC DateTime".
# NOTE(review): read_csv leaves "UTC DateTime" as strings here - confirm that mixing
# it with a datetime-typed index in the next step is intended.
previous_file_path = 'data/allRegions/simplifiedrunningfile.csv'
previous_row_keys = ['UTC date', 'UTC time', 'region']

df_us_energy = pd.read_csv(previous_file_path)
df_us_energy_dropped = df_us_energy.drop_duplicates(subset=previous_row_keys, keep='first')
df_us_energy_reindex = df_us_energy_dropped.set_index("UTC DateTime")

In [543]:
# stack the existing file's rows first and the newly collected rows after them
frames_to_merge = [df_us_energy_reindex, df_reindex]
df_both = pd.concat(frames_to_merge)

# order the combined rows by region, then by date
sort_columns = ['region', 'UTC date']
df_sort = df_both.sort_values(by=sort_columns)

In [544]:
# Rows sharing the same region, UTC time and UTC date are duplicates; keep only the
# last one of each group, since it may contain updated data.
duplicate_keys = ['region', 'UTC time', 'UTC date']
df_final = df_sort.drop_duplicates(subset=duplicate_keys, keep='last')

In [545]:

# write the merged, de-duplicated data to the running file
running_file_path = 'data/allRegions/simplifiedrunningfile.csv'
df_final.to_csv(running_file_path)

In [546]:
# Summary statistics for the numeric columns of the merged data.
df_final.describe()

Unnamed: 0,carbonIntensity,nuclear (GW),geothermal (GW),biomass (GW),coal (GW),wind (GW),solar (GW),hydro (GW),gas (GW),oil (GW),unknown (GW),hydro_discharge (GW),battery_discharge (GW),PowerProductionTotal (GW),PowerImportTotal (GW),PowerExportTotal (GW)
count,118660.0,118667.0,118667.0,118667.0,118667.0,118667.0,118667.0,118667.0,118667.0,118667.0,118667.0,118667.0,118667.0,118461.0,118667.0,118667.0
mean,397.535564,1049.536274,14.217997,18.531251,1227.00086,818.469162,470.303892,445.494013,3005.14341,2.245544,76.431165,-56.999511,-3.334988,7162.411545,181.033893,76.601751
std,238.739221,2257.277253,102.666473,93.754104,3268.519218,2899.610172,2079.852876,1009.37871,6162.681334,38.916869,200.561538,2545.077333,436.960179,13845.890607,719.31644,298.092009
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-297409.0,-7619.0,0.0,0.0,0.0
25%,241.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,168.0,0.0,0.0,0.0,0.0,584.0,0.0,0.0
50%,415.0,0.0,0.0,0.0,0.0,0.0,0.0,49.0,767.0,0.0,0.0,0.0,0.0,1968.0,0.0,0.0
75%,566.0,650.0,0.0,0.0,903.0,191.0,106.0,393.5,2023.0,0.0,56.0,0.0,0.0,6418.0,0.0,0.0
max,1250.0,13084.0,861.0,685.0,33723.0,31936.0,24715.0,26491.0,55662.0,2853.0,13129.0,1375.0,7849.0,110223.0,8578.0,9658.0


In [547]:
# Display the biomass column cast to int. The result is not assigned, so df_final is unchanged.
df_final['biomass (GW)'].astype(int)

UTC DateTime
2024-08-10 23:00:00    0
2024-08-11 00:00:00    0
2024-08-11 01:00:00    0
2024-08-11 02:00:00    0
2024-08-11 03:00:00    0
                      ..
8/9/2024 17:00         0
8/9/2024 18:00         0
8/9/2024 19:00         0
8/9/2024 20:00         0
8/9/2024 21:00         0
Name: biomass (GW), Length: 118667, dtype: int32

In [548]:
# keep only the rows that report biomass production above zero
has_biomass = df_final['biomass (GW)'] > 0
df_biomass = df_final.loc[has_biomass]

In [549]:
# Summary statistics for the rows with non-zero biomass production.
df_biomass.describe()

Unnamed: 0,carbonIntensity,nuclear (GW),geothermal (GW),biomass (GW),coal (GW),wind (GW),solar (GW),hydro (GW),gas (GW),oil (GW),unknown (GW),hydro_discharge (GW),battery_discharge (GW),PowerProductionTotal (GW),PowerImportTotal (GW),PowerExportTotal (GW)
count,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0
mean,326.19741,2177.291648,251.184606,327.38544,3134.990472,4972.750633,2311.726664,1423.020396,8386.15766,8.899211,9.597886,-0.210362,-67.915141,23370.0728,505.049129,444.752121
std,133.290649,693.185244,355.961722,232.764744,4877.031257,5992.487623,5323.527894,1068.706614,3810.67035,47.995301,20.177277,4.789832,1834.696816,10479.060687,699.731034,507.065988
min,35.0,538.0,0.0,7.0,0.0,2.0,0.0,196.0,0.0,0.0,0.0,-179.0,-7619.0,7479.0,0.0,0.0
25%,255.0,1887.0,0.0,13.0,0.0,482.0,0.0,649.0,5834.0,0.0,0.0,0.0,0.0,13012.0,0.0,23.0
50%,319.0,2218.0,0.0,423.0,0.0,2139.0,28.0,1165.0,7770.0,0.0,0.0,0.0,0.0,23470.0,194.0,248.0
75%,380.0,2273.0,730.0,506.0,6313.0,7766.0,497.0,1814.0,10314.0,0.0,20.0,0.0,14.0,31653.0,808.0,731.0
max,704.0,3362.0,861.0,685.0,18579.0,21843.0,19379.0,5660.0,24771.0,1022.0,681.0,0.0,7849.0,52314.0,5261.0,3018.0


In [550]:
# distinct region codes that appear among the non-zero biomass rows
region_column = df_biomass['region']
biomassRegions = region_column.unique()
biomassRegions

array(['CAL_CISO', 'CENT_SWPP', 'NE_ISNE'], dtype=object)

In [551]:
# All rows (zero or not) for the regions that report any biomass production.
# The regions come from biomassRegions rather than hardcoded codes, so this check
# stays correct when the set of biomass-producing regions changes.
df_biomassregions = df_final.loc[df_final['region'].isin(biomassRegions)]
df_biomassregions.describe()

Unnamed: 0,carbonIntensity,nuclear (GW),geothermal (GW),biomass (GW),coal (GW),wind (GW),solar (GW),hydro (GW),gas (GW),oil (GW),unknown (GW),hydro_discharge (GW),battery_discharge (GW),PowerProductionTotal (GW),PowerImportTotal (GW),PowerExportTotal (GW)
count,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0,6717.0
mean,326.19741,2177.291648,251.184606,327.38544,3134.990472,4972.750633,2311.726664,1423.020396,8386.15766,8.899211,9.597886,-0.210362,-67.915141,23370.0728,505.049129,444.752121
std,133.290649,693.185244,355.961722,232.764744,4877.031257,5992.487623,5323.527894,1068.706614,3810.67035,47.995301,20.177277,4.789832,1834.696816,10479.060687,699.731034,507.065988
min,35.0,538.0,0.0,7.0,0.0,2.0,0.0,196.0,0.0,0.0,0.0,-179.0,-7619.0,7479.0,0.0,0.0
25%,255.0,1887.0,0.0,13.0,0.0,482.0,0.0,649.0,5834.0,0.0,0.0,0.0,0.0,13012.0,0.0,23.0
50%,319.0,2218.0,0.0,423.0,0.0,2139.0,28.0,1165.0,7770.0,0.0,0.0,0.0,0.0,23470.0,194.0,248.0
75%,380.0,2273.0,730.0,506.0,6313.0,7766.0,497.0,1814.0,10314.0,0.0,20.0,0.0,14.0,31653.0,808.0,731.0
max,704.0,3362.0,861.0,685.0,18579.0,21843.0,19379.0,5660.0,24771.0,1022.0,681.0,0.0,7849.0,52314.0,5261.0,3018.0


The biomassregions dataframe looks identical to the biomass dataframe. This is a check that selecting only the rows with non-zero biomass production captured all of the data for the regions that produce power from biomass. In other words, if the two dataframes are identical, biomass generation is present in every hourly row for those regions.

Another check: the minimum biomass value in the biomassregions dataframe is not zero.

In [552]:
# Largest hourly biomass value for each of the three biomass-producing regions.
biomass_values = df_biomass['biomass (GW)']
region_labels = df_biomass['region']
maxCAL_value, maxSWPP_value, maxNE_value = (
    biomass_values[region_labels == code].max()
    for code in ('CAL_CISO', 'CENT_SWPP', 'NE_ISNE')
)
# NOTE(review): the message labels the values GW - confirm the unit against the API.
print(f" Maximum production from biomass hourly in California is {maxCAL_value} GW, in Midwest is {maxSWPP_value} GW, and in New England is {maxNE_value} GW.")

 Maximum production from biomass hourly in California is 527.0 GW, in Midwest is 15.0 GW, and in New England is 685.0 GW.


In [553]:
# Median hourly biomass value for each of the three biomass-producing regions.
biomass_values = df_biomass['biomass (GW)']
region_labels = df_biomass['region']
medianCAL_value, medianSWPP_value, medianNE_value = (
    biomass_values[region_labels == code].median()
    for code in ('CAL_CISO', 'CENT_SWPP', 'NE_ISNE')
)
# NOTE(review): the message labels the values GW - confirm the unit against the API.
print(f" Median production from biomass hourly in California is {medianCAL_value} GW, in Midwest is {medianSWPP_value} GW, and in New England is {medianNE_value} GW.")

 Median production from biomass hourly in California is 460.0 GW, in Midwest is 13.0 GW, and in New England is 558.0 GW.
