# Electricity Generation from api.electricitymap.org
---


SECTION 1 
collect data on carbon intensity, zones within SW , and power breakdown from the electricitymap api

In [1]:
# Dependencies and Setup

import pandas as pd
import requests
from datetime import datetime, timedelta

In [2]:

# zones for electrical utilities in US
zones = ["US-SW-PNM", "US-SW-EPE", "US-SW-WALC", "US-NW-PACE", "US-NW-PSCO", "US-CENT-SWPP", "US-TEX-ERCO", "US-MIDW-AECI","US-SW-AZPS","US-SW-AZPS",
         "US-NW-WACM", "US-SW-SRP", "US-SW-TEPC", "US-CENT-SPA", "US-CAL-IID", "US-CAL-CISO", "US-CAL-BANC","US-CAL-BANC", "US-CAL-TIDC", 
          "US-CAR-CPLE", "US-CAR-CPLW", "US-CAR-DUK", "US-CAR-SC", "US-CAR-SCEG", "US-CAR-YAD", "US-FLA-FMPP", "US-FLA-FPC" , "US-FLA-FPL",
          "US-FLA-GVL" , "US-FLA-HST", "US-FLA-JEA", "US-FLA-SEC", "US-FLA-TAL", "US-FLA-TEC", "US-MIDW-AECI" , "US-MIDW-LGEE", "US-MIDW-MISO",
          "US-NE-ISNE", "US-NW-BPAT", "US-NW-CHPD", "US-NW-DOPD", "US-NW-GCPD", "US-NW-GRID",  "US-NW-IPCO" , "US-NW-NWMT", "US-NW-NEVP", 
           "US-NW-PACW",  "US-NW-PGE", "US-NW-PSEI", "US-NW-SCL", "US-NW-TPWR", "US-NW-WAUW", "US-NY-NYIS", "US-SE-SEPA", "US-SE-SOCO" , 
           "US-TEN-TVA"]


In [3]:
# simplify region names by removing "US-" for use in recording the data
regions = []
for zone in zones:
    region_code = zone[3:]
    new_code = region_code.replace('-', '_',1)
    regions.append(new_code)  

['SW_PNM', 'SW_EPE', 'SW_WALC', 'NW_PACE', 'NW_PSCO', 'CENT_SWPP', 'TEX_ERCO', 'MIDW_AECI', 'SW_AZPS', 'SW_AZPS', 'NW_WACM', 'SW_SRP', 'SW_TEPC', 'CENT_SPA', 'CAL_IID', 'CAL_CISO', 'CAL_BANC', 'CAL_BANC', 'CAL_TIDC', 'CAR_CPLE', 'CAR_CPLW', 'CAR_DUK', 'CAR_SC', 'CAR_SCEG', 'CAR_YAD', 'FLA_FMPP', 'FLA_FPC', 'FLA_FPL', 'FLA_GVL', 'FLA_HST', 'FLA_JEA', 'FLA_SEC', 'FLA_TAL', 'FLA_TEC', 'MIDW_AECI', 'MIDW_LGEE', 'MIDW_MISO', 'NE_ISNE', 'NW_BPAT', 'NW_CHPD', 'NW_DOPD', 'NW_GCPD', 'NW_GRID', 'NW_IPCO', 'NW_NWMT', 'NW_NEVP', 'NW_PACW', 'NW_PGE', 'NW_PSEI', 'NW_SCL', 'NW_TPWR', 'NW_WAUW', 'NY_NYIS', 'SE_SEPA', 'SE_SOCO', 'TEN_TVA']


In [4]:
# get carbon intensity history for the US utilities
urls = []
for index, url in enumerate(zones):
    url = f'https://api.electricitymap.org/v3/carbon-intensity/history?zone={zones[index]}'
    urls.append(url)

# store responses in responses_dict dictionary
responses_dict = {}
for idx, url in enumerate(urls):
    response = requests.get(url)
    responses_dict[f"response_{idx+1}"] = response.json()


In [5]:
#request power breakdown for the US utilities
pburls = []
for index, url in enumerate(zones):
    pburl = f'https://api.electricitymap.org/v3/power-breakdown/history?zone={zones[index]}'
    pburls.append(pburl)

#store response in power_breakdown_responses_dict dictionary
power_breakdown_responses_dict = {}
for idx, pburl in enumerate(pburls):
    response = requests.get(pburl)
    power_breakdown_responses_dict[f"response_{idx+1}"] = response.json()


In [6]:
#  Create a dictionary with the desired keys and values for response 1 from each dictionary
carbon_intensity =[]
dateandtime = []
isEstimated = []
nuc = []
geo = []
bio = []
coal = []
wind = []
solar = []
hydro = []
gas =[]
oil = []
unknown = []
powerImport = []
powerExport = []
powerProdTotal = []
hydro_discharge = []
battery_discharge = []
Powest = []
region = []

#populate each list with values from the dictionaries
for j in range(len(power_breakdown_responses_dict)):
    # j is or each region (each region is a response)
    for i in range(24):
        # i is for each hour (24 hours)
        CI = responses_dict[f'response_{j+1}']['history'][i]['carbonIntensity']
        carbon_intensity.append(CI)
        DT = responses_dict[f'response_{j+1}']['history'][i]['datetime']
        dateandtime.append(DT)
        EST = responses_dict[f'response_{j+1}']['history'][i]['isEstimated']
        isEstimated.append(EST)
        NUC = power_breakdown_responses_dict[f'response_{j+1}']['history'][i]["powerProductionBreakdown"]['nuclear']
        nuc.append(NUC)
        GEO = power_breakdown_responses_dict[f'response_{j+1}']['history'][i]["powerProductionBreakdown"]['geothermal']
        geo.append(GEO)
        BIO = power_breakdown_responses_dict[f'response_{j+1}']['history'][i]["powerProductionBreakdown"]['biomass']
        bio.append(BIO)
        COAL = power_breakdown_responses_dict[f'response_{j+1}']['history'][i]["powerProductionBreakdown"]['coal']
        coal.append(COAL)
        WIND = power_breakdown_responses_dict[f'response_{j+1}']['history'][i]["powerProductionBreakdown"]['wind']
        wind.append(WIND)
        SOLAR = power_breakdown_responses_dict[f'response_{j+1}']['history'][i]["powerProductionBreakdown"]['solar']
        solar.append(SOLAR)
        HYDRO = power_breakdown_responses_dict[f'response_{j+1}']['history'][i]["powerProductionBreakdown"]['hydro']
        hydro.append(HYDRO)
        GAS = power_breakdown_responses_dict[f'response_{j+1}']['history'][i]["powerProductionBreakdown"]['gas']
        gas.append(GAS)
        OIL = power_breakdown_responses_dict[f'response_{j+1}']['history'][i]["powerProductionBreakdown"]['oil']
        oil.append(OIL)
        UNKNOWN = power_breakdown_responses_dict[f'response_{j+1}']['history'][i]["powerProductionBreakdown"]['unknown']
        unknown.append(UNKNOWN)
        HYDDIS = power_breakdown_responses_dict[f'response_{j+1}']['history'][i]["powerProductionBreakdown"]['hydro discharge']
        hydro_discharge.append(HYDDIS)
        BATDIS = power_breakdown_responses_dict[f'response_{j+1}']['history'][i]["powerProductionBreakdown"]['battery discharge']
        battery_discharge.append(BATDIS)
        PPRODTOT = power_breakdown_responses_dict[f'response_{j+1}']['history'][i]['powerProductionTotal']
        powerProdTotal.append(PPRODTOT)
        PIMPTOT = power_breakdown_responses_dict[f'response_{j+1}']['history'][i]['powerImportTotal']
        powerImport.append(PIMPTOT)
        PEXPTOT = power_breakdown_responses_dict[f'response_{j+1}']['history'][i]['powerExportTotal']
        powerExport.append(PEXPTOT)
        POWEST = power_breakdown_responses_dict[f'response_{j+1}']['history'][i]['isEstimated']
        Powest.append(POWEST)
        region.append(regions[j])
        

In [7]:
# Create Series and then Create the dataframe
ser_carbon_intensity = pd.Series(carbon_intensity)
ser_dateandtime = pd.Series(dateandtime)
ser_EST = pd.Series(isEstimated)
df = pd.DataFrame({'region': region, 'datetime':ser_dateandtime,'carbonIntensity':ser_carbon_intensity,'isEstimated':ser_EST,
                          'nuclear (GW)':nuc,'geothermal (GW)':geo, 'biomass (GW)':bio, 'coal (GW)':coal, 'wind (GW)':wind,
                          'solar (GW)':solar, 'hydro (GW)':hydro, 'gas (GW)': gas, 'oil (GW)': oil, 'unknown (GW)':unknown, 
                          'hydro_discharge (GW)':hydro_discharge, 'battery_discharge (GW)':battery_discharge,
                          'PowerProductionTotal (GW)':powerProdTotal, 'PowerImportTotal (GW)':powerImport, 
                          'PowerExportTotal (GW)':powerExport, 'ConsumptionEstimated':Powest})


Unnamed: 0,region,datetime,carbonIntensity,isEstimated,nuclear (GW),geothermal (GW),biomass (GW),coal (GW),wind (GW),solar (GW),hydro (GW),gas (GW),oil (GW),unknown (GW),hydro_discharge (GW),battery_discharge (GW),PowerProductionTotal (GW),PowerImportTotal (GW),PowerExportTotal (GW),ConsumptionEstimated
0,SW_PNM,2024-08-13T15:00:00.000Z,339.0,False,,,,157.0,11.0,680.0,15.0,401.0,,,,,1264,,,False
1,SW_PNM,2024-08-13T16:00:00.000Z,338.0,False,,,,159.0,7.0,684.0,15.0,383.0,,,,,1248,,,False
2,SW_PNM,2024-08-13T17:00:00.000Z,304.0,False,,,,158.0,32.0,807.0,14.0,384.0,,,,,1395,,,False
3,SW_PNM,2024-08-13T18:00:00.000Z,272.0,True,,,,158.0,234.0,804.0,14.0,411.0,,,,,1620,,,True
4,SW_PNM,2024-08-13T19:00:00.000Z,248.0,True,,,,158.0,435.0,801.0,13.0,437.0,,,,,1845,,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1339,TEN_TVA,2024-08-14T10:00:00.000Z,354.0,True,7689.0,,,2593.0,3.0,0.0,2226.0,7064.0,0.0,300.0,,,19876,0.0,502.0,True
1340,TEN_TVA,2024-08-14T11:00:00.000Z,354.0,True,7679.0,,,2683.0,3.0,17.0,2676.0,7423.0,0.0,311.0,,,20792,0.0,595.0,True
1341,TEN_TVA,2024-08-14T12:00:00.000Z,352.0,True,7660.0,,,2726.0,3.0,117.0,2906.0,7574.0,0.0,313.0,,,21299,0.0,713.0,True
1342,TEN_TVA,2024-08-14T13:00:00.000Z,350.0,True,7653.0,,,2779.0,3.0,281.0,3007.0,7536.0,0.0,327.0,,,21584,0.0,799.0,True


SECTION 2     
PowerBreakdown data transformation

In [9]:
# fill NA values with zeroes for energy values
df = df.fillna({'carbonIntensity':0,'nuclear (GW)': 0,'geothermal (GW)': 0,'biomass (GW)': 0, 'coal (GW)': 0, 'wind (GW)': 0, 'solar (GW)': 0, 
           'hydro (GW)': 0, 'gas (GW)': 0, 'oil (GW)': 0, 'unknown (GW)': 0, 'hydro_discharge (GW)':0, 
           'battery_discharge (GW)':0, 'PowerImportTotal (GW)':0, 'PowerExportTotal (GW)':0})

Unnamed: 0,carbonIntensity,nuclear (GW),geothermal (GW),biomass (GW),coal (GW),wind (GW),solar (GW),hydro (GW),gas (GW),oil (GW),unknown (GW),hydro_discharge (GW),battery_discharge (GW),PowerProductionTotal (GW),PowerImportTotal (GW),PowerExportTotal (GW)
count,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0
mean,416.935268,1099.407738,13.670387,18.951637,1418.31994,851.438244,521.093006,539.846726,3550.479911,1.182292,105.820685,-15.770089,-2.991071,8141.198661,182.130208,90.244048
std,233.666648,2426.368021,101.437688,97.959197,3723.081579,3056.93851,2282.219685,1199.762781,7316.056847,7.659952,366.791506,159.55567,448.511764,15795.723593,753.501018,333.53504
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2093.0,-5441.0,0.0,0.0,0.0
25%,277.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,288.0,0.0,0.0,0.0,0.0,888.25,0.0,0.0
50%,438.0,0.0,0.0,0.0,0.0,0.0,0.0,65.0,939.0,0.0,1.0,0.0,0.0,2192.5,0.0,0.0
75%,600.25,132.0,0.0,0.0,1206.5,192.25,141.25,542.25,2289.25,0.0,60.0,0.0,0.0,6780.25,0.0,0.0
max,947.0,11665.0,780.0,594.0,29367.0,21052.0,19431.0,9046.0,46693.0,104.0,8215.0,291.0,7020.0,99510.0,7380.0,2363.0


In [11]:
# Date Time work

# import datetime dependencies

from datetime import datetime

# set up lists to hold parsed data and DateTime as a datetime datetype
dates=[]
times = []
DateTime =[]

# convert date time strings
for i in range(len(df['datetime'])):

    # Parse the timestamp string to a datetime object
    dt_obj = datetime.strptime(df.iloc[i,1], '%Y-%m-%dT%H:%M:%S.%fZ')

    date = dt_obj.strftime('%Y-%m-%d')
    time = dt_obj.strftime('%H:%M:%S')

#add the new times and dates to lists

    dates.append(date)
    times.append(time)
    DateTime.append(dt_obj)

# add the times and dates to new columns in the data frame
df['UTC time'] = times
df['UTC date'] = dates
df['UTC DateTime'] = DateTime

In [13]:
#  reindex the data frame to make the UTC DateTime column the index 
df_reindex = df.set_index("UTC DateTime", drop=True, inplace=True)

In [14]:
#drop the datetime column that contains a string
df_reindex = df.drop('datetime', axis=1)

Unnamed: 0,carbonIntensity,nuclear (GW),geothermal (GW),biomass (GW),coal (GW),wind (GW),solar (GW),hydro (GW),gas (GW),oil (GW),unknown (GW),hydro_discharge (GW),battery_discharge (GW),PowerProductionTotal (GW),PowerImportTotal (GW),PowerExportTotal (GW)
count,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0
mean,416.935268,1099.407738,13.670387,18.951637,1418.31994,851.438244,521.093006,539.846726,3550.479911,1.182292,105.820685,-15.770089,-2.991071,8141.198661,182.130208,90.244048
std,233.666648,2426.368021,101.437688,97.959197,3723.081579,3056.93851,2282.219685,1199.762781,7316.056847,7.659952,366.791506,159.55567,448.511764,15795.723593,753.501018,333.53504
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2093.0,-5441.0,0.0,0.0,0.0
25%,277.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,288.0,0.0,0.0,0.0,0.0,888.25,0.0,0.0
50%,438.0,0.0,0.0,0.0,0.0,0.0,0.0,65.0,939.0,0.0,1.0,0.0,0.0,2192.5,0.0,0.0
75%,600.25,132.0,0.0,0.0,1206.5,192.25,141.25,542.25,2289.25,0.0,60.0,0.0,0.0,6780.25,0.0,0.0
max,947.0,11665.0,780.0,594.0,29367.0,21052.0,19431.0,9046.0,46693.0,104.0,8215.0,291.0,7020.0,99510.0,7380.0,2363.0


Section 3
Merge with previous data

In [16]:
# import previous cleaned file into a pandas dataframe
df_us_energy = pd.read_csv('data/allRegions/simplifiedrunningfile.csv')
df_us_energy_dropped = df_us_energy.drop_duplicates(subset=['UTC date', 'UTC time', 'region'],keep='first')
df_us_energy_reindex=df_us_energy_dropped.set_index("UTC DateTime")

Unnamed: 0_level_0,region,carbonIntensity,isEstimated,nuclear (GW),geothermal (GW),biomass (GW),coal (GW),wind (GW),solar (GW),hydro (GW),...,oil (GW),unknown (GW),hydro_discharge (GW),battery_discharge (GW),PowerProductionTotal (GW),PowerImportTotal (GW),PowerExportTotal (GW),ConsumptionEstimated,UTC time,UTC date
UTC DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-08-10 23:00:00,CAL_BANC,251.0,False,0.0,0.0,0.0,0.0,0.0,252.0,1152.0,...,0.0,12.0,0.0,0.0,2587,0.0,0.0,False,23:00:00,2024-08-10
2024-08-11 00:00:00,CAL_BANC,237.0,False,0.0,0.0,0.0,0.0,0.0,218.0,1287.0,...,0.0,0.0,0.0,0.0,2651,0.0,0.0,False,00:00:00,2024-08-11
2024-08-11 01:00:00,CAL_BANC,242.0,False,0.0,0.0,0.0,0.0,0.0,106.0,1406.0,...,0.0,0.0,0.0,0.0,2710,0.0,0.0,False,01:00:00,2024-08-11
2024-08-11 02:00:00,CAL_BANC,249.0,False,0.0,0.0,0.0,0.0,0.0,16.0,1424.0,...,0.0,0.0,0.0,0.0,2649,0.0,0.0,False,02:00:00,2024-08-11
2024-08-11 03:00:00,CAL_BANC,258.0,False,0.0,0.0,0.0,0.0,0.0,0.0,1343.0,...,0.0,13.0,0.0,0.0,2550,0.0,0.0,False,03:00:00,2024-08-11


In [18]:
# concatentate current data with existing file
df_both = pd.concat([df_us_energy_reindex,df_reindex,])
# sort the rows by region and by date
df_sort = df_both.sort_values(by=['region', 'UTC date'])

Unnamed: 0,carbonIntensity,nuclear (GW),geothermal (GW),biomass (GW),coal (GW),wind (GW),solar (GW),hydro (GW),gas (GW),oil (GW),unknown (GW),hydro_discharge (GW),battery_discharge (GW),PowerProductionTotal (GW),PowerImportTotal (GW),PowerExportTotal (GW)
count,8068.0,8075.0,8075.0,8075.0,8075.0,8075.0,8075.0,8075.0,8075.0,8075.0,8075.0,8075.0,8075.0,8075.0,8075.0,8075.0
mean,419.529499,1126.838638,14.609536,19.600743,1418.960495,721.833932,522.45548,539.190712,3547.337585,1.32904,94.409412,-16.671827,-6.142663,8025.655975,190.195294,67.670712
std,247.892745,2448.170297,105.858624,98.661449,3587.552906,2549.279333,2297.193352,1254.076699,7227.962495,8.877477,350.278812,170.364845,431.620533,15468.739201,790.502327,255.759626
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2350.0,-5461.0,0.0,0.0,0.0
25%,278.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,209.0,0.0,0.0,0.0,0.0,649.5,0.0,0.0
50%,442.0,0.0,0.0,0.0,0.0,0.0,0.0,59.0,924.0,0.0,0.0,0.0,0.0,2086.0,0.0,0.0
75%,590.0,326.0,0.0,0.0,1156.5,170.0,120.0,511.5,2522.5,0.0,60.0,0.0,0.0,7050.5,0.0,0.0
max,1250.0,11767.0,809.0,594.0,29367.0,26093.0,20062.0,26491.0,46693.0,225.0,13129.0,291.0,7454.0,100882.0,8578.0,2363.0


In [19]:
# drop duplicate rows that have the same region, UTC time, and UTC date.  Keep the last row as it may contain updated data
df_final = df_sort.drop_duplicates(subset=['region', 'UTC time', 'UTC date'],keep='last')

Unnamed: 0,carbonIntensity,nuclear (GW),geothermal (GW),biomass (GW),coal (GW),wind (GW),solar (GW),hydro (GW),gas (GW),oil (GW),unknown (GW),hydro_discharge (GW),battery_discharge (GW),PowerProductionTotal (GW),PowerImportTotal (GW),PowerExportTotal (GW)
count,7625.0,7632.0,7632.0,7632.0,7632.0,7632.0,7632.0,7632.0,7632.0,7632.0,7632.0,7632.0,7632.0,7632.0,7632.0,7632.0
mean,419.742426,1135.546777,14.759696,19.776074,1412.139806,728.979167,490.100891,539.040749,3549.627752,1.338836,94.305687,-17.481394,-2.943658,8005.758648,192.755241,66.455189
std,248.926214,2456.246438,106.457701,99.078128,3565.348784,2572.914518,2220.242976,1257.76045,7212.215313,8.894788,356.249046,175.055672,425.780869,15394.554422,803.254822,249.903942
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-2350.0,-5461.0,0.0,0.0,0.0
25%,278.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,203.0,0.0,0.0,0.0,0.0,632.0,0.0,0.0
50%,442.0,0.0,0.0,0.0,0.0,0.0,0.0,58.0,915.0,0.0,0.0,0.0,0.0,2063.0,0.0,0.0
75%,590.0,326.0,0.0,0.0,1123.0,169.25,98.0,510.0,2601.0,0.0,60.0,0.0,0.0,7118.5,0.0,0.0
max,1250.0,11767.0,809.0,594.0,29367.0,26093.0,20062.0,26491.0,46592.0,225.0,13129.0,291.0,7454.0,100882.0,8578.0,2363.0


In [20]:

df_final.to_csv('data/allRegions/simplifiedrunningfile.csv')