# Electricity Generation from api.electricitymap.org
---

SECTION 1 
Collect carbon-intensity history and power-breakdown history for each US utility zone from the electricitymap API.

In [134]:
# Dependencies and Setup

import pandas as pd
import requests


In [135]:

# zones for electrical utilities in US
zones = ["US-SW-PNM", "US-SW-EPE", "US-SW-WALC", "US-NW-PACE", "US-NW-PSCO", "US-CENT-SWPP", "US-TEX-ERCO", "US-MIDW-AECI","US-SW-AZPS","US-SW-AZPS",
         "US-NW-WACM", "US-SW-SRP", "US-SW-TEPC", "US-CENT-SPA", "US-CAL-IID", "US-CAL-CISO", "US-CAL-BANC","US-CAL-BANC", "US-CAL-TIDC", 
          "US-CAR-CPLE", "US-CAR-CPLW", "US-CAR-DUK", "US-CAR-SC", "US-CAR-SCEG", "US-CAR-YAD", "US-FLA-FMPP", "US-FLA-FPC" , "US-FLA-FPL",
          "US-FLA-GVL" , "US-FLA-HST", "US-FLA-JEA", "US-FLA-SEC", "US-FLA-TAL", "US-FLA-TEC", "US-MIDW-AECI" , "US-MIDW-LGEE", "US-MIDW-MISO",
          "US-NE-ISNE", "US-NW-BPAT", "US-NW-CHPD", "US-NW-DOPD", "US-NW-GCPD", "US-NW-GRID",  "US-NW-IPCO" , "US-NW-NWMT", "US-NW-NEVP", 
           "US-NW-PACW",  "US-NW-PGE", "US-NW-PSEI", "US-NW-SCL", "US-NW-TPWR", "US-NW-WAUW", "US-NY-NYIS", "US-SE-SEPA", "US-SE-SOCO" , 
           "US-TEN-TVA"]


In [136]:

# get carbon intensity history for the US utilities
urls = []
for index, url in enumerate(zones):
    url = f'https://api.electricitymap.org/v3/carbon-intensity/history?zone={zones[index]}'
    urls.append(url)

# store responses in responses_dict dictionary
responses_dict = {}
for idx, url in enumerate(urls):
    response = requests.get(url)
    responses_dict[f"response_{idx+1}"] = response.json()


In [137]:
#request power breakdown
pburls = []
for index, url in enumerate(zones):
    pburl = f'https://api.electricitymap.org/v3/power-breakdown/history?zone={zones[index]}'
    pburls.append(pburl)

#store response in power_breakdown_responses_dict dictionary
power_breakdown_responses_dict = {}
for idx, pburl in enumerate(pburls):
    response = requests.get(pburl)
    power_breakdown_responses_dict[f"response_{idx+1}"] = response.json()


SECTION 2     
PowerBreakdown data transformation

In [138]:
# pull data from power breakdown response and the carbon_intensity ressponse
region = power_breakdown_responses_dict['response_1']['history'][0]['zone']
datetime = power_breakdown_responses_dict['response_1']['history'][0]['datetime']
nuclear = power_breakdown_responses_dict['response_1']['history'][0]["powerConsumptionBreakdown"]['nuclear']
geothermal = power_breakdown_responses_dict['response_1']['history'][0]["powerConsumptionBreakdown"]['geothermal']
biomass = power_breakdown_responses_dict['response_1']['history'][0]["powerConsumptionBreakdown"]['biomass']
coal = power_breakdown_responses_dict['response_1']['history'][0]["powerConsumptionBreakdown"]['coal']
wind = power_breakdown_responses_dict['response_1']['history'][0]["powerConsumptionBreakdown"]['wind']
solar = power_breakdown_responses_dict['response_1']['history'][0]["powerConsumptionBreakdown"]['solar']
hydro = power_breakdown_responses_dict['response_1']['history'][0]["powerConsumptionBreakdown"]['hydro']
gas = power_breakdown_responses_dict['response_1']['history'][0]["powerConsumptionBreakdown"]['gas']
oil = power_breakdown_responses_dict['response_1']['history'][0]["powerConsumptionBreakdown"]['oil']
unknown = power_breakdown_responses_dict['response_1']['history'][0]["powerConsumptionBreakdown"]['unknown']
hydro_discharge = power_breakdown_responses_dict['response_1']['history'][0]["powerConsumptionBreakdown"]['hydro discharge']
battery_discharge = power_breakdown_responses_dict['response_1']['history'][0]["powerConsumptionBreakdown"]['battery discharge']
renewable_percentage = power_breakdown_responses_dict['response_1']['history'][0]["renewablePercentage"]
total_consumption = power_breakdown_responses_dict['response_1']['history'][0]["powerConsumptionTotal"]
estimated = power_breakdown_responses_dict['response_1']['history'][0]["isEstimated"]
carbon_Intensity = responses_dict['response_1']['history'][0]["carbonIntensity"]
estimated_C = responses_dict['response_1']['history'][0]["isEstimated"]

# create a dictionary with first values for this zone
us_pnm1 = {'region':region,'datetime':datetime,'nuclear':nuclear,'geothermal':geothermal,'biomass':biomass, 'coal':coal, 'wind':wind, 'solar':solar, 
           'hydro':hydro, 'gas':gas, 'oil':oil, 'unknown':unknown, 'hydro-discharge':hydro_discharge, 
           'battery_discharge':battery_discharge, 'renewable_percentage':renewable_percentage, 'total_consumption':total_consumption, 
           'estimated':estimated, 'carbon_Intensity':carbon_Intensity, 'estimated_C':estimated_C}

# Create a dataFrame with the first values
df_US = pd.DataFrame.from_dict(us_pnm1,orient='index')


In [139]:
# Data wrangling from the response to create a legible dataFrame
# outer for loop for regions/responses
for reg in range(len(zones)):
    #for each zone
    response = f"response_{reg+1}"
   
# pull data from response for each time in this file for this region and add to the existing dataframe
    for i in range(24):
        # 24 is for the 24 hours of data for each zone
        region = power_breakdown_responses_dict['response_1']['history'][i]['zone']
        datetime = power_breakdown_responses_dict['response_1']['history'][i]['datetime']
        nuclear = power_breakdown_responses_dict['response_1']['history'][i]["powerConsumptionBreakdown"]['nuclear']
        geothermal = power_breakdown_responses_dict['response_1']['history'][i]["powerConsumptionBreakdown"]['geothermal']
        biomass = power_breakdown_responses_dict['response_1']['history'][i]["powerConsumptionBreakdown"]['biomass']
        coal = power_breakdown_responses_dict['response_1']['history'][i]["powerConsumptionBreakdown"]['coal']
        wind = power_breakdown_responses_dict['response_1']['history'][i]["powerConsumptionBreakdown"]['wind']
        solar = power_breakdown_responses_dict['response_1']['history'][i]["powerConsumptionBreakdown"]['solar']
        hydro = power_breakdown_responses_dict['response_1']['history'][i]["powerConsumptionBreakdown"]['hydro']
        gas = power_breakdown_responses_dict['response_1']['history'][i]["powerConsumptionBreakdown"]['gas']
        oil = power_breakdown_responses_dict['response_1']['history'][i]["powerConsumptionBreakdown"]['oil']
        unknown = power_breakdown_responses_dict['response_1']['history'][i]["powerConsumptionBreakdown"]['unknown']
        hydro_discharge = power_breakdown_responses_dict['response_1']['history'][i]["powerConsumptionBreakdown"]['hydro discharge']
        battery_discharge = power_breakdown_responses_dict['response_1']['history'][i]["powerConsumptionBreakdown"]['battery discharge']
        renewable_percentage = power_breakdown_responses_dict['response_1']['history'][i]["renewablePercentage"]
        total_consumption = power_breakdown_responses_dict['response_1']['history'][i]["powerConsumptionTotal"]
        estimated = power_breakdown_responses_dict['response_1']['history'][i]["isEstimated"]
        carbon_Intensity = responses_dict['response_1']['history'][0]["carbonIntensity"]
        estimated_C = responses_dict['response_1']['history'][0]["isEstimated"]

        # this 24 is also for the 23 hours of data for each zone
        df_US[24*reg+i]= {'region':region, 'datetime':datetime,'nuclear':nuclear,'geothermal':geothermal,'biomass':biomass, 'coal':coal, 'wind':wind, 'solar':solar, 
           'hydro':hydro, 'gas':gas, 'oil':oil, 'unknown':unknown, 'hydro-discharge':hydro_discharge, 
           'battery_discharge':battery_discharge, 'renewable_percentage':renewable_percentage, 'total_consumption':total_consumption, 
           'estimated':estimated, 'carbon_Intensity':carbon_Intensity, 'estimated_C':estimated_C}
#set up the times as rows and measurements as columns
df_US_new = df_US.transpose()

# check data types
df_US_new.describe()

  df_US[24*reg+i]= {'region':region, 'datetime':datetime,'nuclear':nuclear,'geothermal':geothermal,'biomass':biomass, 'coal':coal, 'wind':wind, 'solar':solar,


Unnamed: 0,region,datetime,nuclear,geothermal,biomass,coal,wind,solar,hydro,gas,oil,unknown,hydro-discharge,battery_discharge,renewable_percentage,total_consumption,estimated,carbon_Intensity,estimated_C
count,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344,1344
unique,1,24,1,1,1,21,21,15,1,24,1,17,1,1,20,24,2,1,1
top,US-SW-PNM,2024-07-21T16:00:00.000Z,0,0,0,157,0,0,18,324,0,0,0,0,20,1893,False,212,False
freq,1344,56,1344,1344,1344,112,168,560,1344,56,1344,392,1344,1344,112,56,840,1344,1344


In [140]:
# fill NA values with zeroes for energy values
df_US_new = df_US_new.fillna({'nuclear': 0,'geothermal': 0,'biomass': 0, 'coal': 0, 'wind': 0, 'solar': 0, 
           'hydro': 0, 'gas': 0, 'oil': 0, 'unknown': 0, 'hydro-discharge':0, 
           'battery_discharge':0})
df_US_new.describe()

Unnamed: 0,nuclear,geothermal,biomass,coal,wind,solar,hydro,gas,oil,unknown,hydro-discharge,battery_discharge
count,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0,1344.0
mean,0.0,0.0,0.0,344.875,382.875,425.833333,18.0,523.083333,0.0,31.916667,0.0,0.0
std,0.0,0.0,0.0,100.051524,389.319557,442.039714,0.0,135.977021,0.0,43.620432,0.0,0.0
min,0.0,0.0,0.0,157.0,0.0,0.0,18.0,310.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,319.0,146.5,0.0,18.0,394.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,351.5,243.5,206.0,18.0,539.0,0.0,13.5,0.0,0.0
75%,0.0,0.0,0.0,385.0,404.5,934.25,18.0,643.25,0.0,31.25,0.0,0.0
max,0.0,0.0,0.0,502.0,1325.0,1055.0,18.0,718.0,0.0,141.0,0.0,0.0


In [141]:
# convert measured Energy values to integers in Giga Watts
#convert_dict = {'hydro': int}
convert_dict = {'nuclear': int, 'geothermal': int, 'biomass': int, 'coal': int, 'wind': int, 'solar': int, 'hydro': int, 'gas': int, 'oil': int, 
                'hydro-discharge': int, 'battery_discharge': int, 'renewable_percentage': int, 'total_consumption': int
                }
 # note - the unknown column only has values rarely - converting null values to integer doesn't work so this is left as an object
df_US_new = df_US_new.astype(convert_dict)

#check that data types are changed to int
df_US_new.dtypes

region                  object
datetime                object
nuclear                  int32
geothermal               int32
biomass                  int32
coal                     int32
wind                     int32
solar                    int32
hydro                    int32
gas                      int32
oil                      int32
unknown                  int64
hydro-discharge          int32
battery_discharge        int32
renewable_percentage     int32
total_consumption        int32
estimated               object
carbon_Intensity        object
estimated_C             object
dtype: object

In [142]:
# Date Time work

# import datetime dependencies

from datetime import datetime

# set up lists to hold parsed data and DateTime as a datetime datetype
dates=[]
times = []
DateTime =[]

# convert date time strings
for i in range(len(df_US_new['datetime'])):

    # Parse the timestamp string to a datetime object
    dt_obj = datetime.strptime(df_US_new.iloc[i,1], '%Y-%m-%dT%H:%M:%S.%fZ')

    date = dt_obj.strftime('%Y-%m-%d')
    time = dt_obj.strftime('%H:%M:%S')

#add the new times and dates to lists

    dates.append(date)
    times.append(time)
    DateTime.append(dt_obj)

# add the times and dates to new columns in the data frame
df_US_new['UTC time'] = times
df_US_new['UTC date'] = dates
df_US_new['UTC DateTime'] = DateTime

In [143]:
#set the UTC DateTime as the index
df_US_new_reindex = df_US_new.set_index('UTC DateTime', inplace=True)

#drop the datetime column that contains a string
df_US_newer = df_US_new.drop('datetime', axis=1)
df_US_newer.head()

Unnamed: 0_level_0,region,nuclear,geothermal,biomass,coal,wind,solar,hydro,gas,oil,unknown,hydro-discharge,battery_discharge,renewable_percentage,total_consumption,estimated,carbon_Intensity,estimated_C,UTC time,UTC date
UTC DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2024-07-21 16:00:00,US-SW-PNM,0,0,0,157,382,1012,18,324,0,0,0,0,75,1893,False,212,False,16:00:00,2024-07-21
2024-07-21 17:00:00,US-SW-PNM,0,0,0,157,236,962,18,321,0,0,0,0,72,1694,False,212,False,17:00:00,2024-07-21
2024-07-21 18:00:00,US-SW-PNM,0,0,0,166,158,925,18,350,0,0,0,0,68,1617,False,212,False,18:00:00,2024-07-21
2024-07-21 19:00:00,US-SW-PNM,0,0,0,234,151,975,18,396,0,0,0,0,64,1774,False,212,False,19:00:00,2024-07-21
2024-07-21 20:00:00,US-SW-PNM,0,0,0,265,305,989,18,420,0,0,0,0,66,1997,False,212,False,20:00:00,2024-07-21


In [144]:
# convert carbon intensity measurement to an integer in g CO2e/kWh
convert_dict_C= {'carbon_Intensity': int}
 
df_US_newer = df_US_newer.astype(convert_dict_C)

#check that the datatype has been changed
df_US_newer.dtypes

region                  object
nuclear                  int32
geothermal               int32
biomass                  int32
coal                     int32
wind                     int32
solar                    int32
hydro                    int32
gas                      int32
oil                      int32
unknown                  int64
hydro-discharge          int32
battery_discharge        int32
renewable_percentage     int32
total_consumption        int32
estimated               object
carbon_Intensity         int32
estimated_C             object
UTC time                object
UTC date                object
dtype: object

Section 3
Merge new dataframe with running data from previous dates in runningUSenergy_data.csv

In [145]:
# import previous cleaned file into a pandas dataframe
df_us_energy = pd.read_csv('data/runningUSenergy_data.csv')
df_us_energy_reindex=df_us_energy.set_index("UTC DateTime")
df_us_energy_reindex.describe()

Unnamed: 0,nuclear(GW),geothermal(GW),biomass(GW),coal(GW),wind(GW),solar(GW),hydro(GW),gas(GW),oil,unknown,...,nuclear,geothermal,biomass,coal,wind,solar,hydro,gas,total_consumption,carbon_Intensity
count,13379.0,13379.0,13379.0,13379.0,13379.0,13379.0,13379.0,13379.0,13403.0,13403.0,...,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0
mean,1229.658196,15.005157,23.656178,1729.539652,595.054638,519.601614,653.336124,4187.631512,6.513467,97.747519,...,0.0,0.0,0.0,344.875,382.875,425.833333,18.0,523.083333,1726.583333,212.0
std,2560.032028,105.050185,109.85105,4548.255206,2085.45366,2050.877802,1497.468033,8179.049804,52.687573,205.302931,...,0.0,0.0,0.0,102.165387,397.545,451.379014,0.0,138.849909,555.908963,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,157.0,0.0,0.0,18.0,310.0,1006.0,212.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,238.0,0.0,0.0,...,0.0,0.0,0.0,319.0,146.5,0.0,18.0,394.0,1214.5,212.0
50%,0.0,0.0,0.0,36.0,0.0,0.0,76.0,992.0,0.0,9.0,...,0.0,0.0,0.0,351.5,243.5,206.0,18.0,539.0,1639.0,212.0
75%,738.5,0.0,0.0,1368.0,182.0,178.0,712.0,3333.0,0.0,95.0,...,0.0,0.0,0.0,385.0,404.5,934.25,18.0,643.25,2246.0,212.0
max,13803.0,812.0,657.0,49357.0,21642.0,19768.0,65867.0,54713.0,2054.0,2053.0,...,0.0,0.0,0.0,502.0,1325.0,1055.0,18.0,718.0,2701.0,212.0


In [146]:
df_us_energy_reindex.drop_duplicates(inplace=True)
df_us_energy_reindex.describe()

Unnamed: 0,nuclear(GW),geothermal(GW),biomass(GW),coal(GW),wind(GW),solar(GW),hydro(GW),gas(GW),oil,unknown,...,nuclear,geothermal,biomass,coal,wind,solar,hydro,gas,total_consumption,carbon_Intensity
count,13379.0,13379.0,13379.0,13379.0,13379.0,13379.0,13379.0,13379.0,13403.0,13403.0,...,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0
mean,1229.658196,15.005157,23.656178,1729.539652,595.054638,519.601614,653.336124,4187.631512,6.513467,97.747519,...,0.0,0.0,0.0,344.875,382.875,425.833333,18.0,523.083333,1726.583333,212.0
std,2560.032028,105.050185,109.85105,4548.255206,2085.45366,2050.877802,1497.468033,8179.049804,52.687573,205.302931,...,0.0,0.0,0.0,102.165387,397.545,451.379014,0.0,138.849909,555.908963,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,157.0,0.0,0.0,18.0,310.0,1006.0,212.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,238.0,0.0,0.0,...,0.0,0.0,0.0,319.0,146.5,0.0,18.0,394.0,1214.5,212.0
50%,0.0,0.0,0.0,36.0,0.0,0.0,76.0,992.0,0.0,9.0,...,0.0,0.0,0.0,351.5,243.5,206.0,18.0,539.0,1639.0,212.0
75%,738.5,0.0,0.0,1368.0,182.0,178.0,712.0,3333.0,0.0,95.0,...,0.0,0.0,0.0,385.0,404.5,934.25,18.0,643.25,2246.0,212.0
max,13803.0,812.0,657.0,49357.0,21642.0,19768.0,65867.0,54713.0,2054.0,2053.0,...,0.0,0.0,0.0,502.0,1325.0,1055.0,18.0,718.0,2701.0,212.0


In [147]:
# concatentate current data with existing file
df_both = pd.concat([df_us_energy_reindex,df_US_newer])

# drop duplicate rows
df_both.drop_duplicates(inplace=True)
df_both.describe()

Unnamed: 0,nuclear(GW),geothermal(GW),biomass(GW),coal(GW),wind(GW),solar(GW),hydro(GW),gas(GW),oil,unknown,...,nuclear,geothermal,biomass,coal,wind,solar,hydro,gas,total_consumption,carbon_Intensity
count,13379.0,13379.0,13379.0,13379.0,13379.0,13379.0,13379.0,13379.0,13403.0,13403.0,...,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0
mean,1229.658196,15.005157,23.656178,1729.539652,595.054638,519.601614,653.336124,4187.631512,6.513467,97.747519,...,0.0,0.0,0.0,344.875,382.875,425.833333,18.0,523.083333,1726.583333,212.0
std,2560.032028,105.050185,109.85105,4548.255206,2085.45366,2050.877802,1497.468033,8179.049804,52.687573,205.302931,...,0.0,0.0,0.0,102.165387,397.545,451.379014,0.0,138.849909,555.908963,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,157.0,0.0,0.0,18.0,310.0,1006.0,212.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,238.0,0.0,0.0,...,0.0,0.0,0.0,319.0,146.5,0.0,18.0,394.0,1214.5,212.0
50%,0.0,0.0,0.0,36.0,0.0,0.0,76.0,992.0,0.0,9.0,...,0.0,0.0,0.0,351.5,243.5,206.0,18.0,539.0,1639.0,212.0
75%,738.5,0.0,0.0,1368.0,182.0,178.0,712.0,3333.0,0.0,95.0,...,0.0,0.0,0.0,385.0,404.5,934.25,18.0,643.25,2246.0,212.0
max,13803.0,812.0,657.0,49357.0,21642.0,19768.0,65867.0,54713.0,2054.0,2053.0,...,0.0,0.0,0.0,502.0,1325.0,1055.0,18.0,718.0,2701.0,212.0


In [148]:
#df_both.to_csv(f'data/{dates[0]}USenergy_data2.csv')
df_both.to_csv(f'data/runningUSenergy_data.csv')

In [149]:
df_both_cleaned = df_both.loc[df_both['breakdown estimated?']==False,:]
df_both_cleaned.describe()

Unnamed: 0,nuclear(GW),geothermal(GW),biomass(GW),coal(GW),wind(GW),solar(GW),hydro(GW),gas(GW),oil,unknown,...,nuclear,geothermal,biomass,coal,wind,solar,hydro,gas,total_consumption,carbon_Intensity
count,4464.0,4464.0,4464.0,4464.0,4464.0,4464.0,4464.0,4464.0,4464.0,4464.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mean,1376.658378,43.142249,65.329301,1911.872312,1004.55914,721.797043,1056.001568,5348.823253,14.100358,117.155466,...,,,,,,,,,,
std,2320.734535,174.720418,174.751585,4629.053361,2699.243477,2680.985539,1928.116395,8345.149301,82.400487,230.986926,...,,,,,,,,,,
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
25%,0.0,0.0,0.0,0.0,0.0,0.0,11.0,281.0,0.0,0.0,...,,,,,,,,,,
50%,0.0,0.0,0.0,32.0,32.5,1.0,219.0,1433.5,0.0,13.0,...,,,,,,,,,,
75%,2232.0,0.0,0.25,1284.0,433.0,226.0,1310.0,8439.5,0.0,127.25,...,,,,,,,,,,
max,13287.0,812.0,657.0,48997.0,21642.0,19768.0,65867.0,54713.0,2054.0,2053.0,...,,,,,,,,,,


In [150]:
df_both_cleaned.to_csv(f'data/runningUSenergy_data_filtered.csv')