# Electricity Generation from api.electricitymap.org
---


SECTION 1
Collect data on carbon intensity, zones within SW, and power breakdown from the electricitymap API.

In [274]:
# Dependencies and Setup

import pandas as pd
import requests
from datetime import datetime, timedelta

In [275]:

# zones for electrical utilities in US
# Each zone key is listed exactly once: a repeated key would trigger a redundant
# request per endpoint and produce duplicate rows for that region downstream.
# Order follows first appearance, because later cells pair zones, regions and
# responses by position.
zones = [
    "US-SW-PNM", "US-SW-EPE", "US-SW-WALC", "US-NW-PACE", "US-NW-PSCO",
    "US-CENT-SWPP", "US-TEX-ERCO", "US-MIDW-AECI", "US-SW-AZPS",
    "US-NW-WACM", "US-SW-SRP", "US-SW-TEPC", "US-CENT-SPA",
    "US-CAL-IID", "US-CAL-CISO", "US-CAL-BANC", "US-CAL-TIDC",
    "US-CAR-CPLE", "US-CAR-CPLW", "US-CAR-DUK", "US-CAR-SC", "US-CAR-SCEG", "US-CAR-YAD",
    "US-FLA-FMPP", "US-FLA-FPC", "US-FLA-FPL", "US-FLA-GVL", "US-FLA-HST",
    "US-FLA-JEA", "US-FLA-SEC", "US-FLA-TAL", "US-FLA-TEC",
    "US-MIDW-LGEE", "US-MIDW-MISO",
    "US-NE-ISNE",
    "US-NW-BPAT", "US-NW-CHPD", "US-NW-DOPD", "US-NW-GCPD", "US-NW-GRID",
    "US-NW-IPCO", "US-NW-NWMT", "US-NW-NEVP", "US-NW-PACW", "US-NW-PGE",
    "US-NW-PSEI", "US-NW-SCL", "US-NW-TPWR", "US-NW-WAUW",
    "US-NY-NYIS", "US-SE-SEPA", "US-SE-SOCO",
    "US-TEN-TVA",
]


In [276]:
# Build the short region labels used when recording the data: strip the first
# three characters of each zone key (the "US-" prefix) and replace only the
# first remaining hyphen with an underscore.
regions = [zone_key[3:].replace('-', '_', 1) for zone_key in zones]

In [277]:
# get carbon intensity history for the US utilities
# One request URL per zone, in the same order as `zones`.
urls = [
    f'https://api.electricitymap.org/v3/carbon-intensity/history?zone={zone_key}'
    for zone_key in zones
]

# store responses in responses_dict dictionary, keyed "response_1", "response_2", ...
# in zone order (later cells look responses up by that 1-based position).
# NOTE(review): no auth token is sent with these requests; confirm the endpoint
# still permits that.
responses_dict = {}
for idx, url in enumerate(urls):
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, timeout=30)
    # Fail here, with the HTTP status and URL, rather than later with a
    # KeyError on 'history' when an error payload has been stored.
    response.raise_for_status()
    responses_dict[f"response_{idx+1}"] = response.json()


In [278]:
# request power breakdown for the US utilities
# One request URL per zone, in the same order as `zones`.
pburls = [
    f'https://api.electricitymap.org/v3/power-breakdown/history?zone={zone_key}'
    for zone_key in zones
]

# store response in power_breakdown_responses_dict dictionary, keyed
# "response_1", "response_2", ... in zone order (later cells look responses up
# by that 1-based position).
# NOTE(review): no auth token is sent with these requests; confirm the endpoint
# still permits that.
power_breakdown_responses_dict = {}
for idx, pburl in enumerate(pburls):
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(pburl, timeout=30)
    # Fail here, with the HTTP status and URL, rather than later with a
    # KeyError on 'history' when an error payload has been stored.
    response.raise_for_status()
    power_breakdown_responses_dict[f"response_{idx+1}"] = response.json()


In [279]:
# Flatten the two API response dictionaries into parallel lists, one entry per
# (region, hour). The list names are read when the DataFrame is built, so they
# are part of this cell's contract.
carbon_intensity = []
dateandtime = []
isEstimated = []
nuc = []
geo = []
bio = []
coal = []
wind = []
solar = []
hydro = []
gas = []
oil = []
unknown = []
powerImport = []
powerExport = []
powerProdTotal = []
hydro_discharge = []
battery_discharge = []
Powest = []
region = []

# Maximum number of history records taken per region (the original fixed window).
HOURS_PER_REGION = 24

# Key inside each record's "powerProductionBreakdown" -> list collecting its values.
production_lists = {
    'nuclear': nuc,
    'geothermal': geo,
    'biomass': bio,
    'coal': coal,
    'wind': wind,
    'solar': solar,
    'hydro': hydro,
    'gas': gas,
    'oil': oil,
    'unknown': unknown,
    'hydro discharge': hydro_discharge,
    'battery discharge': battery_discharge,
}

# populate each list with values from the dictionaries
for j in range(len(power_breakdown_responses_dict)):
    # j is for each region (each region is a response)
    ci_history = responses_dict[f'response_{j+1}']['history']
    pb_history = power_breakdown_responses_dict[f'response_{j+1}']['history']

    # Records are paired by position, as before. zip() stops at the shorter
    # history, so a region that returns fewer than HOURS_PER_REGION records
    # contributes the rows it has instead of raising IndexError.
    # NOTE(review): pairing by position assumes both endpoints return the same
    # hours in the same order - confirm against the API.
    for ci_record, pb_record in zip(ci_history[:HOURS_PER_REGION],
                                    pb_history[:HOURS_PER_REGION]):
        carbon_intensity.append(ci_record['carbonIntensity'])
        dateandtime.append(ci_record['datetime'])
        isEstimated.append(ci_record['isEstimated'])

        breakdown = pb_record["powerProductionBreakdown"]
        for source_key, values in production_lists.items():
            values.append(breakdown[source_key])

        powerProdTotal.append(pb_record['powerProductionTotal'])
        powerImport.append(pb_record['powerImportTotal'])
        powerExport.append(pb_record['powerExportTotal'])
        Powest.append(pb_record['isEstimated'])
        region.append(regions[j])
        

In [280]:
# Wrap the carbon-intensity fields in Series, then assemble all per-hour lists
# into a single DataFrame (one row per region and hour).
ser_carbon_intensity = pd.Series(carbon_intensity)
ser_dateandtime = pd.Series(dateandtime)
ser_EST = pd.Series(isEstimated)

# Column label -> data; insertion order here is the column order of the frame.
frame_columns = {
    'region': region,
    'datetime': ser_dateandtime,
    'carbonIntensity': ser_carbon_intensity,
    'isEstimated': ser_EST,
    'nuclear (GW)': nuc,
    'geothermal (GW)': geo,
    'biomass (GW)': bio,
    'coal (GW)': coal,
    'wind (GW)': wind,
    'solar (GW)': solar,
    'hydro (GW)': hydro,
    'gas (GW)': gas,
    'oil (GW)': oil,
    'unknown (GW)': unknown,
    'hydro_discharge (GW)': hydro_discharge,
    'battery_discharge (GW)': battery_discharge,
    'PowerProductionTotal (GW)': powerProdTotal,
    'PowerImportTotal (GW)': powerImport,
    'PowerExportTotal (GW)': powerExport,
    'ConsumptionEstimated': Powest,
}
df = pd.DataFrame(frame_columns)


SECTION 2     
PowerBreakdown data transformation

In [281]:
# Replace missing values with 0 in the carbon-intensity, per-source, import and
# export columns. 'PowerProductionTotal (GW)' is not in this list and keeps its NaNs.
zero_filled_columns = [
    'carbonIntensity',
    'nuclear (GW)',
    'geothermal (GW)',
    'biomass (GW)',
    'coal (GW)',
    'wind (GW)',
    'solar (GW)',
    'hydro (GW)',
    'gas (GW)',
    'oil (GW)',
    'unknown (GW)',
    'hydro_discharge (GW)',
    'battery_discharge (GW)',
    'PowerImportTotal (GW)',
    'PowerExportTotal (GW)',
]
df = df.fillna({column: 0 for column in zero_filled_columns})

In [282]:
# Date Time work
# Parse the API timestamp strings and add three columns derived from them.
# `datetime` is the class imported in the dependencies cell at the top.

# strptime pattern for the timestamp strings; the trailing "Z" is matched as a
# literal character, so the parsed datetime objects carry no tzinfo.
TIMESTAMP_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'

# Read the timestamps from the 'datetime' column by name rather than by
# position, so a change in column order cannot silently parse the wrong column.
DateTime = [datetime.strptime(stamp, TIMESTAMP_FORMAT) for stamp in df['datetime']]
dates = [moment.strftime('%Y-%m-%d') for moment in DateTime]
times = [moment.strftime('%H:%M:%S') for moment in DateTime]

# add the times and dates to new columns in the data frame
df['UTC time'] = times
df['UTC date'] = dates
df['UTC DateTime'] = DateTime

In [283]:
# reindex the data frame to make the UTC DateTime column the index
# set_index(..., inplace=True) returns None, so binding its result left
# df_reindex as None. Rebind df to the indexed frame (the next cell reads the
# indexed data through df) and point df_reindex at the same frame.
df = df.set_index("UTC DateTime", drop=True)
df_reindex = df

In [284]:
# remove the string-valued 'datetime' column; the result is the frame merged below
df_reindex = df.drop(columns=['datetime'])

Section 3
Merge with previous data

In [285]:
# load the previously cleaned running file into a pandas dataframe
df_us_energy = pd.read_csv('data/allRegions/simplifiedrunningfile.csv')

# keep one stored row per (UTC date, UTC time, region): the first one in file order
stored_row_key = ['UTC date', 'UTC time', 'region']
df_us_energy_dropped = df_us_energy.drop_duplicates(subset=stored_row_key, keep='first')

# index the stored rows by their "UTC DateTime" column
df_us_energy_reindex = df_us_energy_dropped.set_index(keys="UTC DateTime")

In [286]:
# stack the stored history first, then the freshly collected rows
frames_to_merge = [df_us_energy_reindex, df_reindex]
df_both = pd.concat(frames_to_merge)

# order the combined rows by region, then by UTC date
sort_columns = ['region', 'UTC date']
df_sort = df_both.sort_values(by=sort_columns)

In [287]:
# Collapse rows sharing the same region, UTC time and UTC date to a single row,
# keeping the last occurrence because it may contain updated data.
merged_row_key = ['region', 'UTC time', 'UTC date']
df_final = df_sort.drop_duplicates(subset=merged_row_key, keep='last')

In [288]:

# write the merged data back to the same file that was read as the previous data
running_file_path = 'data/allRegions/simplifiedrunningfile.csv'
df_final.to_csv(running_file_path)

In [289]:
# display summary statistics of the merged data as a sanity check
df_final.describe()

Unnamed: 0,carbonIntensity,nuclear (GW),geothermal (GW),biomass (GW),coal (GW),wind (GW),solar (GW),hydro (GW),gas (GW),oil (GW),unknown (GW),hydro_discharge (GW),battery_discharge (GW),PowerProductionTotal (GW),PowerImportTotal (GW),PowerExportTotal (GW)
count,19391.0,19398.0,19398.0,19398.0,19398.0,19398.0,19398.0,19398.0,19398.0,19398.0,19398.0,19398.0,19398.0,19380.0,19398.0,19398.0
mean,410.322211,1144.835241,14.477111,19.939891,1395.369162,780.07176,528.630426,530.038664,3604.842922,2.565986,91.214455,-74.037272,-4.414115,8139.966202,203.192133,73.086607
std,240.974293,2457.868875,104.428527,100.027936,3590.680991,2666.997199,2311.886047,1215.359795,7377.586377,45.736515,294.251369,2687.431564,438.29109,15722.915528,839.654176,268.403821
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-297409.0,-6411.0,0.0,0.0,0.0
25%,271.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,214.0,0.0,0.0,0.0,0.0,667.0,0.0,0.0
50%,434.0,0.0,0.0,0.0,0.0,0.0,0.0,54.5,905.0,0.0,0.0,0.0,0.0,2102.5,0.0,0.0
75%,579.0,650.0,0.0,0.0,1070.0,186.0,128.0,527.0,2523.0,0.0,62.0,0.0,0.0,7207.0,0.0,0.0
max,1250.0,11767.0,809.0,630.0,30192.0,26093.0,20647.0,26491.0,48696.0,2853.0,13129.0,291.0,7837.0,106778.0,8578.0,2363.0
