This project pulls carbon dioxide emissions data for the United States from 2000 to 2020 using the Energy Information Administration (EIA) API. The data is broken down by the sector emitting the carbon dioxide. My goal is to shed some light on which sectors in the US have the highest carbon footprint. 

We will also look for trends in the data to see whether emissions are improving over time. 

Once we extract this data into a file, we will load it into Tableau for charting. The resulting dashboard is posted on Tableau Public: 
https://public.tableau.com/app/profile/datacheme/viz/emissionsworkbook/FuelTypeStateDashboardwithFilters

In [1]:
import requests
from dotenv import load_dotenv
import os
import json
import pandas as pd 
import numpy as np 



def configure():
    """Load environment variables by calling python-dotenv's ``load_dotenv()``."""
    load_dotenv()

# Run once up front so the API key can later be read with os.getenv().
configure()


# Sector IDs: CC = commercial, IC = industrial, TC = transportation,
# EC = electric power, RC = residential, TT = total carbon emissions.
sector_Id_list = ['CC', 'IC', 'TC', 'EC', 'RC', 'TT']

# We pull annual carbon emissions by sector for 2000-2020 from the
# Energy Information Administration (EIA).
# The API returns at most 5000 rows per call, so we issue one call per sector
# and concatenate the results into a single DataFrame.
page_length = 5000
url = (
    "https://api.eia.gov/v2/co2-emissions/co2-emissions-aggregates/data/"
    "?frequency=annual&data[0]=value&start=2000&end=2020"
    "&sort[0][column]=period&sort[0][direction]=desc"
    f"&offset=0&length={page_length}"
)

api_key = os.getenv('eia-apikey')
if not api_key:
    # Fail early with a clear message instead of sending "api_key=None".
    raise RuntimeError(
        "Environment variable 'eia-apikey' is not set; add it to your .env file."
    )

list_of_dfs = []

# The context manager closes the session even if a request or parse fails.
with requests.Session() as s:
    for n in sector_Id_list:
        response = s.get(
            f"{url}&facets[sectorId][]={n}&api_key={api_key}",
            timeout=60,
        )
        if not response.ok:
            # Deliberately omit the URL from the message: it contains the API key.
            raise RuntimeError(
                f"EIA request for sector {n} failed with HTTP {response.status_code}"
            )
        data = response.json()
        df_init = pd.json_normalize(data['response']['data'])
        if len(df_init) >= page_length:
            # A full page means rows beyond the per-call cap may be missing.
            print(
                f"Warning: sector {n} returned {len(df_init)} rows "
                f"(the per-call limit); results may be truncated."
            )
        list_of_dfs.append(df_init)

# ignore_index gives every row a unique label instead of repeating 0..N per sector.
df_total = pd.concat(list_of_dfs, axis=0, ignore_index=True)

df_total.to_csv('carbon_emissions.csv')

print(df_total.shape)

(26208, 9)


In [2]:
# Check that every requested sector ID is represented in the combined data.
df_total['sectorId'].unique()


array(['CC', 'IC', 'TC', 'EC', 'RC', 'TT'], dtype=object)

In [3]:
# List the unique state names to confirm that all of the data was extracted.
df_total['state-name'].unique()


array(['Maryland', 'Wyoming', 'Wisconsin', 'West Virginia', 'Washington',
       'Virginia', 'Vermont', 'Utah', 'United States', 'Texas',
       'Tennessee', 'South Dakota', 'South Carolina', 'Rhode Island',
       'Pennsylvania', 'Oregon', 'Oklahoma', 'Ohio', 'North Dakota',
       'North Carolina', 'New York', 'New Mexico', 'New Jersey',
       'New Hampshire', 'Nevada', 'Nebraska', 'Montana', 'Missouri',
       'Mississippi', 'Minnesota', 'Michigan', 'Massachusetts', 'Maine',
       'Louisiana', 'Kentucky', 'Kansas', 'Iowa', 'Indiana', 'Illinois',
       'Idaho', 'Hawaii', 'Georgia', 'Florida', 'District of Columbia',
       'Delaware', 'Connecticut', 'Colorado', 'California', 'Arkansas',
       'Arizona', 'Alaska', 'Alabama'], dtype=object)