# EPA - Flight Data Base Tests

In [18]:
import requests
import pandas as pd

In [19]:
%load_ext jupyter_black

The jupyter_black extension is already loaded. To reload it, use:
  %reload_ext jupyter_black


#### Webscraping tests

In [20]:
SERVICE_API = "https://ghgdata.epa.gov/ghgp/service/populateSectorDashboard/"
FACILITIES_API = "https://ghgdata.epa.gov/ghgp/service/listFacility/"

In [21]:
headers = {
    "Host": "ghgdata.epa.gov",
    "Origin": "https://ghgdata.epa.gov",
    "Referer": "https://ghgdata.epa.gov/ghgp/main.do",
    "Sec-Ch-Ua": '"Not.A/Brand";v="8", "Chromium";v="114", "Google Chrome";v="114"',
    "Sec-Ch-Ua-Mobile": "?0",
    "Sec-Ch-Ua-Platform": '"Windows"',
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
}

In [25]:
new_payload = {
    "trend": "current",
    "dataSource": "E",
    "reportingYear": "2021",
    "currentYear": "2021",
    "query": "Long Beach",
    "lowE": "-20000",
    "highE": "23000000",
    "state": "CA",
    "countyFips": "",
    "msaCode": "",
    "stateLevel": "0",
    "basin": "",
    "gases": [
        "true",
        "true",
        "true",
        "true",
        "true",
        "false",
        "true",
        "true",
        "true",
        "true",
        "true",
        "true",
    ],
    "sectors": [
        ["true"],
        ["true", "true", "true", "true", "true"],
        ["true", "true", "true", "true", "true", "true", "true", "true"],
        ["true", "true", "true", "true", "true", "true"],
        ["true"],
        ["true", "true", "true"],
        [
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
        ],
        [
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
        ],
        [
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
            "true",
        ],
    ],
    "sortOrder": "0",
    "supplierSector": 0,
    "reportingStatus": "ALL",
    "searchOptions": "11001100",
    "injectionSelection": 11,
    "emissionsType": "",
    "tribalLandId": "",
    "pageNumber": 0,
    "overlayLevel": 0,
    "visType": "list",
}

### Functions

In [26]:
def get_facilities(payload_facilities, year_to_query=2021):
    """
    Makes a request to the FACILITIES_API and returns a dataframe of the results.

    Parameters
    ----------
    payload_facilities : dict
    """

    payload_facilities["reportingYear"] = str(year_to_query)

    r_facilities = requests.post(FACILITIES_API, json=new_payload, headers=headers)
    print(r_facilities.status_code)
    print(r_facilities.content)
    content = r_facilities.json()
    unit = content["unit"]
    year = content["year"]
    df_facilities = pd.DataFrame(content["data"]["rows"])
    df_facilities.drop(columns=["id", "icons"], inplace=True)
    df_facilities.rename(columns={"total": "metric_tons_CO2"}, inplace=True)
    df_facilities.loc[:, "unit"] = unit
    df_facilities.loc[:, "year"] = year

    return df_facilities

### Querying the API

In [28]:
df_facilities = get_facilities(new_payload, year_to_query=2020)
df_facilities

200
b'{"data":{"cols":[{"id":"icons","name":"","field":"icons","sortable":false,"type":"string","cssClass":""},{"id":"facility","name":"Facility","field":"facility","sortable":true,"type":"string","cssClass":"list-item"},{"id":"city","name":"City","field":"city","sortable":true,"type":"string","cssClass":"list-item"},{"id":"state","name":"State","field":"state","sortable":true,"type":"string","cssClass":"list-item"},{"id":"total","name":"Total Reported Emissions","field":"total","sortable":true,"type":"number","cssClass":"list-item list-number"},{"id":"sectors","name":"Sectors","field":"sectors","sortable":true,"type":"string","cssClass":"list-item"}],"rows":[{"id":"id0","icons":"","facility":"AES Alamitos [1001444]","city":"LONG BEACH","state":"CA","total":"823,900","sectors":"Power Plants"},{"id":"id1","icons":"<img src=\'img/notification.gif\' title=\'Verification of this facility&apos;s report was still in progress as of $!{dataDate}.\' alt=\'Verification of this facility&apos;s re

Unnamed: 0,facility,city,state,metric_tons_CO2,sectors,unit,year
0,AES Alamitos [1001444],LONG BEACH,CA,823900,Power Plants,Metric Tons,2020
1,California Resources Production Corporation- L...,Long Beach,CA,244801,Petroleum and Natural Gas Systems,Metric Tons,2020
2,EDGINGTON OIL COMPANY [1007347],LONG BEACH,CA,---,Petroleum and Natural Gas Systems,Metric Tons,2020
3,Gold Bond - LGB Plant [1011213],Long Beach,CA,31401,Minerals,Metric Tons,2020
4,Haynes Generating Station [1006097],LONG BEACH,CA,1486475,Power Plants,Metric Tons,2020
5,Long Beach Generating Station [1000490],LONG BEACH,CA,30923,Power Plants,Metric Tons,2020
6,Los Angeles Basin 760 THUMS Long Beach Company...,Los Angeles,CA,---,Petroleum and Natural Gas Systems,Metric Tons,2020
7,Southeast Resource Recovery Facility (SERRF) [...,LONG BEACH,CA,133840,Waste,Metric Tons,2020
8,Thums Long Beach Company [1005066],Long Beach,CA,---,Petroleum and Natural Gas Systems,Metric Tons,2020


### Testing Class

In [1]:
from EPAExtractor import EPAExtractor

In [2]:
epa_extractor = EPAExtractor()
df_facilities = epa_extractor.get_facilities(year_to_query=2021)
df_facilities

JSONDecodeError: Expecting value: line 1 column 1 (char 0)