In [1]:
import requests
import pandas as pd
import numpy as np
import csv
from lxml import html
from concurrent.futures import as_completed, ThreadPoolExecutor
from tqdm.notebook import tqdm

In [2]:
# Endpoints and form payloads used to pull FIA data out of EVALIDator.
# The workflow is one GET followed by four POST steps, always in this order.
START_URL = 'https://apps.fs.usda.gov/Evalidator/evalidator.jsp'  # GET
STEP1_URL = 'https://apps.fs.usda.gov/Evalidator/page1tmattrPost.jsp'  # POST
STEP2_URL = 'https://apps.fs.usda.gov/Evalidator/page2tmattrPostB.jsp'  # POST
STEP3_URL = 'https://apps.fs.usda.gov/Evalidator/page3tmgeogPost.jsp'  # POST
STEP4_URL = 'https://apps.fs.usda.gov/Evalidator/page4tmprcPost.jsp'  # POST row and column variables

# Step 1 form. The 'numestngroup' field is not stored here; callers add it per
# request (e.g. 'Annual harvest removals dry weight').
PAYLOAD_1 = {
    'reportType': 'State',
    'latTextBox': '',
    'lonTextBox': '',
    'radiusTextBox': '',
    'landbasis': 'Forest land',
    'denestngroup': 'No denominator - just produce estimates',
    'STANDARD': 'Continue',
}

# Step 2 form. The 'numeratorAttributeSel' field is added per request
# (e.g. 'Average annual harvest removals of aboveground biomass of trees
# (at least 1 inches d.b.h./d.r.c.), in dry short tons, on forest land').
PAYLOAD_2 = {
    'denominatorAttributeSel': 'No denominator - just produce estimates',
    'FIAorRPA': 'RPADEF',
    'ALLINV': 'YES',
    'STANDARD': 'Continue',
}

# Step 3 form. The 'geog_sel' field is added per request
# (e.g. '012012Y ALABAMA 2006;2007;2008;2009;2010;2011;2012').
PAYLOAD_3 = {
    'standard': 'Continue',
}

# Step 4 form: page, row and column variables of the report. Sent unchanged.
PAYLOAD_4 = {
    'prc_sel_page': 'Reserved status class',
    'prc_sel_page_time': 'Current',
    'prc_sel_row': 'County code and name',
    'prc_sel_row_time': 'Current',
    'r1': '',
    'prc_sel_col': 'Ownership group',
    'prc_sel_col_time': 'Current',
    'c1': '',
    'FILTERYESNO': 'NO',
    'ESTONLY': 'N',
}

In [3]:
def get_evalidator_geog_sel_options(numestngroup, numeratorAttributeSel):
    """Return the geographic-selection labels EVALIDator offers for an estimate.

    Opens a session, submits the step-1 and step-2 forms, and reads the
    ``<option>`` labels of the first ``/html/body/form/select`` element on the
    page returned by step 2.

    Parameters
    ----------
    numestngroup : str
        Value sent as the ``numestngroup`` field of the step-1 form.
    numeratorAttributeSel : str
        Value sent as the ``numeratorAttributeSel`` field of the step-2 form.

    Returns
    -------
    list of str
        Whitespace-stripped option labels, in page order.

    Raises
    ------
    requests.HTTPError
        If the step-2 response carries an HTTP error status.
    IndexError
        If the step-2 page has no ``/html/body/form/select`` element.
    """
    with requests.Session() as s:
        s.headers.update({'User-Agent': 'Mozilla/5.0'})
        s.get(START_URL)
        s.post(STEP1_URL, data=dict(PAYLOAD_1, numestngroup=numestngroup))
        r = s.post(
            STEP2_URL,
            data=dict(PAYLOAD_2, numeratorAttributeSel=numeratorAttributeSel),
        )

    # Fail loudly on an HTTP error instead of trying to parse an error page.
    r.raise_for_status()

    tree = html.fromstring(r.content)
    sel = tree.xpath('/html/body/form/select')
    if not sel:
        raise IndexError(
            'EVALIDator step 2 returned no /html/body/form/select element; '
            'check the numestngroup / numeratorAttributeSel values.'
        )

    # Iterate the element directly (getchildren() is deprecated in lxml) and
    # skip children that carry no text, which would otherwise break .strip().
    return [opt.text.strip() for opt in sel[0] if opt.text is not None]

In [25]:
def _split_county_label(label):
    """Split a 'County code and name' row label into ``(fips, state, county)``.

    The label is split on whitespace at most twice, so everything after the
    second token is kept as the county name and multi-word names stay intact.
    Raises ``ValueError`` when the label has fewer than three tokens.
    """
    # NOTE(review): assumes labels look like '<code> <state> <county name>';
    # confirm against a live EVALIDator report.
    fips, state, county = label.split(maxsplit=2)
    return fips, state, county


def fetch_fia_data_by_county(numestngroup, numerator, geog_sel):
    """Download one EVALIDator report and reshape it into a county-indexed table.

    Replays the four EVALIDator form steps for the given estimate and
    geographic selection, reads the HTML tables of the resulting report, and
    combines three of them (used here as mean, standard error and count) into
    a single frame.

    Parameters
    ----------
    numestngroup : str
        Value sent as the ``numestngroup`` field of the step-1 form.
    numerator : str
        Value sent as the ``numeratorAttributeSel`` field of the step-2 form.
    geog_sel : str
        Value sent as the ``geog_sel`` field of the step-3 form, e.g.
        ``'012012Y ALABAMA 2006;2007;2008;2009;2010;2011;2012'``. Characters
        2-5 of the first token become ``YEAR``; the last token becomes
        ``ALLYEARS`` and its first and last ``;``-separated items become
        ``MINYEAR`` and ``MAXYEAR``.

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by ``STATE, FIPS, COUNTY, YEAR, ALLYEARS, MINYEAR,
        MAXYEAR`` with ``(STATISTIC, OWNER_GROUP)`` columns, where STATISTIC
        is ``MEAN``, ``STDEV`` or ``COUNT``. ``None`` is returned, after
        printing a message, when the report cannot be read or its row labels
        cannot be parsed.
    """
    with requests.Session() as s:
        s.headers.update({'User-Agent': 'Mozilla/5.0'})
        s.get(START_URL)
        s.post(STEP1_URL, data=dict(PAYLOAD_1, numestngroup=numestngroup))
        s.post(STEP2_URL, data=dict(PAYLOAD_2, numeratorAttributeSel=numerator))
        s.post(STEP3_URL, data=dict(PAYLOAD_3, geog_sel=geog_sel))
        response = s.post(STEP4_URL, data=PAYLOAD_4)

    # Best effort: a report that cannot be parsed is reported and skipped.
    # `except Exception` keeps that behaviour without also swallowing
    # KeyboardInterrupt / SystemExit the way a bare `except:` does. Reading
    # the HTML is inside the guard so one bad page cannot abort a batch.
    try:
        outputs = pd.read_html(response.content, skiprows=[0, 2], header=0)
        # NOTE(review): tables 3, 4 and 5 are taken to be the estimate, its
        # standard error and the count for the unreserved class; confirm
        # against the report layout. '-' cells are replaced with 0.
        unreserved_mean, unreserved_std_err, unreserved_count = (
            outputs[i].replace('-', 0).set_index('County code and name').astype(float)
            for i in (3, 4, 5)
        )
        unreserved_std = unreserved_std_err * np.sqrt(unreserved_count)
    except Exception:
        print(f'{geog_sel} has no County code and name.')
        return None

    index_cols = ['STATE', 'FIPS', 'COUNTY', 'YEAR', 'ALLYEARS', 'MINYEAR', 'MAXYEAR']
    stat_tables = [
        ('MEAN', unreserved_mean),
        ('STDEV', unreserved_std),
        ('COUNT', unreserved_count),
    ]

    # Parse every row label up front; any malformed label rejects the report.
    try:
        county_keys = [
            [_split_county_label(label) for label in table.index]
            for _, table in stat_tables
        ]
    except Exception:
        print(f'{geog_sel} failed parsing.')
        return None

    # The selection string is the same for every table, so parse it once.
    selection_tokens = geog_sel.split(' ')
    all_years = selection_tokens[-1]
    inventory_years = all_years.split(';')
    year = int(selection_tokens[0][2:6])
    min_year = int(inventory_years[0])
    max_year = int(inventory_years[-1])

    frames = []
    for (stat, table), keys in zip(stat_tables, county_keys):
        table.insert(0, 'STATE', [state for _, state, _ in keys])
        table.insert(1, 'FIPS', [fips for fips, _, _ in keys])
        table.insert(2, 'COUNTY', [county for _, _, county in keys])
        table.insert(3, 'YEAR', year)
        table.insert(4, 'ALLYEARS', all_years)
        table.insert(5, 'MINYEAR', min_year)
        table.insert(6, 'MAXYEAR', max_year)
        table.columns = [col.upper() for col in table.columns]

        # Swap the raw label index for the parsed key columns, then tag the
        # remaining (ownership group) columns with the statistic they hold.
        table = table.reset_index(drop=True).set_index(index_cols)
        table.columns = pd.MultiIndex.from_product(
            [[stat], list(table.columns)], names=['STATISTIC', 'OWNER_GROUP']
        )
        frames.append(table)

    return pd.concat(frames, axis=1)

In [5]:
def multithreaded_download(numestngroup, numerator, num_threads=4):
    """Download every available EVALIDator selection in parallel and stack them.

    Looks up the geographic selections offered for the estimate, fetches one
    report per selection on a thread pool, and concatenates the per-selection
    frames row-wise.

    Parameters
    ----------
    numestngroup : str
        Passed through as the ``numestngroup`` form value.
    numerator : str
        Passed through as the ``numeratorAttributeSel`` form value.
    num_threads : int, default 4
        Number of worker threads.

    Returns
    -------
    pandas.DataFrame
        Row-wise concatenation of every successfully fetched selection, in the
        order the selections were listed.

    Raises
    ------
    ValueError
        If no selection produced any data.
    """
    geogs = get_evalidator_geog_sel_options(numestngroup, numerator)
    with ThreadPoolExecutor(num_threads) as executor:
        print('Starting to download data from EVALIDator.')
        jobs = [
            executor.submit(fetch_fia_data_by_county, numestngroup, numerator, geog)
            for geog in geogs
        ]
        # as_completed only drives the progress bar; results are collected
        # afterwards so that row order does not depend on thread timing.
        for _ in tqdm(as_completed(jobs), total=len(jobs)):
            pass

    # Submission order keeps the output reproducible between runs.
    results = [job.result() for job in jobs]

    # A fetch returns None for selections it had to skip.
    downloaded = [result for result in results if result is not None]
    if not downloaded:
        raise ValueError(
            f'No EVALIDator data could be downloaded for {numestngroup!r} '
            f'({len(results)} selections attempted).'
        )

    return pd.concat(downloaded, axis=0)

In [6]:
# Harvest removals: download every available selection and save it as CSV.
NUMESTNGROUP = 'Annual harvest removals dry weight'
NUMERATOR = 'Average annual harvest removals of aboveground biomass of trees (at least 1 inches d.b.h./d.r.c.), in dry short tons, on forest land'

fia_data = multithreaded_download(numestngroup=NUMESTNGROUP, numerator=NUMERATOR)
fia_data.to_csv(
    'C:/GitHub/embodied_carbon/data/interim/fia_harvest_removals_by_county.csv',
    header=True,
    index=True,
    quoting=csv.QUOTE_NONNUMERIC,
)

Starting to download data from EVALIDator.


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=429.0), HTML(value='')))




In [7]:
# Net growth: download every available selection and save it as CSV.
NUMESTNGROUP = 'Annual net growth dry weight'
NUMERATOR = 'Average annual net growth of aboveground biomass of trees (at least 1 inches d.b.h./d.r.c.), in dry short tons, on forest land'

fia_data = multithreaded_download(numestngroup=NUMESTNGROUP, numerator=NUMERATOR)
fia_data.to_csv(
    'C:/GitHub/embodied_carbon/data/interim/fia_net_growth_by_county.csv',
    header=True,
    index=True,
    quoting=csv.QUOTE_NONNUMERIC,
)

Starting to download data from EVALIDator.


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=429.0), HTML(value='')))

172003N ILLINOIS 2001;2002;2003
172004Y ILLINOIS 2001;2002;2003;2004
172005Y ILLINOIS 2001;2002;2003;2004;2005



In [8]:
# Net change: download every available selection and save it as CSV.
NUMESTNGROUP = 'Annual net change dry weight'
NUMERATOR = 'Average annual net change of aboveground biomass of trees (at least 1 inches d.b.h./d.r.c.), in dry short tons, on forest land'

fia_data = multithreaded_download(numestngroup=NUMESTNGROUP, numerator=NUMERATOR)
fia_data.to_csv(
    'C:/GitHub/embodied_carbon/data/interim/fia_net_change_by_county.csv',
    header=True,
    index=True,
    quoting=csv.QUOTE_NONNUMERIC,
)

Starting to download data from EVALIDator.


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=429.0), HTML(value='')))

172003N ILLINOIS 2001;2002;2003
172004Y ILLINOIS 2001;2002;2003;2004
172005Y ILLINOIS 2001;2002;2003;2004;2005



In [26]:
# Tree carbon: download every available selection and save it as CSV.
NUMESTNGROUP = "Tree carbon"
NUMERATOR = "Aboveground and belowground carbon in live trees (at least 1 inch d.b.h./d.r.c), in short tons, on forest land"

fia_data = multithreaded_download(numestngroup=NUMESTNGROUP, numerator=NUMERATOR)
fia_data.to_csv(
    'C:/GitHub/embodied_carbon/data/interim/fia_tree_carbon_by_county.csv',
    header=True,
    index=True,
    quoting=csv.QUOTE_NONNUMERIC,
)

Starting to download data from EVALIDator.


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=725.0), HTML(value='')))

602001N AMERICAN SAMOA 2001 failed parsing.
602012N AMERICAN SAMOA 2012 failed parsing.
642016N FEDERATED STATES OF  2016 failed parsing.
642005N FEDERATED STATES OF  2005 failed parsing.
662002N GUAM 2002 failed parsing.
662013N GUAM 2013 failed parsing.
692015N NORTHERN MARIANA ISL 2015 failed parsing.
692004N NORTHERN MARIANA ISL 2004 failed parsing.
702003N PALAU 2003 failed parsing.
702014N PALAU 2014 failed parsing.

