In [1]:
import pandas as pd
import csv

In [2]:
fips_codes = pd.read_html('https://en.wikipedia.org/wiki/List_of_United_States_FIPS_codes_by_county')[1]
fips_codes['COUNTY'] = fips_codes['County or equivalent'].apply(lambda x: x.split('County')[0])
fips_codes = fips_codes.rename({'State or equivalent': 'STATE'}, axis=1).drop(['County or equivalent'], axis=1)
fips_codes = fips_codes.set_index('FIPS')

In [3]:
def fetch_mber(county_fips):
    url = f'http://www.bber.umt.edu/FIR/HarvestT1.aspx?co={county_fips}'
    df = pd.read_html(url, skiprows=[0,1], header=0)[0]
    df.columns = ['YEAR'] + [col.replace('.', '').upper() for col in df.columns[1:]]
    df[df.columns] = df[df.columns].astype(int)
    df = df.melt(id_vars=['YEAR'], 
                 value_vars=['INDUSTRY', 'NIPF & TRIBAL', 'STATE', 'FOREST SERVICE', 'BLM & OTHER PUBLIC', 'TOTAL'],
                 var_name='OWNER', value_name='OUTPUT_MBF')
    df.insert(0, 'STATE', fips_codes.loc[county_fips, 'STATE'])
    df.insert(1, 'COUNTY', fips_codes.loc[county_fips, 'COUNTY'])
    df.insert(2, 'FIPS', county_fips)
    return df

In [4]:
STATES_TO_FETCH = ['Oregon', 'Washington', 'Idaho', 'California', 'Montana']
counties_to_fetch = fips_codes.loc[fips_codes.STATE.isin(STATES_TO_FETCH)]

In [5]:
dfs = []
print('Fetching {:,d} county reports from MBER.'.format(len(counties_to_fetch)))
for i, fips in enumerate(counties_to_fetch.index):
    dfs.append(fetch_mber(fips))
    if (i+1) % 10 == 0 and i > 0:
        print('{:,d}'.format(i+1), end='')
    else:
        print('.', end='')
    if (i+1) % 50 == 0:
        print()
print('Done.')

Fetching 233 county reports from MBER.
.........10.........20.........30.........40.........50
.........60.........70.........80.........90.........100
.........110.........120.........130.........140.........150
.........160.........170.........180.........190.........200
.........210.........220.........230...Done.


In [6]:
mber_data = pd.concat(dfs, axis=0)
mber_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 18144 entries, 0 to 101
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   STATE       18144 non-null  object
 1   COUNTY      18144 non-null  object
 2   FIPS        18144 non-null  int64 
 3   YEAR        18144 non-null  int32 
 4   OWNER       18144 non-null  object
 5   OUTPUT_MBF  18144 non-null  int32 
dtypes: int32(2), int64(1), object(3)
memory usage: 850.5+ KB


In [7]:
mber_data.to_csv('C:/GitHub/embodied_carbon/data/interim/mber_data_by_county.csv', 
                index=False, header=True,
                quoting=csv.QUOTE_NONNUMERIC)