# Rollup of Total Grant Value for all US Counties

Land and Water Conservation Fund (LWCF) grant information, organized by state, was compiled by [Investigate West](http://www.invw.org/2012/06/11/lwcf-grants-database-1283/) and covers the period 1965-2011.

In [47]:
import glob

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [48]:
# Collect every per-state workbook and combine them into a single DataFrame.
# glob returns paths in arbitrary (filesystem-dependent) order, so sort them
# to keep the row order of the combined frame reproducible across re-runs.
state_files = sorted(glob.glob('./Data/StateGrantData/*.xlsx'))

# One DataFrame per workbook, in the same order as state_files
dfs = [pd.read_excel(path) for path in state_files]

# ignore_index=True discards each workbook's own row labels and renumbers 0..n-1
all_states = pd.concat(dfs, ignore_index=True)
all_states.head()

Unnamed: 0,Grant ID & Element,Grant Name,Sponsor,County,State,Grant Amount,Year Approved,Year Completed,Type
0,3 - XXX,INDIAN PEAKS - BAKER CANYON,STATE OF UTAH,BEAVER,UT,5985.92,1966,1966,Development
1,4 - XXX,WEST BEAR RIVER BAY,STATE OF UTAH,BOX ELDER,UT,18500.0,1966,1967,Development
2,6 - XXX,WELLSVILLE PARK,CITY OF WELLSVILLE,CACHE,UT,934.25,1966,1967,Acquisition
3,2 - XXX,FARMINGTON BAY WATERFOWL AREA,STATE OF UTAH,DAVIS,UT,15150.0,1966,1967,Acquisition
4,11 - XXX,4 CITY PARKS,CITY OF BOUNTIFUL,DAVIS,UT,16225.15,1966,1966,Development


In [49]:
# Summarize column dtypes and non-null counts of the combined frame
all_states.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41145 entries, 0 to 41144
Data columns (total 9 columns):
Grant ID & Element    41145 non-null object
Grant Name            41145 non-null object
Sponsor               41145 non-null object
County                41145 non-null object
State                 41145 non-null object
Grant Amount          41145 non-null float64
Year Approved         41145 non-null int64
Year Completed        41145 non-null int64
Type                  41145 non-null object
dtypes: float64(1), int64(2), object(6)
memory usage: 2.8+ MB


In [50]:
# Distinct state codes, listed in sorted order
pd.unique(all_states['State'].sort_values())

array(['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA',
       'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME',
       'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM',
       'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
       'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY'], dtype=object)

In [51]:
# Count distinct state codes - should be 51 (the 50 states plus DC)
all_states['State'].nunique()

51

In [52]:
# Make all county names lowercase so that case differences do not split a
# county into separate groups. The vectorized .str accessor replaces the
# per-element lambda and passes missing values through instead of raising.
all_states['County'] = all_states['County'].str.lower()

In [70]:
# Harmonize spelling variants before grouping:
#   'saint'  -> 'st.'
#   '(city)' -> 'city'
# Both substitutions are plain substring replacements, applied in that order.
all_states['County'] = all_states['County'].map(
    lambda name: name.replace('saint', 'st.').replace('(city)', 'city')
)

## Group County Data

In [71]:
# Roll the grants up to one row per (state, county) pair, recording how many
# grants each county has and their combined value.
all_counties = (
    all_states
    .groupby(['State', 'County'])['Grant Amount']
    .agg(['count', 'sum'])
    # Turn the (State, County) multi-index back into ordinary columns
    .reset_index()
    # Give every column its final name
    .rename(columns={
        'State': 'state',
        'County': 'county_orig',
        'count': 'num_grants',
        'sum': 'grant_value',
    })
)

all_counties.head()

Unnamed: 0,state,county_orig,num_grants,grant_value
0,AK,aleutians east,5,123552.58
1,AK,aleutians west,1,52405.0
2,AK,anchorage,67,9475506.86
3,AK,bethel,3,366133.32
4,AK,dillingham,2,96119.8


In [72]:
# Drop rows whose county label is not an actual county name: multi-county,
# planning, no county, and county name missing.
drop_rows = ['multi-county', 'planning', 'no county', 'county name missing']

# .copy() makes the filtered result an independent frame, so modifying it
# afterwards is never treated as a write to a slice of the grouped frame.
all_counties = all_counties[~all_counties['county_orig'].isin(drop_rows)].copy()

In [73]:
# Show the rows whose state code is MO
all_counties.loc[all_counties['state'].eq('MO')]

Unnamed: 0,state,county_orig,num_grants,grant_value
1361,MO,adair,9,245765.46
1362,MO,andrew,2,208092.50
1363,MO,atchison,3,70114.20
1364,MO,audrain,6,138908.00
1365,MO,barry,9,241236.85
1366,MO,barton,5,249772.30
1367,MO,bates,3,194937.25
1368,MO,benton,9,932155.27
1369,MO,bollinger,1,9163.00
1370,MO,boone,46,3035431.64


In [74]:
# Fix one-off structural differences between county columns and id data.
# Keys are spellings as they appear in the grant data; values are the
# spellings to use instead. Only whole-value matches are replaced.
county_name_fixes = {
    # AK
    'matanuska susitna': 'matanuska-susitna',
    'wales hyder': 'wales-hyder',
    'skagway hoonah angoon': 'skagway',
    'valdez cordova': 'valdez-cordova',
    'yukon koyukuk': 'yukon-koyukuk',
    # MO
    'ste genevieve': 'ste. genevieve',
}

# Calling .replace(..., inplace=True) on all_counties['county_orig'] is chained
# assignment through an inplace method: it warns in recent pandas and does not
# update the frame at all under Copy-on-Write. Build the corrected column and
# bind a new frame instead, which is also safe to re-run.
all_counties = all_counties.assign(
    county_orig=all_counties['county_orig'].replace(county_name_fixes)
)

In [77]:
# Number of non-null county names in the grouped grant data
all_counties['county_orig'].count()

2964

## Process County ID Data

In [78]:
# Load the county id lookup table (comma-delimited, which is read_csv's default)
county_ids = pd.read_csv('./Data/county_names.csv')
county_ids.head()

Unnamed: 0,id,state,county
0,1001,Alabama,Autauga County
1,1003,Alabama,Baldwin County
2,1005,Alabama,Barbour County
3,1007,Alabama,Bibb County
4,1009,Alabama,Blount County


In [79]:
# Number of non-null county names in the id lookup table
county_ids['county'].count()

3219