# Rollup of Total Grant Value for all US Counties

Grant information by state was compiled by [Investigate West](http://www.invw.org/2012/06/11/lwcf-grants-database-1283/) for the period 1965-2011.

In [47]:
import glob

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [48]:
# Collect all file names and process into DataFrames
state_files = glob.glob('./Data/StateGrantData/*.xlsx')
dfs = []

for file in state_files:
    dfs.append(pd.read_excel(file))

all_states = pd.concat(dfs, ignore_index=True)
all_states.head()

Unnamed: 0,Grant ID & Element,Grant Name,Sponsor,County,State,Grant Amount,Year Approved,Year Completed,Type
0,3 - XXX,INDIAN PEAKS - BAKER CANYON,STATE OF UTAH,BEAVER,UT,5985.92,1966,1966,Development
1,4 - XXX,WEST BEAR RIVER BAY,STATE OF UTAH,BOX ELDER,UT,18500.0,1966,1967,Development
2,6 - XXX,WELLSVILLE PARK,CITY OF WELLSVILLE,CACHE,UT,934.25,1966,1967,Acquisition
3,2 - XXX,FARMINGTON BAY WATERFOWL AREA,STATE OF UTAH,DAVIS,UT,15150.0,1966,1967,Acquisition
4,11 - XXX,4 CITY PARKS,CITY OF BOUNTIFUL,DAVIS,UT,16225.15,1966,1966,Development


In [49]:
all_states.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41145 entries, 0 to 41144
Data columns (total 9 columns):
Grant ID & Element    41145 non-null object
Grant Name            41145 non-null object
Sponsor               41145 non-null object
County                41145 non-null object
State                 41145 non-null object
Grant Amount          41145 non-null float64
Year Approved         41145 non-null int64
Year Completed        41145 non-null int64
Type                  41145 non-null object
dtypes: float64(1), int64(2), object(6)
memory usage: 2.8+ MB


In [50]:
all_states['State'].sort_values().unique()

array(['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA',
       'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME',
       'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM',
       'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX',
       'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY'], dtype=object)

In [51]:
# Count states - should be 51 (includes DC)
all_states['State'].nunique()

51

In [52]:
# Make all counties lowercase
all_states['County'] = all_states['County'].map(lambda s: s.lower())

In [54]:
# Replace "saint" with "st." in county names
all_states['County'].replace('saint', 'st.', inplace=True)

In [55]:
all_states[[all_states['County']]]

0              beaver
1           box elder
2               cache
3               davis
4               davis
5            planning
6              uintah
7               cache
8               cache
9               cache
10              cache
11              davis
12              davis
13              davis
14              davis
15              grand
16             morgan
17           planning
18          salt lake
19          salt lake
20          salt lake
21          salt lake
22          salt lake
23             sevier
24               utah
25               utah
26               utah
27            wasatch
28              weber
29              weber
             ...     
41115         gladwin
41116           huron
41117          ingham
41118          ingham
41119          monroe
41120        muskegon
41121         oakland
41122         oakland
41123    presque isle
41124           wayne
41125       cheboygan
41126            lake
41127       roscommon
41128     saint clair
41129     

In [24]:
# Create grouped grant data
all_counties = all_states.groupby(['State', 'County'])['Grant Amount'].agg(['count', 'sum'])

# Reset multi-index to flatten columns
all_counties = all_counties.reset_index()

# Re-name columns
all_counties.columns = ['state', 'county_orig', 'num_grants', 'grant_value']

all_counties.head(5)

Unnamed: 0,state,county_orig,num_grants,grant_value
0,AK,ALEUTIANS EAST,5,123552.58
1,AK,ALEUTIANS WEST,1,52405.0
2,AK,ANCHORAGE,67,9475506.86
3,AK,BETHEL,3,366133.32
4,AK,DILLINGHAM,2,96119.8
5,AK,FAIRBANKS NORTH STAR,23,2765458.52
6,AK,HAINES,10,2978361.65
7,AK,JUNEAU,18,1945647.75
8,AK,KENAI PENINSULA,34,3032183.91
9,AK,KENAI-COOK INLET,1,20000.0


In [36]:
# Drop multi-county, planning, and no county rows
all_counties = all_counties[~all_counties['county_orig'].isin(['MULTI-COUNTY', 'PLANNING', 'NO COUNTY'])]

In [41]:
all_counties[all_counties['state'] == 'MD']

Unnamed: 0,state,county_orig,num_grants,grant_value
1150,MD,ALLEGANY,13,943778.85
1151,MD,ANNE ARUNDEL,39,11511614.0
1152,MD,BALTIMORE,10,4500529.86
1153,MD,BALTIMORE CITY,21,2195982.92
1154,MD,CALVERT,5,1582202.9
1155,MD,CAROLINE,8,1078302.58
1156,MD,CARROLL,13,1065374.09
1157,MD,CECIL,6,723400.01
1158,MD,CHARLES,10,4061300.75
1159,MD,DORCHESTER,7,1276220.76


In [42]:
# Split out exceptions of county names (AK, LA, DC, MD, )
exceptions = all_counties[all_counties['state'].isin(['AK', 'LA', 'DC', 'VA', 'MD', 'NV', 'MO'])]
exceptions.shape

(370, 4)

In [46]:
all_counties[all_counties['county_orig'].str.contains('SAINT')]

Unnamed: 0,state,county_orig,num_grants,grant_value
88,AL,SAINT CLAIR,13,516435.58
162,AR,SAINT FRANCIS,1,8302.92
375,FL,SAINT JOHNS,3,378520.71
376,FL,SAINT LUCIE,5,566586.93
749,IL,SAINT CLAIR,20,2756349.46
1108,LA,SAINT BERNARD,4,1359347.46
1109,LA,SAINT CHARLES,3,719489.77
1110,LA,SAINT HELENA,1,47069.44
1111,LA,SAINT JAMES,5,402837.23
1112,LA,SAINT LANDRY,12,837154.01


In [32]:
all_counties['county_orig'].count()

2989

In [30]:
# Import county id data
county_ids = pd.read_csv('./Data/county_names.csv', sep=',')
county_ids.head()

Unnamed: 0,id,state,county
0,1001,Alabama,Autauga County
1,1003,Alabama,Baldwin County
2,1005,Alabama,Barbour County
3,1007,Alabama,Bibb County
4,1009,Alabama,Blount County


In [31]:
county_ids['county'].count()

3219