# Calculate per state High Priority GRTS cells for NABat sampling frame

There was some discussion about how the high priority GRTS cells should be selected from a sampling frame.  Two options were considered:
* Take the top 5% of all the cells in each frame
* Take the top 5% of cells for each state/territory/province

**Note:  This distinction only applies to the Conus and Canada frames as the rest of the frames only contain a single admin unit. 

written by Colin Talbert   2018-11-20

In [1]:
import nabatpy # Note this is currently pre-alpha but abailable here: https://github.com/talbertc-usgs/nabatpy

In [2]:
conus = nabatpy.get_grts_data('Conus')
conus.set_index('GRTS_ID')

Unnamed: 0_level_0,CONUS_10KM,cnty_n_1,cnty_n_2,cnty_n_3,cnty_n_4,cnty_n_5,cnty_p_1,cnty_p_2,cnty_p_3,cnty_p_4,...,own_USFS,state_n_1,state_n_2,state_n_3,state_n_4,state_p_1,state_p_2,state_p_3,state_p_4,water_p
GRTS_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,74208,Colorado_Mesa,,,,,100.000000,0.000000,0.000000,0.000000,...,0.000000,Colorado,,,,100.000000,0.000000,0.0,0.0,0.034166
5,117258,Montana_Cascade,,,,,100.000000,0.000000,0.000000,0.000000,...,0.000000,Montana,,,,100.000000,0.000000,0.0,0.0,0.652749
6,70585,Kansas_Rooks,,,,,100.000000,0.000000,0.000000,0.000000,...,0.000000,Kansas,,,,100.000000,0.000000,0.0,0.0,0.452705
8,43327,Arkansas_Dallas,,,,,100.000000,0.000000,0.000000,0.000000,...,0.000000,Arkansas,,,,100.000000,0.000000,0.0,0.0,0.077323
13,40023,Texas_Throckmorton,Texas_Haskell,,,,82.271189,17.728811,0.000000,0.000000,...,0.000000,Texas,,,,100.000000,0.000000,0.0,0.0,0.172628
15,80897,Ohio_Auglaize,Ohio_Allen,,,,78.521210,21.478790,0.000000,0.000000,...,0.000000,Ohio,,,,100.000000,0.000000,0.0,0.0,0.395891
18,91800,Wyoming_Sublette,,,,,100.000000,0.000000,0.000000,0.000000,...,0.000000,Wyoming,,,,100.000000,0.000000,0.0,0.0,0.000000
20,37838,Alabama_Macon,Alabama_Lee,Alabama_Russell,,,96.448728,2.623293,0.927978,0.000000,...,0.000000,Alabama,,,,100.000000,0.000000,0.0,0.0,0.587074
22,55180,Arizona_Yavapai,,,,,100.000000,0.000000,0.000000,0.000000,...,7.433560,Arizona,,,,100.000000,0.000000,0.0,0.0,0.096493
24,31748,Louisiana_Natchitoches,,,,,100.000000,0.000000,0.000000,0.000000,...,0.000000,Louisiana,,,,100.000000,0.000000,0.0,0.0,0.883767


In [3]:
conus.head()

Unnamed: 0,CONUS_10KM,GRTS_ID,cnty_n_1,cnty_n_2,cnty_n_3,cnty_n_4,cnty_n_5,cnty_p_1,cnty_p_2,cnty_p_3,...,own_USFS,state_n_1,state_n_2,state_n_3,state_n_4,state_p_1,state_p_2,state_p_3,state_p_4,water_p
0,74208,2,Colorado_Mesa,,,,,100.0,0.0,0.0,...,0.0,Colorado,,,,100.0,0.0,0.0,0.0,0.034166
1,117258,5,Montana_Cascade,,,,,100.0,0.0,0.0,...,0.0,Montana,,,,100.0,0.0,0.0,0.0,0.652749
2,70585,6,Kansas_Rooks,,,,,100.0,0.0,0.0,...,0.0,Kansas,,,,100.0,0.0,0.0,0.0,0.452705
3,43327,8,Arkansas_Dallas,,,,,100.0,0.0,0.0,...,0.0,Arkansas,,,,100.0,0.0,0.0,0.0,0.077323
4,40023,13,Texas_Throckmorton,Texas_Haskell,,,,82.271189,17.728811,0.0,...,0.0,Texas,,,,100.0,0.0,0.0,0.0,0.172628


In [4]:
conus['frame_high_priority'] = 0
conus.loc[conus.index <= int(conus.GRTS_ID.quantile(0.05)), 'frame_high_priority'] = 1

In [5]:
conus.sample(10)

Unnamed: 0,CONUS_10KM,GRTS_ID,cnty_n_1,cnty_n_2,cnty_n_3,cnty_n_4,cnty_n_5,cnty_p_1,cnty_p_2,cnty_p_3,...,state_n_1,state_n_2,state_n_3,state_n_4,state_p_1,state_p_2,state_p_3,state_p_4,water_p,frame_high_priority
61046,57903,99669,California_Ventura,,,,,100.0,0.0,0.0,...,California,,,,100.0,0.0,0.0,0.0,0.046753,0
51089,43379,83406,Alabama_Jefferson,,,,,100.0,0.0,0.0,...,Alabama,,,,100.0,0.0,0.0,0.0,0.490581,0
22917,48442,37487,Mississippi_DeSoto,Mississippi_Tate,,,,99.188775,0.811225,0.0,...,Mississippi,,,,100.0,0.0,0.0,0.0,13.923834,0
50375,32674,82232,Louisiana_Natchitoches,,,,,100.0,0.0,0.0,...,Louisiana,,,,100.0,0.0,0.0,0.0,9.139379,0
1172,111719,1925,Montana_Golden Valley,,,,,100.0,0.0,0.0,...,Montana,,,,100.0,0.0,0.0,0.0,0.007171,1
31853,17814,52061,Texas_DeWitt,Texas_Karnes,,,,55.903448,44.096552,0.0,...,Texas,,,,100.0,0.0,0.0,0.0,0.179821,0
14724,92938,24079,Michigan_Genesee,Michigan_Lapeer,Michigan_Oakland,,,84.650412,14.899923,0.449665,...,Michigan,,,,100.0,0.0,0.0,0.0,1.396892,0
32889,131061,53761,Washington_Clallam,,,,,7.319767,0.0,0.0,...,Washington,,,,7.319767,0.0,0.0,0.0,7.357357,0
46317,123232,75647,Washington_Spokane,Washington_Lincoln,Washington_Stevens,,,97.057188,1.612766,1.330047,...,Washington,,,,100.0,0.0,0.0,0.0,2.578254,0
81415,94883,132948,Connecticut_Tolland,Connecticut_New London,Connecticut_Windham,,,45.057718,36.406866,18.535416,...,Connecticut,,,,100.0,0.0,0.0,0.0,0.493286,0


In [6]:
conus['stateprov_high_priority'] = 0

for state in conus.state_n_1.unique():
    if state != 'NA':
        print(state)
        this_state = conus[(conus.state_n_1 == state) | (conus.state_n_2 == state) | (conus.state_n_3 == state) | (conus.state_n_4 == state)]
        conus.loc[this_state.loc[this_state.index <= int(this_state.GRTS_ID.quantile(0.05))].index, 'stateprov_high_priority'] = 1

Colorado
Montana
Kansas
Arkansas
Texas
Ohio
Wyoming
Alabama
Arizona
Louisiana
Nevada
Iowa
Wisconsin
Idaho
Nebraska
Michigan
Mississippi
North Dakota
Kentucky
New Mexico
South Dakota
Maine
California
Georgia
Virginia
Missouri
Pennsylvania
Illinois
Washington
Utah
Minnesota
Indiana
North Carolina
Oregon
South Carolina
New York
Florida
Vermont
Tennessee
Oklahoma
Maryland
West Virginia
Massachusetts
New Hampshire
Connecticut
Delaware
New Jersey
Rhode Island
District of Columbia


### Save this out to a csv

In [8]:
out = conus[['GRTS_ID', 'frame_high_priority', 'stateprov_high_priority']]

out.to_csv(r"c:\temp\conus_priority.csv")

### A quick QA\QC visualization

In [14]:
import folium

state = 'Florida'
this_state = conus[(conus.state_n_1 == state) | (conus.state_n_2 == state) | (conus.state_n_3 == state) | (conus.state_n_4 == state)]
this_state.crs = {'init': 'epsg:4326'}

m = folium.Map([27.6648, -81.51], zoom_start=7, tiles='cartodbpositron')

def style_function(feature):
    state_pri = int(feature['properties']['stateprov_high_priority'])
    frame_pri = int(feature['properties']['frame_high_priority'])
    
    both = state_pri and frame_pri
    
    return {
        'fillOpacity': 0.9 if (state_pri or frame_pri) else 0.1,
        'weight': 0.5,
        'fillColor': 'blue' if both \
             else 'orange' if state_pri \
             else 'green' if frame_pri \
             else 'white'
    }


folium.GeoJson(this_state, style_function=style_function).add_to(m)
m

#### Here are the cells that don't agree between the two methods

In [15]:
conus[conus.frame_high_priority != conus.stateprov_high_priority]

Unnamed: 0,CONUS_10KM,GRTS_ID,cnty_n_1,cnty_n_2,cnty_n_3,cnty_n_4,cnty_n_5,cnty_p_1,cnty_p_2,cnty_p_3,...,state_n_2,state_n_3,state_n_4,state_p_1,state_p_2,state_p_3,state_p_4,water_p,frame_high_priority,stateprov_high_priority
1471,21094,2420,,,,,,0.000000,0.000000,0.000000,...,,,,0.000000,0.000000,0.0,0.0,0.005395,1,0
4034,95351,6608,Rhode Island_Kent,,,,,100.000000,0.000000,0.000000,...,,,,100.000000,0.000000,0.0,0.0,3.370504,1,0
5942,76347,9728,Delaware_Sussex,,,,,100.000000,0.000000,0.000000,...,,,,100.000000,0.000000,0.0,0.0,11.993131,1,0
6197,106909,10135,Vermont_Washington,Vermont_Chittenden,Vermont_Addison,,,67.873790,28.194528,3.931682,...,,,,100.000000,0.000000,0.0,0.0,0.046894,1,0
6200,98453,10139,Wisconsin_Outagamie,,,,,100.000000,0.000000,0.000000,...,,,,100.000000,0.000000,0.0,0.0,0.136172,1,0
6224,41082,10180,Georgia_Troup,,,,,100.000000,0.000000,0.000000,...,,,,100.000000,0.000000,0.0,0.0,0.611390,1,0
6230,32785,10188,Georgia_Camden,Georgia_Glynn,,,,99.520834,0.479166,0.000000,...,,,,100.000000,0.000000,0.0,0.0,52.288216,1,0
6244,103063,10211,Wisconsin_Rusk,Wisconsin_Taylor,,,,73.524421,26.475579,0.000000,...,,,,100.000000,0.000000,0.0,0.0,0.149251,1,0
6277,98925,10271,Wisconsin_Kewaunee,,,,,100.000000,0.000000,0.000000,...,,,,100.000000,0.000000,0.0,0.0,100.000000,1,0
6280,102631,10275,Wisconsin_Door,,,,,100.000000,0.000000,0.000000,...,,,,100.000000,0.000000,0.0,0.0,100.000000,1,0


## Let's do the same for Canada

In [16]:
canada = nabatpy.get_grts_data('Canada').set_index('GRTS_ID')


In [17]:
canada['GRTS_ID'] = canada.index
canada['frame_high_priority'] = 0
canada.loc[canada.index <= int(canada.GRTS_ID.quantile(0.05)), 'frame_high_priority'] = 1

In [18]:
canada['terrprov_high_priority'] = 0

for terr in canada.terr_n_1.unique():
    if terr != 'NA':
        print(terr)
        this_terr = canada[(canada.terr_n_1 == terr) | (canada.terr_n_2 == terr) | (canada.terr_n_3 == terr) | (canada.terr_n_4 == terr)]
        canada.loc[this_terr.loc[this_terr.index <= int(this_terr.GRTS_ID.quantile(0.05))].index, 'terrprov_high_priority'] = 1

Manitoba
Quebec
New Brunswick
Northwest Territories
Nunavut
Alberta
British Columbia
Ontario
Saskatchewan
Yukon
Newfoundland and Labrador
Nova Scotia
Prince Edward Island


In [19]:
out = canada[['GRTS_ID', 'frame_high_priority', 'terrprov_high_priority']]

out.to_csv(r"c:\temp\canada_priority.csv")