In [1]:
import geocoder
import pandas as pd
import numpy as np
import re

Load datasets. The cities are grabbed from 3 datasets on Sept 10.

In [7]:
df = pd.concat([pd.read_csv('cleansed_data/baseline_00.csv')[['region name', 'lon', 'lat']],
                pd.read_csv('cleansed_data/baseline_08.csv')[['region name', 'lon', 'lat']],
                pd.read_csv('cleansed_data/baseline_16.csv')[['region name', 'lon', 'lat']]], ignore_index=True)

Take mean value for the lon, lat

In [28]:
regions = df.groupby('region name').mean().reset_index()

In [29]:
regions.head()

Unnamed: 0,region name,lon,lat
0,Abbeville_1,-82.448461,34.203223
1,Abbeville_5,-83.431289,31.933351
2,Aberdeen_2,-83.716002,38.708632
3,Aberdeen_3,-79.415092,35.101738
4,Aberdeen_4,-79.410768,35.054672


2 methods are used here to find county and state of the region.

First, use reverse geocoding

In [40]:
county = []
state = []
for i in range(regions.shape[0]):
    loc = geocoder.mapquest([regions['lat'].iloc[i], regions['lon'].iloc[i]], method='reverse', key='sC87jHB8CsRXTnfhb1I123jsWgSW62tt').json
    county.append(loc['county'])
    state.append(loc['state'])

In [43]:
regions['county'] = county
regions['state'] = state

In [51]:
regions.head()

Unnamed: 0,region name,lon,lat,county,state,evacuation
0,Abbeville_1,-82.448461,34.203223,Abbeville,SC,no evacuation
1,Abbeville_5,-83.431289,31.933351,Wilcox,GA,no evacuation
2,Aberdeen_2,-83.716002,38.708632,Brown,OH,no evacuation
3,Aberdeen_3,-79.415092,35.101738,Moore,NC,no evacuation
4,Aberdeen_4,-79.410768,35.054672,Hoke,NC,no evacuation


Second, use geocoding confined in a bounding box

In [145]:
county_here = []
state_here = []
lon_here = []
lat_here = []
for i in range(regions.shape[0]):
    lon = regions['lon'].iloc[i]
    lat = regions['lat'].iloc[i]
    if re.match('[^_]+_[^_]+_[^_]+', regions['region name'].iloc[i])!=None:
        county_here.append(county[i])
        state_here.append(state[i])
        lon_here.append(lon)
        lat_here.append(lat)
    else:
        loc_name = re.sub('_\d+', '', regions['region name'].iloc[i])
        if re.match('^Mc', loc_name)!=None:
            loc_name = re.sub(' ', '', loc_name)
        bbox = [lon-0.25, lat-0.25, lon+0.25, lat+0.25]
        try:
            loc = geocoder.here(loc_name, bbox=bbox, app_id='xIirayHrtGRTzhFhowWm', app_code='PHY3HgiT4QsGtpMPUA_tRA').json
            county_here.append(loc['county'])
            state_here.append(loc['state'])
            lon_here.append(loc['lng'])
            lat_here.append(loc['lat'])
        except:
            county_here.append('unknown')
            state_here.append('unknown')
            lon_here.append(-1)
            lat_here.append(-1)

In [157]:
regions['county_here'] = county_here
regions['state_here'] = state_here
regions['lon_here'] = lon_here
regions['lat_here'] = lat_here

In [159]:
idx = regions[regions['county_here']=='unknown'].index
for i in idx:
    lon = regions['lon'].loc[i]
    lat = regions['lat'].loc[i]
    loc_name = re.sub('_\d+', '', regions['region name'].loc[i])
    bbox = [lon-0.5, lat-0.5, lon+0.5, lat+0.5]
    try:
        loc = geocoder.here(loc_name, bbox=bbox, app_id='xIirayHrtGRTzhFhowWm', app_code='PHY3HgiT4QsGtpMPUA_tRA').json
        regions['county_here'].loc[i]=loc['county']
        regions['state_here'].loc[i]=loc['state']
        regions['lon_here'].loc[i]=loc['lng']
        regions['lat_here'].loc[i]=loc['lat']
    except:
        print i

1007
2550


In [166]:
loc = geocoder.here('Fort Meade', bbox=[-77.281064,38.551937, -76.281064,39.551937], app_id='xIirayHrtGRTzhFhowWm', app_code='PHY3HgiT4QsGtpMPUA_tRA').json
loc

{'address': u'Fort Meade, MD, United States',
 'bbox': {'northeast': [39.13057, -76.7133],
  'southwest': [39.08718, -76.77576]},
 'city': u'Fort Meade',
 'confidence': 6,
 'country': u'USA',
 'county': u'Anne Arundel',
 'lat': 39.10667,
 'lng': -76.73481,
 'ok': True,
 'postal': u'20755',
 'raw': {u'Address': {u'AdditionalData': [{u'key': u'CountryName',
     u'value': u'United States'},
    {u'key': u'StateName', u'value': u'Maryland'},
    {u'key': u'CountyName', u'value': u'Anne Arundel'},
    {u'key': u'PostalCodeType', u'value': u'N'}],
   u'City': u'Fort Meade',
   u'Country': u'USA',
   u'County': u'Anne Arundel',
   u'Label': u'Fort Meade, MD, United States',
   u'PostalCode': u'20755',
   u'State': u'MD'},
  u'CountryName': u'United States',
  u'CountyName': u'Anne Arundel',
  u'DisplayPosition': {u'Latitude': 39.10667, u'Longitude': -76.73481},
  u'LocationId': u'NT_xn.Hk7jwjNG4J2d8xKgCsB',
  u'LocationType': u'area',
  u'MapView': {u'BottomRight': {u'Latitude': 39.08718, u'

In [167]:
regions['county_here'].loc[1007]=loc['county']
regions['state_here'].loc[1007]=loc['state']
regions['lon_here'].loc[1007]=loc['lng']
regions['lat_here'].loc[1007]=loc['lat']

In [170]:
loc = geocoder.here('Saint Helena', bbox=[-80.859215,32.219252, -80.359215,32.719252], app_id='xIirayHrtGRTzhFhowWm', app_code='PHY3HgiT4QsGtpMPUA_tRA').json
loc

{'address': u'St Helena, SC, United States',
 'bbox': {'northeast': [32.41043, -80.56067],
  'southwest': [32.38345, -80.59263]},
 'city': u'St Helena',
 'confidence': 7,
 'country': u'USA',
 'county': u'Beaufort',
 'lat': 32.39694,
 'lng': -80.57665,
 'ok': True,
 'postal': u'29920',
 'raw': {u'Address': {u'AdditionalData': [{u'key': u'CountryName',
     u'value': u'United States'},
    {u'key': u'StateName', u'value': u'South Carolina'},
    {u'key': u'CountyName', u'value': u'Beaufort'},
    {u'key': u'PostalCodeType', u'value': u'N'}],
   u'City': u'St Helena',
   u'Country': u'USA',
   u'County': u'Beaufort',
   u'Label': u'St Helena, SC, United States',
   u'PostalCode': u'29920',
   u'State': u'SC'},
  u'CountryName': u'United States',
  u'CountyName': u'Beaufort',
  u'DisplayPosition': {u'Latitude': 32.39694, u'Longitude': -80.57665},
  u'LocationId': u'NT_HKA077YK1NWVpEhYb8ru5C',
  u'LocationType': u'area',
  u'MapView': {u'BottomRight': {u'Latitude': 32.38345,
    u'Longitude

In [171]:
regions['county_here'].loc[2550]=loc['county']
regions['state_here'].loc[2550]=loc['state']
regions['lon_here'].loc[2550]=loc['lng']
regions['lat_here'].loc[2550]=loc['lat']

Compare 2 methods. The second one is more accurate, so the results from the second one are adopted.

In [174]:
regions[regions['county'] != regions['county_here']]

Unnamed: 0,region name,lon,lat,county,state,evacuation,county_here,state_here,lon_here,lat_here
4,Aberdeen_4,-79.410768,35.054672,Hoke,NC,no evacuation,Moore,NC,-79.42954,35.13139
6,Accokeek_2,-77.020266,38.623478,Charles,MD,no evacuation,Prince George's,MD,-77.02287,38.67076
9,Adamstown,-77.413075,39.174957,Montgomery,MD,no evacuation,Frederick,MD,-77.47508,39.31130
11,Adrian_2,-82.694905,32.566149,Laurens,GA,no evacuation,Emanuel,GA,-82.59039,32.53105
12,Adrian_3,-82.661133,32.509754,Laurens,GA,no evacuation,Emanuel,GA,-82.59039,32.53105
16,Afton_3,-78.706055,37.961515,Albemarle,VA,no evacuation,Nelson,VA,-78.83915,38.03313
18,Ahoskie_1,-76.926994,36.208621,Bertie,NC,no evacuation,Hertford,NC,-76.98960,36.28959
19,Ahoskie_2,-76.995230,36.224266,Bertie,NC,no evacuation,Hertford,NC,-76.98960,36.28959
21,Aiken_3,-81.508896,33.247739,Barnwell,SC,no evacuation,Aiken,SC,-81.72214,33.56165
25,Albany_2,-82.229004,39.192518,Meigs,OH,no evacuation,Athens,OH,-82.20103,39.22491


In [175]:
regions[regions['state'] != regions['state_here']]

Unnamed: 0,region name,lon,lat,county,state,evacuation,county_here,state_here,lon_here,lat_here
78,Ararat_2,-80.543333,36.527013,Surry,NC,no evacuation,Patrick,VA,-80.51485,36.59667
272,Bloomery,-78.245841,39.23181,Frederick,VA,no evacuation,Hampshire,WV,-78.37354,39.38669
284,Bluefield_1,-81.380229,37.206102,Tazewell,VA,no evacuation,Mercer,WV,-81.22262,37.26798
321,Bracey_2,-78.095734,36.535733,Warren,NC,no evacuation,Mecklenburg,VA,-78.14607,36.5979
328,Brandywine_3,-79.175302,38.55965,Rockingham,VA,no evacuation,Pendleton,WV,-79.24439,38.62489
330,Brasstown_2,-83.979665,34.962611,Union,GA,no evacuation,Clay,NC,-83.95815,35.0395
332,Brentwood,-76.966525,38.88368,District of Columbia,DC,no evacuation,Prince George's,MD,-76.9553,38.9429
346,Bristol_5,-82.240646,36.61454,Washington,VA,no evacuation,Sullivan,TN,-82.22129,36.59218
380,Bunker Hill,-78.024493,39.25972,Clarke,VA,no evacuation,Berkeley,WV,-78.0528,39.33615
430,Cana_1,-80.602106,36.505427,Surry,NC,no evacuation,Carroll,VA,-80.67181,36.58938


In [188]:
regions = regions.drop(columns=['state', 'county', 'lon', 'lat'])

In [193]:
regions = regions.rename(columns={'state_here':'state', 'county_here':'county', 'lon_here':'lon', 'lat_here':'lat'})

Now, identify evacuation status for each city.

In [195]:
regions['evacuation'] = 'no evacuation'

In [198]:
# Beaufort, NC
regions['evacuation'][(regions['county']=='Beaufort') & (regions['state']=='NC')] = 'mandatory to voluntary'
regions['evacuation'][(regions['region name'].str.contains('bayview|belhaven|bath|whitepost|river\sroad|river\srd|washington|chocowinity|aurora', 
                                                           case=False))&(regions.county=='Beaufort')&(regions.state=='NC')]='mandatory'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [199]:
# Bertie, NC
regions['evacuation'][(regions['region name'].str.contains('colerain|whites\sbeach|ashland|avoca|merry\shill', case=False))&(regions.county=='Bertie')&(regions.state=='NC')]='voluntary'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [201]:
# Brunswick, NC
Brunswick_unincorporated = 'Sunset\sHarbor|Supply|Winnabow'
voluntary = 'Leland|Navassa'
visitor = 'Holden\sBeach|Oak\sIsland|Bald|Caswell|Ocean|Sunset|Bird'
regions['evacuation'][(regions['region name'].str.contains(visitor, case=False))&(regions.county=='Brunswick')&(regions.state=='NC')]='mandatory (visitor)'
regions['evacuation'][(regions['region name'].str.contains(Brunswick_unincorporated, case=False))&(regions.county=='Brunswick')&(regions.state=='NC')]='mandatory'
regions['evacuation'][(regions['region name'].str.contains(voluntary, case=False))&(regions.county=='Brunswick')&(regions.state=='NC')]='voluntary'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


In [202]:
# Carteret, NC
regions['evacuation'][(regions['region name'].str.contains('Atlantic\sBeach|Emerald\sIsle|Indian\sBeach|Pine\sKnoll\sShores', case=False))&(regions.county=='Carteret')&(regions.state=='NC')]='mandatory'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [203]:
# Columbus, NC
regions['evacuation'][(regions['county']=='Columbus') & (regions['state']=='NC')] = 'voluntary'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [204]:
# Craven, NC
regions['evacuation'][(regions['county']=='Craven') & (regions['state']=='NC')] = 'mandatory'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [205]:
# Currituck, NC
regions['evacuation'][(regions['region name'].str.contains('Corolla|Carova'))&(regions['county']=='Currituck') & (regions['state']=='NC')] = 'mandatory'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [206]:
# Dare, NC
regions['evacuation'][(regions['county']=='Dare') & (regions['state']=='NC')] = 'mandatory'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [207]:
# Duplin, NC
regions['evacuation'][(regions['county']=='Duplin') & (regions['state']=='NC')] = 'voluntary'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [208]:
# Hyde, NC
regions['evacuation'][(regions['region name'].str.contains('Ocracoke'))&(regions['county']=='Hyde') & (regions['state']=='NC')] = 'mandatory'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [209]:
# Jones, NC
regions['evacuation'][(regions['county']=='Jones') & (regions['state']=='NC')] = 'mandatory'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [210]:
# Lenoir, NC
regions['evacuation'][(regions['county']=='Lenoir') & (regions['state']=='NC')] = 'voluntary'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [211]:
# New Hanover, NC
regions['evacuation'][(regions['county']=='New Hanover') & (regions['state']=='NC')] = 'voluntary'
regions['evacuation'][(regions['region name'].str.contains('Carolina\sBeach|Kure\sBeach|Wrightsville\sBeach|Wilmington'))&(regions['county']=='New Hanover') & (regions['state']=='NC')] = 'mandatory'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [212]:
# Onslow, NC
voluntary = 'Half\sMoon|Petersburg|Piney\sGreen|Pumpkin\sCenter|Sneads\sFerry|Surf'
regions['evacuation'][(regions['region name'].str.contains(voluntary))&(regions['county']=='Onslow') & (regions['state']=='NC')] = 'voluntary'
regions['evacuation'][(regions['region name'].str.contains('Topsail\sBeach'))&(regions['county']=='Onslow') & (regions['state']=='NC')] = 'mandatory'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [213]:
# Pamlico, NC
regions['evacuation'][(regions['county']=='Pamlico') & (regions['state']=='NC')] = 'mandatory'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [214]:
# Pender, NC
regions['evacuation'][(regions['region name'].str.contains('Topsail|Surf|Sloop'))&(regions['county']=='Pender') & (regions['state']=='NC')] = 'voluntary'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [215]:
# Tyrrell, NC
regions['evacuation'][(regions['county']=='Tyrrell') & (regions['state']=='NC')] = 'mandatory'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [216]:
# SC
m_to_no = 'Beaufort|Colleton|Jasper'
mandatory = 'Dorchester|Berkeley|Charleston|Georgetown|Horry'
regions['evacuation'][(regions['county'].str.contains(m_to_no)) & (regions['state']=='SC')] = 'mandatory to no evacuation'
regions['evacuation'][(regions['county'].str.contains(mandatory)) & (regions['state']=='SC')] = 'mandatory'
regions['evacuation'][(regions['region name'].str.contains('Edisto\sBeach', case=False))&(regions['county']=='Beaufort') & (regions['state']=='SC')] = 'mandatory'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [185]:
#VA
import folium
m = folium.Map(location=[35.7596, -79.0193], zoom_start=6)
names = regions['region name'].str.replace('_.+County_.+', '').str.replace('[&\']', '')
for i in regions[regions['state']=='VA'].index:
    lat = regions['lat'].loc[i]
    lon = regions['lon'].loc[i]
    folium.Circle(
        radius=200,
        location=[lat, lon],
        #tooltip = names.iloc[i],
        popup = folium.Popup(names.loc[i],parse_html=True)
    ).add_to(m)

In [186]:
m

In [217]:
ZoneA='Cape\sCharles|Chincoteague|Port\sHaywood|Quiet\sCove|Seaford_1|Poquoson_2|Gloucester_1|Water\sView|Hampton_5|Smithfield_1|Suffolk_4|Norfolk_3|Portsmouth_4|Chesapeake_2|Virginia\sBeach_3' 

In [219]:
regions['evacuation'][(regions['region name'].str.contains(ZoneA)) & (regions['state']=='VA')] = 'mandatory'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [220]:
regions.evacuation.value_counts()

no evacuation                 3030
mandatory                      118
voluntary                       39
mandatory to no evacuation      24
mandatory to voluntary           4
mandatory (visitor)              2
Name: evacuation, dtype: int64

In [221]:
regions.to_csv('cleansed_data/evacuation.csv')

In [236]:
m = folium.Map(location=[35.7596, -79.0193], zoom_start=6)
names = regions['region name'].str.replace('_.+County_.+', '').str.replace('[&\']', '')
colors = {'mandatory':'crimson', 'voluntary':'darkgreen', 'mandatory to no evacuation':'yellow',
         'mandatory to voluntary':'yellow', 'mandatory (visitor)':'black', 'no evacuation':'blue'}
for i in regions.index:
    lat = regions['lat'].loc[i]
    lon = regions['lon'].loc[i]
    folium.Circle(
        radius=100,
        location=[lat, lon],
        color=colors[regions['evacuation'].loc[i]],
        popup=folium.Popup(names.loc[i], parse_html=True)
    ).add_to(m)

In [237]:
m.save('evacuation.html')