In [1]:
import geocoder

In [2]:
import pandas as pd

df = pd.read_csv('./documenters_aggregator/local_outputs/chi_police_20180312_2010.csv')

In [3]:
import geocoder

"""
This pipeline decorates items with latitude and longitude by geocoding their
addresses.
"""
class MapboxPipeline(object):
    """
    Stub pipeline to geocode using Mapbox.
    """
    def __init__(self):
        # the geocoder library. this way we can use other libraries if needed or write/extend our own
        self.engine = 'geocoder' 
        # api key, mapbox in this case. best practice would be to hide this
        self.key = 'pk.eyJ1IjoiZWFzaGVybWEiLCJhIjoiY2oxcW51Nzk2MDBkbTJxcGUxdm85bW5xayJ9.7mL0wQ7cjifWwt5DrXMuJA' #API 

    """
    Process an item.
    """
    def process_item(self, item, spider):
        if item['location']['coordinates']['latitude'] is None:
            response = self.get_geocoder_query('1811 N francisco ave, Chicago, Illinois, 60647')
            item['location']['coordinates']['latitude'] = response.lat
            item['location']['coordinates']['longitude'] = response.lng
            item['location']['url'] = response.url
        return item


    def get_geocoder_query(self, address=None, bbox=None):

        if self.engine == 'geocoder':
            provider = 'mapbox'
            params = {
            'engine': self.engine,
            'provider': provider,
            'key': self.key


            }
            query_params = {

                'address': address,
                'bbox': bbox # bounds to search in, useful if we dont have city/zip in address          
            }
            
#             query = "{engine}.{provider}('{address}', key='{key}')".format(**params)
            
            result = getattr(geocoder, provider)(query_params['address'], key = params['key'], **query_params)
            return result, str(query_params)
        return query



In [4]:
geo = MapboxPipeline()


In [5]:
test_raw = geo.get_geocoder_query(df['location_address'][0])
test_bbox = geo.get_geocoder_query(df['location_address'][0], bbox=[-87.940102,41.643921,-87.523987,42.023022])
test_city_state = geo.get_geocoder_query(df['location_address'][0] + ',Chicago,Illinois')

In [6]:
geo.get_geocoder_query(df['location_address'][0], bbox=[-87.940102,41.643921,-87.523987,42.023022])

(<[OK] Mapbox - Geocode [3115 North Mason Avenue, Chicago, Illinois 60634, United States]>,
 '{\'address\': "St. Ferdinand\'s 3115 N Mason", \'bbox\': [-87.940102, 41.643921, -87.523987, 42.023022]}')

In [7]:
import json
json.dumps(test[0].geojson)

NameError: name 'test' is not defined

In [8]:
import folium 

bbox = {
  "type": "FeatureCollection",
  "features": [
    {
      "type": "Feature",
      "properties": {},
      "geometry": {
        "type": "Polygon",
        "coordinates": 
[[[-87.940102,41.643921],[-87.523987,41.643921],[-87.523987,42.023022],[-87.940102,42.023022],[-87.940102,41.643921]]]
      }
    }
  ]
}



m = folium.Map(
    location=[42.023022, -87.523987],
    tiles='Mapbox Bright',
    zoom_start=1
)

folium.GeoJson(
    test_raw[0].geojson,
    name='test_raw'
).add_to(m)

folium.GeoJson(
    test_bbox[0].geojson,
    name='test_bbox'
).add_to(m)

folium.GeoJson(
    test_city_state[0].geojson,
    name='test_city_state'
).add_to(m)

folium.GeoJson(
    bbox,
    name='bbox'
).add_to(m)

folium.LayerControl().add_to(m)

m

In [None]:
testing2 = geocoder.mapbox('3115 North Mason Avenue, Chicago, Illinois 60634, United States', key= geo.key)

In [None]:
#clearly not great results
for r in test_raw[0]:
    print(r.address, r.latlng, r.confidence, r.quality, r.accuracy)

In [None]:
#quality is low, but there is only one result (and it appears to be correct)
for r in test_bbox[0]:
    print(r.address, r.latlng, r.confidence, r.quality, r.accuracy)
    


In [None]:
for r in test_city_state[0]:
    print(r.address, r.latlng, r.confidence, r.quality, r.accuracy)

In [None]:
#we could feed the highest quality parsed result from the first one and run that again/through other providers to get more confident
testing2 = geocoder.mapbox('3115 North Mason Avenue, Chicago, Illinois 60634, United States', key= geo.key)
for r in testing2:
    print(r.address, r.latlng, r.confidence, r.quality, r.accuracy)