In [1]:
import geocoder

In [2]:
import pandas as pd

# Read the scraper's CSV from the local_outputs directory into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='./documenters_aggregator/local_outputs/chi_police_20180312_2010.csv',
)

In [3]:
import geocoder

"""
This pipeline decorates items with latitude and longitude by geocoding their
addresses.
"""
class MapboxPipeline(object):
    """
    Stub pipeline to geocode using Mapbox.

    Items whose latitude is still ``None`` are geocoded from their address and
    decorated with latitude, longitude and the geocoder's ``url``.
    """

    # Environment variable consulted for the Mapbox token when none is passed in.
    KEY_ENV_VAR = 'MAPBOX_ACCESS_TOKEN'

    def __init__(self, key=None):
        """
        Configure the geocoding backend.

        :param key: Mapbox access token. When omitted it is read from the
            ``MAPBOX_ACCESS_TOKEN`` environment variable, so the secret does
            not have to be committed with the code. If that is unset too,
            ``self.key`` is ``None`` and is forwarded as-is to the geocoder
            library, which is left to resolve or reject it.
        """
        import os  # local import keeps the class self-contained in its cell

        # the geocoder library. this way we can use other libraries if needed or write/extend our own
        self.engine = 'geocoder'
        # api key, mapbox in this case; never hard-code it here
        self.key = key if key is not None else os.environ.get(self.KEY_ENV_VAR)

    def process_item(self, item, spider):
        """
        Process an item.

        Fills ``item['location']['coordinates']`` (and ``item['location']['url']``)
        when the latitude is ``None``; otherwise the item is returned untouched.

        :param item: mapping with a ``location`` entry holding ``coordinates``
            (``latitude`` / ``longitude``).
        :param spider: unused; accepted for pipeline-interface compatibility.
        :return: the same ``item`` object, possibly decorated in place.
        """
        location = item['location']
        coordinates = location['coordinates']
        if coordinates['latitude'] is not None:
            # Already geocoded -- nothing to do.
            return item

        # NOTE(review): assumes the street address lives under
        # item['location']['address'] (the CSV export has a matching
        # `location_address` column) -- confirm against the spiders.
        address = location.get('address')
        if not address:
            # Without an address there is nothing meaningful to geocode.
            return item

        # get_geocoder_query returns (result, query-params string); only the
        # result object carries lat/lng/url.
        response, _query_repr = self.get_geocoder_query(address)
        coordinates['latitude'] = response.lat
        coordinates['longitude'] = response.lng
        location['url'] = response.url
        return item

    def get_geocoder_query(self, address=None, bbox=None):
        """
        Geocode ``address`` with the configured engine.

        :param address: free-text address to look up.
        :param bbox: optional bounds to search in, useful if we don't have
            city/zip in the address.
        :return: tuple ``(result, query_repr)`` where ``result`` is whatever
            the geocoder provider call returns and ``query_repr`` is
            ``str()`` of the ``{'address': ..., 'bbox': ...}`` dict used.
        :raises ValueError: if ``self.engine`` is not a supported engine.
        """
        if self.engine != 'geocoder':
            raise ValueError('Unsupported geocoding engine: {!r}'.format(self.engine))

        provider = 'mapbox'
        query_params = {
            'address': address,
            'bbox': bbox,
        }
        # Resolve the provider function by name (geocoder.mapbox) so another
        # provider can be swapped in by changing the string above. The query
        # params are forwarded as keywords exactly as before.
        lookup = getattr(geocoder, provider)
        result = lookup(query_params['address'], key=self.key, **query_params)
        return result, str(query_params)



In [4]:
# Create the pipeline instance whose query helper and key the cells below use.
geo = MapboxPipeline()


In [5]:
# Geocode the first address in the CSV three ways to compare hinting strategies.
first_address = df['location_address'][0]
search_bounds = [-87.940102, 41.643921, -87.523987, 42.023022]

# 1) address as-is, 2) same address restricted to a bounding box,
# 3) address with city and state appended.
test_raw = geo.get_geocoder_query(first_address)
test_bbox = geo.get_geocoder_query(first_address, bbox=search_bounds)
test_city_state = geo.get_geocoder_query(first_address + ',Chicago,Illinois')

In [6]:
# Re-run the bounded lookup as the cell's last expression so the returned
# (result, query-params string) tuple is displayed.
geo.get_geocoder_query(
    address=df['location_address'][0],
    bbox=[-87.940102, 41.643921, -87.523987, 42.023022],
)

(<[OK] Mapbox - Geocode [3115 North Mason Avenue, Chicago, Illinois 60634, United States]>,
 '{\'address\': "St. Ferdinand\'s 3115 N Mason", \'bbox\': [-87.940102, 41.643921, -87.523987, 42.023022]}')

In [8]:
import json
# Serialise the GeoJSON of the un-hinted lookup so the full payload shows as cell output.
raw_geojson = test_raw[0].geojson
json.dumps(raw_geojson)

'{"type": "FeatureCollection", "features": [{"type": "Feature", "properties": {"address": "Santander, Colombia", "bbox": [-74.519425, 5.711409, -72.476863, 8.161466], "confidence": 1, "country": "Colombia", "lat": 6.938039, "lng": -73.273945, "ok": true, "quality": 0.3333333333333333, "raw": {"id": "region.219453", "type": "Feature", "place_type": ["region"], "relevance": 0.3333333333333333, "properties": {"short_code": "CO-SAN", "wikidata": "Q235166"}, "text": "Santander", "place_name": "Santander, Colombia", "bbox": [-74.519425, 5.711409, -72.476863, 8.161466], "center": [-73.273945, 6.938039], "geometry": {"type": "Point", "coordinates": [-73.273945, 6.938039]}, "context": [{"id": "country.395", "short_code": "co", "wikidata": "Q739", "text": "Colombia"}], "country": "Colombia"}, "status": "OK"}, "bbox": [-74.519425, 5.711409, -72.476863, 8.161466], "geometry": {"type": "Point", "coordinates": [-73.273945, 6.938039]}}, {"type": "Feature", "properties": {"address": "Sulawesi Tengah, 

In [9]:
import folium 

# GeoJSON outline of the rectangle with corners (-87.940102, 41.643921) and
# (-87.523987, 42.023022); the ring is closed by repeating its first vertex.
bbox = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "properties": {},
            "geometry": {
                "type": "Polygon",
                "coordinates": [[
                    [-87.940102, 41.643921],
                    [-87.523987, 41.643921],
                    [-87.523987, 42.023022],
                    [-87.940102, 42.023022],
                    [-87.940102, 41.643921],
                ]],
            },
        },
    ],
}

# Start fully zoomed out so every candidate result is visible at once.
m = folium.Map(location=[42.023022, -87.523987], tiles='Mapbox Bright', zoom_start=1)

# One toggleable layer per geocoding attempt, plus the rectangle itself,
# added in the same order as before.
map_layers = (
    ('test_raw', test_raw[0].geojson),
    ('test_bbox', test_bbox[0].geojson),
    ('test_city_state', test_city_state[0].geojson),
    ('bbox', bbox),
)
for layer_name, layer_geojson in map_layers:
    folium.GeoJson(layer_geojson, name=layer_name).add_to(m)

folium.LayerControl().add_to(m)

m

In [10]:
# Look up the fully qualified address string directly through the Mapbox provider.
testing2 = geocoder.mapbox(
    '3115 North Mason Avenue, Chicago, Illinois 60634, United States',
    key=geo.key,
)

In [11]:
# Un-hinted lookup: the candidates are spread over several countries -- clearly not great results.
raw_result = test_raw[0]
for candidate in raw_result:
    print(
        candidate.address,
        candidate.latlng,
        candidate.confidence,
        candidate.quality,
        candidate.accuracy,
    )

Santander, Colombia [6.938039, -73.273945] 1 0.3333333333333333 None
Sulawesi Tengah, Indonesia [-0.945041, 122.399544] 1 0.3333333333333333 None
Santiago, Dominican Republic [19.338071, -70.964806] 1 0.3333333333333333 None
Surat Thani, Thailand [9.052613, 99.115455] 1 0.3333333333333333 None
Sachsen-Anhalt, Germany [51.990429, 11.558208] 1 0.3333333333333333 None


In [12]:
# Bounded lookup: quality is low, but there is only one result (and it appears to be correct).
bbox_result = test_bbox[0]
for candidate in bbox_result:
    fields = (candidate.address, candidate.latlng, candidate.confidence,
              candidate.quality, candidate.accuracy)
    print(*fields)
    


3115 North Mason Avenue, Chicago, Illinois 60634, United States [41.938369, -87.774988] 0 0.3333333333333333 interpolated


In [13]:
# Lookup with city and state appended: list every candidate with its scores.
city_state_result = test_city_state[0]
for candidate in city_state_result:
    print(
        candidate.address, candidate.latlng,
        candidate.confidence, candidate.quality, candidate.accuracy,
    )

3115 North Mason Avenue, Chicago, Illinois 60634, United States [41.938369, -87.774988] 0 0.7 interpolated
Chicago, Illinois, United States [41.8756, -87.6244] 1 0.5 None
Illinois Medical District, Chicago, Illinois 60612, United States [41.87, -87.67] 7 0.49 None
Mason, Illinois, United States [38.9531, -88.6237] 1 0.4 None
Illinois, United States [39.739182, -89.451439] 1 0.25 None


In [14]:
# Idea: feed the highest-quality parsed address from the first pass back in
# (or through other providers) to get a more confident match.
parsed_address = '3115 North Mason Avenue, Chicago, Illinois 60634, United States'
testing2 = geocoder.mapbox(parsed_address, key=geo.key)
for candidate in testing2:
    print(
        candidate.address,
        candidate.latlng,
        candidate.confidence,
        candidate.quality,
        candidate.accuracy,
    )

3115 North Mason Avenue, Chicago, Illinois 60634, United States [41.938369, -87.774988] 0 0.8999999999999999 interpolated
North Mason Avenue, Chicago, Illinois 60630, United States [41.97332, -87.776688] 0 0.6666666666666666 None
North Mason Avenue, Chicago, Illinois 60646, United States [42.009994, -87.773919] 0 0.6666666666666666 None
North Mason Avenue, Chicago, Illinois 60644, United States [41.883724, -87.773671] 0 0.6666666666666666 None
Chicago, Illinois, United States [41.8756, -87.6244] 1 0.6000000000000001 None
