# Geocoding

In [11]:
import requests

def geocode(address, timeout=10):
    """Geocode a free-text address with geocode.xyz and return a GeoJSON Point.

    Parameters
    ----------
    address : str
        Address or place name; it is interpolated into the request URL path.
    timeout : float, optional
        Seconds to wait for the HTTP response before ``requests`` gives up.

    Returns
    -------
    dict
        ``{"type": "Point", "coordinates": [longitude, latitude]}``. GeoJSON
        puts longitude first, which is why ``longt`` precedes ``latt``.

    Raises
    ------
    requests.HTTPError
        If the service answers with a 4xx/5xx status.
    ValueError
        If the JSON payload reports an error, or its ``longt``/``latt``
        fields are missing or not numeric.
    """
    # Without a timeout a stalled connection would hang the notebook forever.
    res = requests.get(
        f"https://geocode.xyz/{address}",
        params={"json": 1},
        timeout=timeout,
    )
    res.raise_for_status()
    data = res.json()

    # NOTE(review): the service appears to signal failures with an "error"
    # object inside the JSON body; surface it instead of returning
    # placeholder coordinates.
    if "error" in data:
        raise ValueError(f"geocode.xyz could not geocode {address!r}: {data['error']}")

    try:
        longitude = float(data["longt"])
        latitude = float(data["latt"])
    except (KeyError, TypeError, ValueError) as exc:
        raise ValueError(
            f"geocode.xyz returned no usable coordinates for {address!r}"
        ) from exc

    return {
        "type": "Point",
        "coordinates": [longitude, latitude],
    }

In [12]:
# geojson.io map view for checking the point visually:
# https://geojson.io/#map=16/40.3959/-3.7039
# GeoJSON format home page: https://geojson.org/
ironhack = "Paseo de la chopera 14 Madrid"
# geocode() returns a GeoJSON Point whose coordinates are [longitude, latitude].
geocode(ironhack)

{'type': 'Point', 'coordinates': [-3.7011, 40.39652]}

# MongoDB queries

In [13]:
# Reference for the `$near` query operator:
# https://docs.mongodb.com/manual/reference/operator/query/near/#op._S_near

In [14]:
from pymongo import MongoClient

# Connect to the local MongoDB server; the URI path names the database to use.
client = MongoClient("mongodb://localhost/datamad0620")
# Called without a name, get_database() is expected to return the database
# named in the URI (datamad0620) — NOTE(review): confirm against pymongo docs.
db = client.get_database()

In [46]:
import re

# Case-insensitive pattern for company names that start with "face".
n = re.compile("^face",re.IGNORECASE)
# Projection keeps only `name` and `offices`; `_id` still comes back because
# it is not excluded (it shows up in the resulting frame).
companies = db.companies.find({"name":n},{"offices":1, "name":1})

In [47]:
import pandas as pd

# Materialise the query result into a DataFrame, one row per company.
# NOTE(review): list() presumably drains the cursor, so re-run the query cell
# before re-running this one — confirm.
df = pd.DataFrame(list(companies))
df

Unnamed: 0,_id,name,offices
0,52cdef7c4bab8bd675297d8e,Facebook,"[{'description': 'Headquarters', 'address1': '..."
1,52cdef7c4bab8bd675298334,FaceTec,"[{'description': None, 'address1': 'Liverpool ..."
2,52cdef7d4bab8bd675299497,Face Your Manga,[]
3,52cdef7d4bab8bd675299d49,Facebookster,"[{'description': 'Facebookster US Office', 'ad..."
4,52cdef7d4bab8bd67529a044,FACEinHOLE,"[{'description': 'Headquarters', 'address1': '..."
5,52cdef7d4bab8bd67529a054,Facebook Causes Application,[]
6,52cdef7d4bab8bd67529a1b9,FaceKoo,"[{'description': '', 'address1': 'No. 18, Daji..."
7,52cdef7e4bab8bd67529a7c9,facetime,[]
8,52cdef7e4bab8bd67529aaa9,facelovefinder,"[{'description': '', 'address1': 'Manzanos 151..."
9,52cdef7e4bab8bd67529ab2a,FACE Africa,"[{'description': 'Main Office', 'address1': '9..."


In [55]:
# Turn each company's `offices` list into one row per office, then spread the
# keys of every office dict into columns. Row labels are inherited from `df`,
# so a company with several offices repeats its label.
offices = (
    df
    .explode("offices")
    .apply(lambda row: row.offices, axis=1, result_type="expand")
)
offices

Unnamed: 0,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude
0,Headquarters,1601 Willow Road,,94025,Menlo Park,CA,USA,37.41605,-122.151801
0,Europe HQ,,,,Dublin,,IRL,53.344104,-6.267494
0,New York,340 Madison Ave,,10017,New York,NY,USA,40.755716,-73.979247
1,,Liverpool Science Park,131 Mount Pleasant,,Liverpool,,GBR,,
2,,,,,,,,,
3,Facebookster US Office,44288 Fremont Blvd.,,94538,Fremont,CA,USA,37.506206,-121.949247
4,Headquarters,,"Rua Serpa Pinto, nº 1",1200-442,Lisbon,,PRT,38.707163,-9.135517
5,,,,,,,,,
6,,"No. 18, Dajie,",Chaoyang District,100020,Beijing,,CHN,,
7,,,,,,,,,


In [63]:
# Put the company's name and _id next to each office row. The concat aligns on
# the row label, and `offices` kept the labels of `df`, so a company with
# several offices has its name/_id repeated on each of them.
clean_offices = pd.concat([df[["name","_id"]], offices], axis=1)
clean_offices

Unnamed: 0,name,_id,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude
0,Facebook,52cdef7c4bab8bd675297d8e,Headquarters,1601 Willow Road,,94025,Menlo Park,CA,USA,37.41605,-122.151801
0,Facebook,52cdef7c4bab8bd675297d8e,Europe HQ,,,,Dublin,,IRL,53.344104,-6.267494
0,Facebook,52cdef7c4bab8bd675297d8e,New York,340 Madison Ave,,10017,New York,NY,USA,40.755716,-73.979247
1,FaceTec,52cdef7c4bab8bd675298334,,Liverpool Science Park,131 Mount Pleasant,,Liverpool,,GBR,,
2,Face Your Manga,52cdef7d4bab8bd675299497,,,,,,,,,
3,Facebookster,52cdef7d4bab8bd675299d49,Facebookster US Office,44288 Fremont Blvd.,,94538,Fremont,CA,USA,37.506206,-121.949247
4,FACEinHOLE,52cdef7d4bab8bd67529a044,Headquarters,,"Rua Serpa Pinto, nº 1",1200-442,Lisbon,,PRT,38.707163,-9.135517
5,Facebook Causes Application,52cdef7d4bab8bd67529a054,,,,,,,,,
6,FaceKoo,52cdef7d4bab8bd67529a1b9,,"No. 18, Dajie,",Chaoyang District,100020,Beijing,,CHN,,
7,facetime,52cdef7e4bab8bd67529a7c9,,,,,,,,,


In [83]:
import numpy as np

def transformToGeoPoint(s):
    """Build a GeoJSON Point from a row's ``latitude`` / ``longitude``.

    Parameters
    ----------
    s : object
        Any object exposing ``latitude`` and ``longitude`` attributes, e.g. a
        DataFrame row passed by ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    dict or None
        ``{"type": "Point", "coordinates": [longitude, latitude]}`` with plain
        Python floats, or ``None`` when either coordinate is missing.
    """
    latitude, longitude = s.latitude, s.longitude
    # pd.isna treats None as missing too; np.isnan(None) raises TypeError, which
    # happens when the coordinate columns end up with object dtype.
    if pd.isna(latitude) or pd.isna(longitude):
        return None
    return {
        "type": "Point",
        # GeoJSON order is longitude first, then latitude.
        "coordinates": [float(longitude), float(latitude)],
    }
    

# Row-wise: build a GeoJSON point per office (None where coordinates are missing).
clean_offices["geopoint"] = clean_offices.apply(transformToGeoPoint, axis=1)

In [84]:
# Inspect the frame now that it carries the `geopoint` column.
clean_offices

Unnamed: 0,name,_id,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude,geopoint
0,Facebook,52cdef7c4bab8bd675297d8e,Headquarters,1601 Willow Road,,94025,Menlo Park,CA,USA,37.41605,-122.151801,"{'type': 'Point', 'coordinates': [-122.151801,..."
0,Facebook,52cdef7c4bab8bd675297d8e,Europe HQ,,,,Dublin,,IRL,53.344104,-6.267494,"{'type': 'Point', 'coordinates': [-6.267494, 5..."
0,Facebook,52cdef7c4bab8bd675297d8e,New York,340 Madison Ave,,10017,New York,NY,USA,40.755716,-73.979247,"{'type': 'Point', 'coordinates': [-73.9792469,..."
1,FaceTec,52cdef7c4bab8bd675298334,,Liverpool Science Park,131 Mount Pleasant,,Liverpool,,GBR,,,
2,Face Your Manga,52cdef7d4bab8bd675299497,,,,,,,,,,
3,Facebookster,52cdef7d4bab8bd675299d49,Facebookster US Office,44288 Fremont Blvd.,,94538,Fremont,CA,USA,37.506206,-121.949247,"{'type': 'Point', 'coordinates': [-121.949247,..."
4,FACEinHOLE,52cdef7d4bab8bd67529a044,Headquarters,,"Rua Serpa Pinto, nº 1",1200-442,Lisbon,,PRT,38.707163,-9.135517,"{'type': 'Point', 'coordinates': [-9.135517, 3..."
5,Facebook Causes Application,52cdef7d4bab8bd67529a054,,,,,,,,,,
6,FaceKoo,52cdef7d4bab8bd67529a1b9,,"No. 18, Dajie,",Chaoyang District,100020,Beijing,,CHN,,,
7,facetime,52cdef7e4bab8bd67529a7c9,,,,,,,,,,


In [88]:
# Transform all companies documents to office documents: the same steps that
# were applied to the "face" sample, now with an empty filter so every company
# is included.

# Only `name` and `offices` are projected; `_id` comes back as well.
companies = db.companies.find({},{"offices":1, "name":1})
df = pd.DataFrame(list(companies))
# One row per office; row labels still point at the parent company in `df`.
offices = df.explode("offices").apply(lambda e: e.offices,axis=1,result_type="expand")
# Index-aligned concat repeats each company's name/_id on each of its offices.
clean_offices = pd.concat([df[["name","_id"]], offices], axis=1)
clean_offices["geopoint"] = clean_offices.apply(transformToGeoPoint, axis=1)
# Show a preview so the cell actually produces the output recorded below it.
clean_offices.head()


Unnamed: 0,name,_id,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude,geopoint
0,Zoho,52cdef7c4bab8bd675297d8c,Headquarters,4900 Hopyard Rd,Suite 310,94588,Pleasanton,CA,USA,37.692934,-121.904945,"{'type': 'Point', 'coordinates': [-121.904945,..."
1,Wetpaint,52cdef7c4bab8bd675297d8a,,710 - 2nd Avenue,Suite 1100,98104,Seattle,WA,USA,47.603122,-122.333253,"{'type': 'Point', 'coordinates': [-122.333253,..."
1,Wetpaint,52cdef7c4bab8bd675297d8a,,270 Lafayette Street,Suite 505,10012,New York,NY,USA,40.723731,-73.996431,"{'type': 'Point', 'coordinates': [-73.9964312,..."
2,AdventNet,52cdef7c4bab8bd675297d8b,Headquarters,4900 Hopyard Rd.,Suite 310,94588,Pleasanton,CA,USA,37.692934,-121.904945,"{'type': 'Point', 'coordinates': [-121.904945,..."
3,Postini,52cdef7c4bab8bd675297d90,,"959 Skyway Road, Suite 200",,94070,San Carlos,CA,USA,37.506885,-122.247573,"{'type': 'Point', 'coordinates': [-122.247573,..."


In [90]:
# Each row is now an office, so the company's `_id` becomes a plain
# `company_id` reference (presumably so imported office documents can get
# their own `_id` — confirm).
clean_offices = clean_offices.rename(columns={"_id":"company_id"})
clean_offices.head()

Unnamed: 0,name,company_id,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude,geopoint
0,Zoho,52cdef7c4bab8bd675297d8c,Headquarters,4900 Hopyard Rd,Suite 310,94588,Pleasanton,CA,USA,37.692934,-121.904945,"{'type': 'Point', 'coordinates': [-121.904945,..."
1,Wetpaint,52cdef7c4bab8bd675297d8a,,710 - 2nd Avenue,Suite 1100,98104,Seattle,WA,USA,47.603122,-122.333253,"{'type': 'Point', 'coordinates': [-122.333253,..."
1,Wetpaint,52cdef7c4bab8bd675297d8a,,270 Lafayette Street,Suite 505,10012,New York,NY,USA,40.723731,-73.996431,"{'type': 'Point', 'coordinates': [-73.9964312,..."
2,AdventNet,52cdef7c4bab8bd675297d8b,Headquarters,4900 Hopyard Rd.,Suite 310,94588,Pleasanton,CA,USA,37.692934,-121.904945,"{'type': 'Point', 'coordinates': [-121.904945,..."
3,Postini,52cdef7c4bab8bd675297d90,,"959 Skyway Road, Suite 200",,94070,San Carlos,CA,USA,37.506885,-122.247573,"{'type': 'Point', 'coordinates': [-122.247573,..."


In [97]:
import os

# Convert ObjectIds to strings to be able to export as JSON.
clean_offices["company_id"] = clean_offices["company_id"].apply(str)

# Export as JSON records to import with mongoimport.
# Create the target folder first so the export does not fail when `data/`
# does not exist yet.
os.makedirs("data", exist_ok=True)
clean_offices.to_json("data/offices.json",orient="records")

# Run from the notebook's folder (the file is written under data/):
# mongoimport -d datamad0620 -c offices --jsonArray data/offices.json

## MongoDB geospatial `$near`

In [100]:
# Reference point for the proximity query, as a GeoJSON Point ([longitude, latitude]).
times_square = geocode("Times Square, New York")
times_square

{'type': 'Point', 'coordinates': [-73.98964, 40.75677]}

```
{
   <location field>: {
     $near: {
       $geometry: {
          type: "Point" ,
          coordinates: [ <longitude> , <latitude> ]
       },
       $maxDistance: <distance in meters>,
       $minDistance: <distance in meters>
     }
   }
}
```

In [102]:
def geoQueryNear(point, radius=10000, min_distance=0):
    """Build a MongoDB ``$near`` filter on the ``geopoint`` field.

    Parameters
    ----------
    point : dict
        GeoJSON Point (``{"type": "Point", "coordinates": [lon, lat]}``) used
        as the centre of the search.
    radius : int or float, optional
        Value sent as ``$maxDistance`` (metres, per the ``$near`` syntax).
    min_distance : int or float, optional
        Value sent as ``$minDistance`` (metres). Defaults to 0, i.e. no inner
        exclusion zone, which matches the previous hard-coded behaviour.

    Returns
    -------
    dict
        Filter document ready to pass to ``collection.find``.
    """
    near_clause = {
        "$geometry": point,
        "$maxDistance": radius,
        "$minDistance": min_distance,
    }
    return {"geopoint": {"$near": near_clause}}

In [115]:
# `$near` needs a geospatial index on the queried field, and nothing earlier in
# the notebook creates one. Creating it here is harmless if it already exists,
# so the cell stays safe to re-run.
db.offices.create_index([("geopoint", "2dsphere")])

# Offices within 3000 m of Times Square. The projection drops `_id` and
# `company_id` from the returned documents.
q = db.offices.find(geoQueryNear(times_square, radius=3000),{"_id":0,"company_id":0})
near_offices = list(q)
len(near_offices)

441

In [116]:
# Tabulate the nearby offices and write them out as a JSON array of records.
df = pd.DataFrame(near_offices)
df.to_json("data/offices_near_times_square.json",orient="records")