# Geocoding

In [19]:
import requests

def geocode(address):
    '''
    Use geocode api to do forward geocoding. https://geocode.xyz/api
    '''
    res = requests.get(f"https://geocode.xyz/{address}",params={"json":1})
    data = res.json()
    print(res)
    # Return as GeoJSON -> https://geojson.org/
    return {
        "type":"Point",
        "coordinates": [float(data["longt"]), float(data["latt"])]
    }

In [2]:
ironhack = "Paseo de la chopera 14 Madrid"
geocode(ironhack)

{'type': 'Point', 'coordinates': [-3.7011, 40.39652]}

# Mongodb Geoqueries

## Preparing data...

In [3]:
from pymongo import MongoClient
client = MongoClient("mongodb://localhost/datamad0320")
db = client.get_database()

In [4]:
fb = db.companies.find_one({"name":"Facebook"},{"offices":1})

In [5]:
import pandas as pd

pd.DataFrame(fb["offices"])

Unnamed: 0,description,address1,address2,zip_code,city,state_code,country_code,latitude,longitude
0,Headquarters,1601 Willow Road,,94025.0,Menlo Park,CA,USA,37.41605,-122.151801
1,Europe HQ,,,,Dublin,,IRL,53.344104,-6.267494
2,New York,340 Madison Ave,,10017.0,New York,NY,USA,40.755716,-73.979247


In [6]:
all_offices = list(db.companies.find({},{"offices":1,"name":1,"category_code":1}))

In [7]:
companydata = pd.DataFrame(all_offices)
companydata

Unnamed: 0,_id,name,category_code,offices
0,52cdef7c4bab8bd675297d8b,AdventNet,enterprise,"[{'description': 'Headquarters', 'address1': '..."
1,52cdef7c4bab8bd675297d8d,Digg,news,"[{'description': None, 'address1': '135 Missis..."
2,52cdef7c4bab8bd675297d8c,Zoho,software,"[{'description': 'Headquarters', 'address1': '..."
3,52cdef7c4bab8bd675297d8a,Wetpaint,web,"[{'description': '', 'address1': '710 - 2nd Av..."
4,52cdef7c4bab8bd675297d8f,Omnidrive,network_hosting,"[{'description': '', 'address1': 'Suite 200', ..."
...,...,...,...,...
18796,52cdef7f4bab8bd67529c6f5,Oriact,software,"[{'description': '', 'address1': 'LÃ¤rchenweg ..."
18797,52cdef7f4bab8bd67529c6f8,goBookmaker,web,[]
18798,52cdef7f4bab8bd67529c6f7,AfterLogic,software,"[{'description': 'Livingston', 'address1': 'P...."
18799,52cdef7f4bab8bd67529c6f9,EnteGreat Solutions,software,"[{'description': '', 'address1': '', 'address2..."


In [8]:
companydata = companydata.explode("offices")
companydata

Unnamed: 0,_id,name,category_code,offices
0,52cdef7c4bab8bd675297d8b,AdventNet,enterprise,"{'description': 'Headquarters', 'address1': '4..."
1,52cdef7c4bab8bd675297d8d,Digg,news,"{'description': None, 'address1': '135 Mississ..."
2,52cdef7c4bab8bd675297d8c,Zoho,software,"{'description': 'Headquarters', 'address1': '4..."
3,52cdef7c4bab8bd675297d8a,Wetpaint,web,"{'description': '', 'address1': '710 - 2nd Ave..."
3,52cdef7c4bab8bd675297d8a,Wetpaint,web,"{'description': '', 'address1': '270 Lafayette..."
...,...,...,...,...
18796,52cdef7f4bab8bd67529c6f5,Oriact,software,"{'description': '', 'address1': 'LÃ¤rchenweg 7..."
18797,52cdef7f4bab8bd67529c6f8,goBookmaker,web,
18798,52cdef7f4bab8bd67529c6f7,AfterLogic,software,"{'description': 'Livingston', 'address1': 'P.O..."
18799,52cdef7f4bab8bd67529c6f9,EnteGreat Solutions,software,"{'description': '', 'address1': '', 'address2'..."


In [9]:
# transform office object into GeoPoint for office
def officeToGeoPoint(row):
    office = row.offices
    if type(office) == dict:
        if 'latitude' in office and 'longitude' in office:
            if(type(office["latitude"])) == float and type(office["longitude"]) == float:
                return ({
                    "type":"Point",
                    "coordinates":[office["longitude"],office["latitude"]]
                },"success")
            else:
                return(None,"Invalid lat lat and long")
        else:
            return (None,"No lat and long keys in office dict")
    return (None,"No office")


In [10]:
cleaned_offices = companydata.apply(officeToGeoPoint,axis=1, result_type="expand")
cleaned_offices.columns = ["office","clean_state"]

cleaned_offices

Unnamed: 0,office,clean_state
0,"{'type': 'Point', 'coordinates': [-121.904945,...",success
1,"{'type': 'Point', 'coordinates': [-122.394523,...",success
2,"{'type': 'Point', 'coordinates': [-121.904945,...",success
3,"{'type': 'Point', 'coordinates': [-122.333253,...",success
3,"{'type': 'Point', 'coordinates': [-73.9964312,...",success
...,...,...
18796,"{'type': 'Point', 'coordinates': [8.4371634, 4...",success
18797,,No office
18798,"{'type': 'Point', 'coordinates': [-74.3235539,...",success
18799,"{'type': 'Point', 'coordinates': [-86.816068, ...",success


In [11]:
company_processed = pd.concat([companydata,cleaned_offices], axis=1)

In [12]:
company_processed = company_processed[["name","category_code","office","clean_state"]]


In [13]:
company_processed 

Unnamed: 0,name,category_code,office,clean_state
0,AdventNet,enterprise,"{'type': 'Point', 'coordinates': [-121.904945,...",success
1,Digg,news,"{'type': 'Point', 'coordinates': [-122.394523,...",success
2,Zoho,software,"{'type': 'Point', 'coordinates': [-121.904945,...",success
3,Wetpaint,web,"{'type': 'Point', 'coordinates': [-122.333253,...",success
3,Wetpaint,web,"{'type': 'Point', 'coordinates': [-73.9964312,...",success
...,...,...,...,...
18796,Oriact,software,"{'type': 'Point', 'coordinates': [8.4371634, 4...",success
18797,goBookmaker,web,,No office
18798,AfterLogic,software,"{'type': 'Point', 'coordinates': [-74.3235539,...",success
18799,EnteGreat Solutions,software,"{'type': 'Point', 'coordinates': [-86.816068, ...",success


In [14]:
company_processed.clean_state.value_counts()

success                     10834
Invalid lat lat and long     5871
No office                    5057
Name: clean_state, dtype: int64

In [15]:
# Export as json to do mongodb import
# $ mongoimport --db datamad0320 --collection companies_prepared --jsonArray companies_clean.json
company_processed.to_json("data/companies_clean.json",orient="records")

## Query mongodb database with `$where` operator

In [16]:
def getOfficeNear(address, maxDist=1000):
    point = geocode(address)
    return {
       "office": {
         "$near": {
           "$geometry": point,
           "$maxDistance": maxDist,
         }
       }
    }

In [20]:
query = getOfficeNear("Times Square New York")
query

<Response [200]>


{'office': {'$near': {'$geometry': {'type': 'Point',
    'coordinates': [-73.98964, 40.75677]},
   '$maxDistance': 1000}}}

In [21]:
cur = db.companies_prepared.find(query, {"_id":0})
print(cur.count())
result = list(cur)
result

183


  


[{'name': 'Real Time Content',
  'category_code': 'advertising',
  'office': {'type': 'Point', 'coordinates': [-73.990286, 40.755959]},
  'clean_state': 'success'},
 {'name': 'ideeli',
  'category_code': 'ecommerce',
  'office': {'type': 'Point', 'coordinates': [-73.990396, 40.755978]},
  'clean_state': 'success'},
 {'name': 'Magnetic',
  'category_code': 'advertising',
  'office': {'type': 'Point', 'coordinates': [-73.990152, 40.758343]},
  'clean_state': 'success'},
 {'name': 'MindSmack',
  'category_code': 'games_video',
  'office': {'type': 'Point', 'coordinates': [-73.989987, 40.758492]},
  'clean_state': 'success'},
 {'name': 'MYSTYLEPOST',
  'category_code': 'network_hosting',
  'office': {'type': 'Point', 'coordinates': [-73.989622, 40.754901]},
  'clean_state': 'success'},
 {'name': 'Thomson Reuters',
  'category_code': 'public_relations',
  'office': {'type': 'Point', 'coordinates': [-73.9871847, 40.7564318]},
  'clean_state': 'success'},
 {'name': 'Thomson Reuters',
  'categ

In [23]:
def easyLatLng(row):
    of = row.office
    return {
        "latitude":of["coordinates"][1],
        "longitude":of["coordinates"][0]
    }

# https://api.mongodb.com/python/current/api/bson/objectid.html
df = pd.DataFrame(result)

df = pd.concat([df, df.apply(easyLatLng, axis=1, result_type="expand")], axis=1)
df

Unnamed: 0,name,category_code,office,clean_state,latitude,longitude
0,Real Time Content,advertising,"{'type': 'Point', 'coordinates': [-73.990286, ...",success,40.755959,-73.990286
1,ideeli,ecommerce,"{'type': 'Point', 'coordinates': [-73.990396, ...",success,40.755978,-73.990396
2,Magnetic,advertising,"{'type': 'Point', 'coordinates': [-73.990152, ...",success,40.758343,-73.990152
3,MindSmack,games_video,"{'type': 'Point', 'coordinates': [-73.989987, ...",success,40.758492,-73.989987
4,MYSTYLEPOST,network_hosting,"{'type': 'Point', 'coordinates': [-73.989622, ...",success,40.754901,-73.989622
...,...,...,...,...,...,...
178,Aleri,software,"{'type': 'Point', 'coordinates': [-73.9783534,...",success,40.759219,-73.978353
179,Warner Music Group,other,"{'type': 'Point', 'coordinates': [-73.9783534,...",success,40.759219,-73.978353
180,SheZoom,network_hosting,"{'type': 'Point', 'coordinates': [-73.9785086,...",success,40.753864,-73.978509
181,Social Median,web,"{'type': 'Point', 'coordinates': [-73.982575, ...",success,40.749630,-73.982575


In [24]:
df.to_json("data/query_ts.json",orient="records")

In [None]:
# https://python-visualization.github.io/folium/
# https://carto.com/developers/cartoframes/