In [1]:
# Normal stack of pandas, numpy, matplotlib and seaborn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
import seaborn as sns

import os
import json

# Factual API
from factual import Factual # https://github.com/Factual/factual-python-driver
from factual.utils import circle

%matplotlib inline

Factual API TODOs in order to make it work on your localhost:
 1. run in terminal "pip install factual-api" without ""
 2. sign up for a Factual API key here: https://www.factual.com/contact/new#free_api_access
 3. Set up your key and secret as environment variables - FACTUAL_API_KEY and FACTUAL_API_SECRET respectively
    
    3.1. On mac you just open .bash_profile and add two lines: 
     
     export FACTUAL_API_KEY=your_key
     
     export FACTUAL_API_SECRET=your_secret
     
    3.2. Restart shell and run jupyter notebook in this newly restarted shell (terminal)
     
 4. ...profit ;)

In [4]:
# You can verify that your env variables are ok with:
print(os.environ['FACTUAL_API_KEY'])
print(os.environ['FACTUAL_API_SECRET'])

wEydNeOyUWVBpgZjlKKlqFzODVf3FEj5LcltoIVt
ACxDy5tJQJAEPfgZidA4GQOKE9TaJmCJWSwBoasb


In [5]:
factual = Factual(os.environ['FACTUAL_API_KEY'], os.environ['FACTUAL_API_SECRET'])

In [6]:
places = factual.table('places-ch')
places.schema()

{'description': 'Businesses and Places of Interest',
 'fields': [{'datatype': 'String',
   'description': 'Business/POI name',
   'faceted': False,
   'label': 'Name',
   'multivalued': False,
   'name': 'name',
   'q_searchable': True,
   'searchable': True,
   'sortable': True,
   'writable': True},
  {'datatype': 'String',
   'description': 'Address number and street name',
   'faceted': False,
   'label': 'Address',
   'multivalued': False,
   'name': 'address',
   'q_searchable': True,
   'searchable': True,
   'sortable': True,
   'writable': True},
  {'datatype': 'String',
   'description': 'Additional address, incl. suite numbers',
   'faceted': False,
   'label': 'Address Extended',
   'multivalued': False,
   'name': 'address_extended',
   'q_searchable': False,
   'searchable': True,
   'sortable': False,
   'writable': True},
  {'datatype': 'String',
   'description': 'PO Box. As they do not represent the physical location of a brick-and-mortar store, PO Boxes are often exc

In [7]:
places.data()

[{'address': 'Binningerstrasse 40',
  'category_ids': [371],
  'category_labels': [['Social', 'Zoos, Aquariums and Wildlife Sanctuaries']],
  'country': 'ch',
  'factual_id': '6aa834ab-8ed1-459a-93ea-4ce2223c26ae',
  'fax': '061 281 00 05',
  'hours': {'friday': [['8:00', '17:00']],
   'monday': [['5:00', '7:00'], ['10:00', '15:00']],
   'saturday': [['9:00', '16:00']],
   'sunday': [['8:00', '17:00']],
   'thursday': [['10:00', '15:00']],
   'tuesday': [['6:00', '7:00'], ['10:00', '16:00']],
   'wednesday': [['11:00', '16:00']]},
  'hours_display': 'Mon 5:00-7:00, 10:00-15:00; Tue 6:00-7:00, 10:00-16:00; Wed 11:00-16:00; Thu 10:00-15:00; Fri 8:00-17:00; Sat 9:00-16:00; Sun 8:00-17:00',
  'latitude': 47.547416,
  'locality': 'Basel',
  'longitude': 7.578764,
  'name': 'Zoo Basel',
  'postcode': '4054',
  'region': 'Basel-Stadt',
  'tel': '061 295 35 35',
  'website': 'http://www.zoobasel.ch/'},
 {'address': 'Grossmünsterplatz',
  'category_ids': [55],
  'category_labels': [['Community 

In [8]:
the_great_escape = places.search('The Great Escape').data()
print(json.dumps(the_great_escape, indent=2))

[
  {
    "category_labels": [
      [
        "Social",
        "Bars"
      ],
      [
        "Social",
        "Food and Dining",
        "Restaurants"
      ]
    ],
    "address": "Rue Madeleine 18",
    "email": "bar.contact@the-great.ch",
    "region": "Vaud",
    "hours_display": "Mon-Fri 00:00-1:00, 11:00-23:59; Sat-Sun 00:00-2:00, 12:00-23:59",
    "locality": "Lausanne",
    "website": "http://www.the-great.ch/",
    "country": "ch",
    "longitude": 6.63313,
    "tel": "021 312 31 94",
    "category_ids": [
      312,
      347
    ],
    "factual_id": "5dce5f96-01d6-4709-9da4-bf22b0049577",
    "hours": {
      "tuesday": [
        [
          "00:00",
          "1:00"
        ],
        [
          "11:00",
          "23:59"
        ]
      ],
      "sunday": [
        [
          "00:00",
          "2:00"
        ],
        [
          "12:00",
          "23:59"
        ]
      ],
      "thursday": [
        [
          "00:00",
          "1:00"
        ],
        [
   

In [9]:
def square(point1, point2):
    """
    Creates an API-ready square from the given latitudes, longitudes.
    """
    return {'$within': {'$rect': [point1, point2]}}

In [77]:
#Coffe near EPFL
# for radius of 100 near The Great Escape (Lausanne) we end up with 253 places
# for radius of 100 near The Bahnhofstassewe end up with 248 places
#epfl_coffe = places.geo(circle(47.374556, 8.538635, 100)).offset(200).limit(50).data()
#zürich dense area between points [47.375854, 8.537451],[47.373142, 8.540137] we get an empty last offset and 28 in the second last
zurich = places.geo(square([47.375854, 8.537451],[47.373142, 8.540137])).offset(400).limit(50).data()
print(json.dumps(zurich, indent=2))

[
  {
    "fax": "043 497 23 71",
    "category_labels": [
      [
        "Retail",
        "Fashion",
        "Clothing and Accessories"
      ]
    ],
    "address": "Uraniastrasse 4",
    "region": "Z\u00fcrich",
    "locality": "Z\u00fcrich",
    "postcode": "8001",
    "longitude": 8.539404,
    "tel": "043 960 92 05",
    "factual_id": "fd2e3998-2a47-425b-af6d-b73caab99608",
    "category_ids": [
      143
    ],
    "country": "ch",
    "name": "M O D A E N G M B H",
    "latitude": 47.374609
  },
  {
    "category_labels": [
      [
        "Retail",
        "Fashion",
        "Clothing and Accessories"
      ]
    ],
    "address": "Rennweg 36",
    "locality": "Z\u00fcrich",
    "postcode": "8001",
    "longitude": 8.539346,
    "tel": "043 497 26 60",
    "factual_id": "0bc02737-4465-474e-aa6e-3c8dfac712e3",
    "category_ids": [
      143
    ],
    "latitude": 47.373359,
    "name": "Satform S.A.",
    "country": "ch",
    "region": "Z\u00fcrich"
  },
  {
    "category_la

In [78]:
len(zurich)

28

In [12]:
import math

def measure(point1, point2):
    lat1, lon1 = point1[0], point1[1]
    lat2, lon2 = point2[0], point2[1]
    R = 6378.137; 
    dLat = lat2 * math.pi / 180 - lat1 * math.pi / 180
    dLon = lon2 * math.pi / 180 - lon1 * math.pi / 180
    a = math.sin(dLat/2) * math.sin(dLat/2) + \
    math.cos(lat1 * math.pi / 180) * math.cos(lat2 * math.pi / 180) * \
    math.sin(dLon/2) * math.sin(dLon/2)
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
    d = R * c;
    return d * 1000;

In [38]:
measure([47.374556, 8.538635],[47.3717392,8.5409286])

358.07903293419145

In [79]:
measure([47.375854, 8.537451],[47.373142, 8.540137])

363.51551631028434

LAUSANE MVP TODOs:
 - Create a dict with coordinates and radius for Lausanne
 - Write up a script scraping API across those coordinates AND
 - write up a system to store scrapped coordinates and untag them from future scrapping


_________
General TODOs:

 - We gotta figure out how we would like to divide Switzerland into small pieces (squares or circles) such that we will be able to extract all spots provided by factual without actually hitting its limit ("3 Page limit=50 is the maximum number of rows that can be returned in a single request using the limit parameter. Row limit=500 is the maximum depth a request can page into using (e.g., offset + limit).").
 - Since we wont be able to squeeze it in 10 000 places/day we also need to figure out a way to run script on multiple machines (using few API-KEYS) and on the course of multiple days.