In [None]:
import pandas as pd
import requests
from time import sleep
import os
import json
from geopy.distance import geodesic
import sys
import pickle


In [None]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem so project files are reachable.
drive.mount('/content/drive')

# Project root on the mounted drive, and the data subfolder beneath it.
folder_path = '/content/drive/MyDrive/ml_proj/'
data_path = f'{folder_path}data/'

In [None]:
# Configuration for loading POIs through the Yelp API.

# Business-search endpoint that every request in this cell is sent to.
endpoint = 'https://api.yelp.com/v3/businesses/search'

# API key (blank in this copy of the notebook) and the bearer-token
# authorization header derived from it.
api_key = ''
headers = {'Authorization': 'Bearer ' + api_key}

def yelp_api_request(offset, term, timeout=10):
    """Fetch one page of Yelp businesses in Chicago and flatten them to rows.

    Args:
        offset: Pagination offset sent to the search endpoint.
        term: Singular search term, e.g. ``'restaurant'``. The category
            filter is derived from it by appending ``'s'``.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error instead of blocking indefinitely.

    Returns:
        A list with one row per business in the response, each of the form
        ``[term, rating, review_count, latitude, longitude, category_alias,
        price]``. ``category_alias`` and ``price`` are ``''`` when the
        business has no category or no price; latitude/longitude are
        ``None`` when coordinates are missing. Returns ``[]`` when the
        response status is not 200.
    """
    params = {
        'term': term,
        'location': 'Chicago',
        'categories': term + 's',
        'limit': 50,
        'offset': offset,
    }

    response = requests.get(
        endpoint, headers=headers, params=params, timeout=timeout
    )

    if response.status_code != 200:
        # Keep the best-effort contract (empty page), but make the failure
        # visible instead of silently losing results.
        print(
            f"Yelp search for {term!r} at offset {offset} failed "
            f"with status {response.status_code}"
        )
        return []

    rows = []
    for business in response.json().get('businesses', []):
        # Guard against absent/null coordinates and an empty category list,
        # either of which would otherwise abort the whole download.
        coordinates = business.get('coordinates') or {}
        categories = business.get('categories') or []
        rows.append([
            term,
            business['rating'],
            business['review_count'],
            coordinates.get('latitude'),
            coordinates.get('longitude'),
            categories[0]['alias'] if categories else '',
            business.get('price', ''),
        ])

    return rows

def yelp_limit(term, timeout=10):
    """Return the total number of Chicago businesses Yelp reports for *term*.

    Issues a one-result search and reads the ``total`` field of the response.

    Args:
        term: Singular search term, e.g. ``'restaurant'``. The category
            filter is derived from it by appending ``'s'``.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error instead of blocking indefinitely.

    Returns:
        The ``total`` value from the response, or ``0`` when the request
        does not return status 200 or the field is absent, so the result is
        always usable as a ``range`` bound.
    """
    params = {
        'term': term,
        'location': 'Chicago',
        'categories': term + 's',
        'limit': 1,
    }

    response = requests.get(
        endpoint, headers=headers, params=params, timeout=timeout
    )

    if response.status_code != 200:
        # Previously this path fell through and returned None, which made
        # the caller's range(0, limit, 50) raise TypeError.
        print(
            f"Yelp count request for {term!r} failed "
            f"with status {response.status_code}"
        )
        return 0

    return response.json().get('total', 0)


# Load the cached POI table when it exists; otherwise build it from the Yelp
# API, encode its categorical columns as integers, and cache it as CSV.
pois_csv_path = folder_path + 'data/pois.csv'

if os.path.exists(pois_csv_path):
    pois = pd.read_csv(pois_csv_path)
    # A cache written together with the DataFrame index comes back with a
    # stray 'Unnamed: 0' column; drop it when present so both branches yield
    # the same columns.
    pois = pois.drop(columns='Unnamed: 0', errors='ignore')
else:
    terms = ['restaurant', 'shop', 'bar', 'museum']

    pois = []
    for term in terms:
        print(f"Looking for {term}s...")
        # Treat a missing count (None) as zero so range() below cannot fail.
        limit = yelp_limit(term) or 0

        # Page through the results 50 at a time.
        for offset in range(0, limit, 50):
            pois.extend(yelp_api_request(offset, term))
            sleep(.4)  # pause between requests (presumably for rate limiting)

    pois = pd.DataFrame(
        pois,
        columns=['type', 'rating', 'review_count', 'lat', 'lon', 'category', 'price'],
    )

    # Replace each categorical column by its integer category code.
    categorical_columns = ['category', 'price', 'type']

    for c in categorical_columns:
        pois[c] = pois[c].astype('category').cat.codes.astype('int')

    # Write without the index so reloading does not add an 'Unnamed: 0' column.
    os.makedirs(os.path.dirname(pois_csv_path), exist_ok=True)
    pois.to_csv(pois_csv_path, index=False)

In [None]:
# Show the POI DataFrame as this notebook cell's output.
pois