In [None]:
import sys
import pprint
import requests
import ast
from collections import defaultdict
import pandas as pd
from pandas.io.json import json_normalize
import pickle
from functools import reduce
import pickle
import random

In [28]:
base_url = "https://developers.zomato.com/api/v2.1/"
api_keys = ["c1197244ad4605df38ec5e827e15a7b2",
            "86b123235ea063a6ac39687bbca355f0",
            "f819919aea69efed517042f48e1be4f2"]
NUM_CUISINES = 50

In [3]:
def initialize_app(config):
    return Zomato(config)


class Zomato:
    def __init__(self, config):
        self.user_key = config["user_key"]

    def get_categories(self):
        """
        Takes no input.
        Returns a dictionary of IDs and their respective category names.
        """
        headers = {'Accept': 'application/json', 'user-key': self.user_key}
        r = (requests.get(base_url + "categories", headers=headers).content).decode("utf-8")
        a = ast.literal_eval(r)

        self.is_key_invalid(a)
        self.is_rate_exceeded(a)

        categories = {}
        for category in a['categories']:
            categories.update({category['categories']['id'] : category['categories']['name']})

        return categories

    def get_city_ID(self, city_name):
        """
        Takes City Name as input.
        Returns the ID for the city given as input.
        """
        city_name = city_name.split(' ')
        city_name = '%20'.join(city_name)
        headers = {'Accept': 'application/json', 'user-key': self.user_key}
        r = (requests.get(base_url + "cities?q=" + city_name, headers=headers).content).decode("utf-8")
        a = ast.literal_eval(r)

        self.is_key_invalid(a)
        self.is_rate_exceeded(a)

        if len(a['location_suggestions']) == 0:
            raise Exception('invalid_city_name')
        elif 'name' in a['location_suggestions'][0]:
            city_name = city_name.replace('%20', ' ')
            if str(city_name).lower() in str(a['location_suggestions'][0]['name']).lower():
                return a['location_suggestions'][0]['id']
            else:
                raise ValueError('InvalidCityId')
                
    def get_city_name(self, city_ID):
        """
        Takes City ID as input.
        Returns the name of the city ID given as input.
        """
        self.is_valid_city_id(city_ID)

        headers = {'Accept': 'application/json', 'user-key': self.user_key}
        r = (requests.get(base_url + "cities?city_ids=" + str(city_ID), headers=headers).content).decode("utf-8")
        a = ast.literal_eval(r)

        self.is_key_invalid(a)
        self.is_rate_exceeded(a)

        if a['location_suggestions'][0]['country_name'] == "":
            raise ValueError('InvalidCityId')
        else:
            temp_city_ID = a['location_suggestions'][0]['id']
            if temp_city_ID == city_ID:
                return a['location_suggestions'][0]['name']

    def get_collections(self, city_ID, limit=None):
        """
        Takes City ID as input. limit parameter is optional.
        Returns dictionary of Zomato restaurant collections in a city and their respective URLs.
        """
        self.is_valid_city_id(city_ID)

        headers = {'Accept': 'application/json', 'user-key': self.user_key}
        if limit == None:
            r = (requests.get(base_url + "collections?city_id=" + str(city_ID), headers=headers).content).decode("utf-8")
        else:
            if str(limit).isalpha() == True:
                raise ValueError('LimitNotInteger')
            else:
                r = (requests.get(base_url + "collections?city_id=" + str(city_ID) + "&count=" + str(limit), headers=headers).content).decode("utf-8")
        a = ast.literal_eval(r)

        self.is_key_invalid(a)
        self.is_rate_exceeded(a)

        collections = {}
        for collection in a['collections']:
            collections.update({collection['collection']['title'] : collection['collection']['url']})

        return collections

    def get_cuisines(self, city_ID):
        """
        Takes City ID as input.
        Returns a sorted dictionary of all cuisine IDs and their respective cuisine names.
        """
        self.is_valid_city_id(city_ID)

        headers = {'Accept': 'application/json', 'user-key': self.user_key}
        r = (requests.get(base_url + "cuisines?city_id=" + str(city_ID), headers=headers).content).decode("utf-8")
        a = ast.literal_eval(r)

        self.is_key_invalid(a)
        self.is_rate_exceeded(a)

        if len(a['cuisines']) == 0:
            raise ValueError('InvalidCityId')
        temp_cuisines = {}
        cuisines = {}
        for cuisine in a['cuisines']:
            temp_cuisines.update({cuisine['cuisine']['cuisine_id'] : cuisine['cuisine']['cuisine_name']})

        for cuisine in sorted(temp_cuisines):
            cuisines.update({cuisine : temp_cuisines[cuisine]})

        return cuisines

    def get_establishment_types(self, city_ID):
        """
        Takes City ID as input.
        Returns a sorted dictionary of all establishment type IDs and their respective establishment type names.
        """
        self.is_valid_city_id(city_ID)

        headers = {'Accept': 'application/json', 'user-key': self.user_key}
        r = (requests.get(base_url + "establishments?city_id=" + str(city_ID), headers=headers).content).decode("utf-8")
        a = ast.literal_eval(r)

        self.is_key_invalid(a)
        self.is_rate_exceeded(a)

        temp_establishment_types = {}
        establishment_types = {}
        if 'establishments' in a:
            for establishment_type in a['establishments']:
                temp_establishment_types.update({establishment_type['establishment']['id'] : establishment_type['establishment']['name']})

            for establishment_type in sorted(temp_establishment_types):
                establishment_types.update({establishment_type : temp_establishment_types[establishment_type]})

            return establishment_types
        else:
            raise ValueError('InvalidCityId')

    def get_nearby_restaurants(self, latitude, longitude):
        """
        Takes the latitude and longitude as inputs.
        Returns a dictionary of Restaurant IDs and their corresponding Zomato URLs.
        """
        try:
            float(latitude)
            float(longitude)
        except ValueError:
            raise ValueError('InvalidLatitudeOrLongitude')

        headers = {'Accept': 'application/json', 'user-key': self.user_key}
        r = (requests.get(base_url + "geocode?lat=" + str(latitude) + "&lon=" + str(longitude), headers=headers).content).decode("utf-8")
        a = ast.literal_eval(r)

        nearby_restaurants = {}
        for nearby_restaurant in a['nearby_restaurants']:
            nearby_restaurants.update({nearby_restaurant['restaurant']['id'] : nearby_restaurant['restaurant']['url']})

        return nearby_restaurants

    def get_restaurant(self, restaurant_ID):
        """
        Takes Restaurant ID as input.
        Returns a dictionary of restaurant details.
        """
        self.is_valid_restaurant_id(restaurant_ID)

        headers = {'Accept': 'application/json', 'user-key': self.user_key}
        r = (requests.get(base_url + "restaurant?res_id=" + str(restaurant_ID), headers=headers).content).decode("utf-8")
        a = ast.literal_eval(r)

        if 'code' in a:
            if a['code'] == 404:
                raise('InvalidRestaurantId')

        restaurant_details = {}
        restaurant_details.update({"name" : a['name']})
        restaurant_details.update({"url" : a['url']})
        restaurant_details.update({"location" : a['location']['address']})
        restaurant_details.update({"city" : a['location']['city']})
        restaurant_details.update({"city_ID" : a['location']['city_id']})
        restaurant_details.update({"user_rating" : a['user_rating']['aggregate_rating']})

        restaurant_details = DotDict(restaurant_details)
        return restaurant_details

    def restaurant_search(self, entity_id="", entity_type="", latitude="", longitude="",
                          cuisines="",
                          start=0, limit=1000):
        """
        Takes either query, latitude and longitude or cuisine as input.
        Returns a list of Restaurant IDs.
        """
        cuisines = "%2C".join(cuisines.split(","))
        if str(limit).isalpha() == True:
            raise ValueError('LimitNotInteger')
        headers = {'Accept': 'application/json', 'user-key': self.user_key}
        r = requests.get(base_url + 
                         "search?entity_id=" + str(entity_id) +
                         "&entity_type=" + entity_type +
                         "&start=" + str(start) + 
                         "&count=" + str(limit) + 
                         "&lat=" + str(latitude) +
                         "&lon=" + str(longitude) + 
                         "&cuisines=" + str(cuisines), headers=headers)
        a = r.json()
        try:
            if a['results_found'] == 0:
                return []
            else:
                return a['restaurants']
        except:
            print(a)

    def is_valid_restaurant_id(self, restaurant_ID):
        """
        Checks if the Restaurant ID is valid or invalid.
        If invalid, throws a InvalidRestaurantId Exception.
        """
        restaurant_ID = str(restaurant_ID)
        if restaurant_ID.isnumeric() == False:
            raise ValueError('InvalidRestaurantId')

    def is_valid_city_id(self, city_ID):
        """
        Checks if the City ID is valid or invalid.
        If invalid, throws a InvalidCityId Exception.
        """
        city_ID = str(city_ID)
        if city_ID.isnumeric() == False:
            raise ValueError('InvalidCityId')

    def is_key_invalid(self, a):
        """
        Checks if the API key provided is valid or invalid.
        If invalid, throws a InvalidKey Exception.
        """
        if 'code' in a:
            if a['code'] == 403:
                raise ValueError('InvalidKey')



    def is_rate_exceeded(self, a):
        """
        Checks if the request limit for the API key is exceeded or not.
        If exceeded, throws a ApiLimitExceeded Exception.
        """
        if 'code' in a:
            if a['code'] == 440:
                raise Exception('ApiLimitExceeded')

## Cities

In [9]:
city_names = ['Las Vegas', 'Phoenix', 'Toronto', 'Charlotte', 'Scotsdale', 'Calgary',
              'Pittsburg', 'Mesa', 'Montreal', 'Henderson', 'Cleveland', 'Mesa', 'Madidson']

In [11]:
city2id = {}
for city_name in city_names:
    try:
        city2id[city_name] = zomato.get_city_ID(city_name)
    except Exception as e:
        print(e)
        print(city_name)
print()
print(city2id)

invalid_city_name
Madidson


## Cuisines

In [24]:
id2cuisine = zomato.get_cuisines(city2id['Las Vegas'])
cuisine2id = {v: k for k, v in cuisine2id.items()}
len(cuisine2id)

99

In [29]:
assert len(city2id) * 5 * NUM_CUISINES < len(api_keys) * 999
cuisine_ids = random.sample(list(cuisine2id.values()), num_cuisines)
fast_food_id = cuisine2id['Fast Food']
if fast_food_id not in cuisine_ids:
    cuisine_ids.pop()
    cuisine_ids += [fast_food_id]
print(len(cuisine_ids), fast_food_id in cuisine_ids)

50 True


## Restaurants

In [None]:
city2res = defaultdict(list)
num_res = 0
api_idx = 0

for city_name, city_id in city2id.items():
    for cuisine_id in cuisine_ids:
        for start in range(0, 100, 20):
            try:
                city2res[city_name].extend(zomato.restaurant_search(entity_id=city_id,
                                                                    entity_type='city',
                                                                    start=start,
                                                                    cuisines=str(cuisine_id),
                                                                    limit=1000))
                print(city_name, len(city2res[city_name]))
                num_res += 20
            except ApiLimitExceeded as e:
                print(e, num_res)
                api_idx += 1
                config={"user_key": api_keys[api_idx]}
                zomato = initialize_app(config)

Las Vegas 20
Las Vegas 40
Las Vegas 60
Las Vegas 80
Las Vegas 100
Las Vegas 120
Las Vegas 140
Las Vegas 160
Las Vegas 180
Las Vegas 200
Las Vegas 206
Las Vegas 206
Las Vegas 206
Las Vegas 206
Las Vegas 206
Las Vegas 226
Las Vegas 246
Las Vegas 249
Las Vegas 249
Las Vegas 249
Las Vegas 269
Las Vegas 279
Las Vegas 279
Las Vegas 279
Las Vegas 279
Las Vegas 283
Las Vegas 283
Las Vegas 283
Las Vegas 283
Las Vegas 283
Las Vegas 285
Las Vegas 285
Las Vegas 285
Las Vegas 285
Las Vegas 285
Las Vegas 305
Las Vegas 325
Las Vegas 345
Las Vegas 365
Las Vegas 385
Las Vegas 405
Las Vegas 405
Las Vegas 405
Las Vegas 405
Las Vegas 405
Las Vegas 425
Las Vegas 445
Las Vegas 465
Las Vegas 485
Las Vegas 505
Las Vegas 506
Las Vegas 506
Las Vegas 506
Las Vegas 506
Las Vegas 506
Las Vegas 516
Las Vegas 516
Las Vegas 516
Las Vegas 516
Las Vegas 516
Las Vegas 536
Las Vegas 556
Las Vegas 576
Las Vegas 596
Las Vegas 616
Las Vegas 619
Las Vegas 619
Las Vegas 619
Las Vegas 619
Las Vegas 619
Las Vegas 622
Las Vegas 

In [5]:
with open('../data/zomato/parsed_data.pickle', 'rb') as f:
    data = pickle.load(f)

In [23]:
full_json_data = []
for city, restaurants in data.items():
    for restaurant in restaurants:
        record = restaurant['restaurant'].copy()
        record.pop('R', None)
        record.pop('featured_image', None)
        record.pop('thumb', None)
        record.pop('apikey', None)
        record.pop('deeplink', None)
        keys = list(record.keys()).copy()
        for k in keys:
            if 'url' in k:
                record.pop(k, None)

        normal_record = json_normalize(record)
        full_json_data.append(normal_record)

In [24]:
df = pd.concat(full_json_data)

In [25]:
for col in df.columns:
    print(col)

average_cost_for_two
cuisines
currency
establishment_types
has_online_delivery
has_table_booking
id
include_bogo_offers
is_book_form_web_view
is_delivering_now
is_table_reservation_supported
is_zomato_book_res
location.address
location.city
location.city_id
location.country_id
location.latitude
location.locality
location.locality_verbose
location.longitude
location.zipcode
medio_provider
mezzo_provider
name
offers
opentable_support
price_range
switch_to_order_menu
user_rating.aggregate_rating
user_rating.rating_color
user_rating.rating_text
user_rating.votes


In [41]:
df.applymap(lambda x: x == '').sum()

average_cost_for_two               0
cuisines                           9
currency                           0
establishment_types                0
has_online_delivery                0
has_table_booking                  0
id                                 0
include_bogo_offers                0
is_book_form_web_view              0
is_delivering_now                  0
is_table_reservation_supported     0
is_zomato_book_res                 0
location.address                   0
location.city                      0
location.city_id                   0
location.country_id                0
location.latitude                  0
location.locality                  0
location.locality_verbose          0
location.longitude                 0
location.zipcode                  31
medio_provider                     3
mezzo_provider                     0
name                               0
offers                             0
opentable_support                  0
price_range                        0
s

In [39]:
df.isnull().sum()

average_cost_for_two                0
cuisines                            0
currency                            0
establishment_types                 0
has_online_delivery                 0
has_table_booking                   0
id                                  0
include_bogo_offers                 0
is_book_form_web_view               0
is_delivering_now                   0
is_table_reservation_supported      0
is_zomato_book_res                  0
location.address                    0
location.city                       0
location.city_id                    0
location.country_id                 0
location.latitude                   0
location.locality                   0
location.locality_verbose           0
location.longitude                  0
location.zipcode                    0
medio_provider                    933
mezzo_provider                      0
name                                0
offers                              0
opentable_support                   0
price_range 

In [258]:
df['location.city'].value_counts()

Las Vegas           100
Toronto             100
Los Angeles         100
San Francisco       100
Mesa                100
San Jose            100
Baltimore           100
San Antonio         100
Columbus            100
El Paso             100
Phoenix             100
Denver              100
Memphis             100
Jacksonville        100
Fort Worth          100
Calgary             100
New York City       100
Washington DC       100
Milwaukee           100
San Diego           100
Houston             100
Philadelphia        100
Dallas              100
Charlotte           100
Boston              100
Seattle             100
Montreal            100
Chicago             100
Austin              100
Detroit             100
Henderson            69
Pittsburg            62
Evansville           29
Denmark              21
Antioch              13
Concord               8
Clayton               5
Walpole               4
Mt Barker             4
Bay Point             3
Brentwood             2
Mount Barker    