In [1]:
import pandas as pd
import numpy as np
import os

#To plot pretty figures
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

import seaborn as sns

plt.rcParams['axes.labelsize'] = 12
plt.rcParams['axes.titlesize'] = 18

plt.style.use('fivethirtyeight')

#to make this notebook's output stable across runs
np.random.seed(42)

#Where to save the figures
PROJECT_ROOT_DIR = "."
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images")
def save_fig(fig_id,tight_layout=True,fig_extension="png",resolution=400):
    if not os.path.isdir(IMAGES_PATH):
        os.makedirs(IMAGES_PATH)
    path = os.path.join(IMAGES_PATH,fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path,format=fig_extension,dpi=resolution)
    
    
PROCESSED_PATH = os.path.join(PROJECT_ROOT_DIR,"datasets/processed")

def save_processed(df,filename,extension="csv"):
    if not os.path.isdir(PROCESSED_PATH):
        os.makedirs(PROCESSED_PATH)
    path = os.path.join(PROCESSED_PATH,filename + "." + extension)
    print("Saving processed dataset",filename)
    df.to_csv(path,index=False)
    


# Download and Process Zomato Data

In [2]:
API_KEY = "0d2dff0692d6f2a55c3a714fbbcd84ed"
ENTITY_TYPE = "city"
SORT_BY = "rating"
SORT_ORDER = ["desc","asc"]
RAW_FOLDER = "datasets/raw/"
PROCESSED_FOLDER = "datasets/processed/"

In [3]:
def load_data(base_path,file,sub_dir=None,ext="csv",encoding=None):
    filename = file + "." + ext
    if sub_dir is not None:
        csv_path = os.path.join(base_path,sub_dir,filename)
    else:
        csv_path = os.path.join(base_path,filename)
    return pd.read_csv(csv_path)

In [4]:
city_ids = load_data(RAW_FOLDER,"zomato_city_ids")
city_ids.head()

Unnamed: 0,Country,City,Zomato ID
0,India,Hyderabad,6


In [5]:
import requests
import json

def get_categories():
    #Get all zomato categories.
    #Categories are gloabal and not unique to a specific country.
    headers ={"user-key":API_KEY}
    response=requests.get("https://developers.zomato.com/api/v2.1/categories", headers=headers)
    json_data = response.json()
    
    if not os.path.isdir(RAW_FOLDER):
        os.makedirs(RAW_FOLDER)
    filename = "categories.json"
    file_path = os.path.join(RAW_FOLDER,filename)
    
    with open(file_path, 'w') as outfile:
        json.dump(json_data,outfile)
        
    #return all the categories id's for restuarant search function.
    cat_ids = [category["categories"]["ids"] for category in json_data["categories"]]
    return cat_ids

def get_establishment_type_for_city(city_id):
    #Finding all establishment types in city_id and writing to file
    headers = {"user-key": API_KEY}
    params = {"city_id": city_id}
    response=requests.get("https://developers.zomato.com/api/v2.1/establishments",headers=headers,params=params)
    json_data = response.json()
    
    if not os.path.isdir(RAW_FOLDER):
        o.makedirs(RAW_FOLDER)
    file_name = "establishment_types_" + str(city_id) + "." + "json"
    file_path = os.path.join(RAW_FOLDER,file_name)
    
    with open(file_path,'w') as outfile:
        json.dump(json_data,outfile)
        
    #return all the establishment type id's for resturant search function.
    establishment_ids = [establishment["establishment"]["id"] for establishment in json_data["establishments"]]
    return establishment_ids

def get_cuisine_type_for_city(city_id):
    #Finding all cuisine types in hyderabad and writing to file
    header = {"user-key": API_KEY}
    params = {"city_ids": city_id}
    response = requests.get("https://developers.zomato.com/api/v2.1/cuisines",headers=headers,params=params)
    json_data = response.json()
                            
    if not os.path.isdir(RAW_FOLDER):
        o.makedirs(RAW_FOLDER)
    filename = "cuisine_types_" + str(city_id) + "." + "json"
    file_path = os.path.join(RAW_FOLDER,filename)
                            
    with open(filepath,'w') as outfile:
        json.dump(json_data,outfile)
                            
    #return all the cuisins type id's for restaurant search function.
    cuisine_ids = [cuisine["cuisine"]["cuisine_id"] for cuisine in json_data["cuisines"]]
    return cuisine_ids

In [6]:
def download_with_criteria(headers,city_id,file_path,iterable_list,iterable_name,sort_order,is_item_list=False):
    json_dump = []
    #if API is not expecting a list of iterable_name,then run through each value as a search criteria
    if not is_item_list:
        for item in iterable_list:
            start = 0
            results_shown = 20
            while results_shown != 0:
                params ={"entity_id": city_id,"entity_type": ENTITY_TYPE,"start": start,"count":20,iterable_name:item,"sort":SORT_BY,"order":sort_order}
                response=requests.get("https://developers.zomato.com/api/v2.1/search", headers=headers, params=params)
                json_data = response.json()
                results_shown = int(json_data.get("results_shown",0))
                if results_shown == 0:
                    break
                    
                #appending each dictionary to a list so that json.Load() can process
                #multipl dictionaries
                json_dump.append(json.dumps(json_data))
                start += 20
                
                #otherwise pass the full list to iterable_name,such as cuisines.
                #note: initially it was attempted to run through the above loop for every cuisine type, but allowed API calls
                #where exceeded for a single city download attempt
                
    else:
        start = 0
        results_shown = 20
        while results_shown !=0:
            params = {"entity_id":city_id,"entity_type":ENTITY_TYPE,"start":start,"count":20,iterable_name:iterable_list,"sort":SORT_BY,"order":sort_order}
            response = requets.get("https://developers.zomato.com/api/v2.1/search",headers=headers, params=params)
            json_data = response.json()
            results_shown = int(json_data.get("results_shown",0))
            if results_shown == 0:
                break
            #appending each dictionary to a list so that json.load() can process
            #multiple dictionaries
            json_dump.append(json.dumps(json_data))
            start +=20
            
    #each dictionary must exist as a list object for json.load to read it correctly
    with open(file_path, 'w') as outfile:
        outfile.write("[")
        count = 0
        for item in json_dump:
            outfile.write(item)
            if count <len(json_dump)-1:
                outfile.write(",")
            count +=1
        outfile.write("]")

In [7]:
def get_restaurants(city_id,establishment_types):
    #Find 100 top rated restuarants of eacg etablishment type,category and cuisine type per city and write to file
    headers = {"user-key": API_KEY}
    
    if not os.path.isdir(RAW_FOLDER):
        os.makedirs(RAW_FOLDER)
    search_criteria = ["Category","Establishment Type","Cuisine type"]
    city_name = city_ids[city_ids["Zomato ID"] == city_id]["City"].values[0]
    #Download restuarant for city by establishment type
    print("Downloading restaurants for", city_name,"by", search_criteria[1])
    for sort_order in SORT_ORDER:
            file_name = "restuarants_by_establishment_type_" + str(city_id) + "_" + sort_order + "." + "json"
            file_path = os.path.join(RAW_FOLDER,file_name)
            download_with_criteria(headers,city_id,file_path,establishment_types,"establishment_types",sort_order,is_item_list=False)
            print("Done")

In [8]:
for city_id in city_ids["Zomato ID"]:
    establishments = get_establishment_type_for_city(city_id)
    get_restaurants(city_id,establishments)

Downloading restaurants for Hyderabad by Establishment Type
Done
Done


In [33]:
#For each city,we need to combine the restuarant data from the categories-based, establishmnet type-based and cuisine-based json files.
def build_csv(city_id,source_type):
    restaurants_list = []
    for sort_order in SORT_ORDER:
        file_name ="restuarants_by_" + source_type + "_" + str(city_id) + "_" + sort_order + ".json"
        source_path = os.path.join(RAW_FOLDER,file_name)
        db = json.load(open(source_path))
        restaurant_cols = ["name",
                          "cuisines",
                          "aggregate_rating",
                          "rating_text",
                          "votes",
                          "currency",
                          "average_cost_for_two",
                          "price_range",
                          "locality",
                          "locality_verbose",
                          "city",
                          "zipcode",
                          "country_id",
                          "latitude",
                          "longitude",
                          "has_online_delivery",
                          "has_table_booking"
                          ]
        restaurants = [restaurant_group["restaurant"] for count_group in db for restaurant_group in count_group["restaurants"]]
        for restaurant in restaurants:
            restaurant["locality"] = restaurant["location"]["locality"]
            restaurant["locality_verbose"] = restaurant["location"]["locality_verbose"]
            restaurant["city"] = restaurant["location"]["city"]
            restaurant["latitude"] = restaurant["location"]["latitude"]
            restaurant["longitude"] = restaurant["location"]["longitude"]
            restaurant["zipcode"] = restaurant["location"]["zipcode"]
            restaurant["country_id"] = restaurant["location"]["country_id"]
            restaurant["aggregate_rating"] = restaurant["user_rating"]["aggregate_rating"]
            restaurant["votes"] = restaurant["user_rating"]["votes"]
            restaurant["rating_text"] = restaurant["user_rating"]["rating_text"]
            
            restaurants_list.append(restaurants)
        df1 = pd.DataFrame(restaurants_list[0],columns=restaurant_cols)
        df2 = pd.DataFrame(restaurants_list[1],columns=restaurant_cols)
        return pd.concat([df1,df2])
    
def process_restaurants_json(city_id):
    source_types = ["establishment_type"]
    dfs = []
    for source in source_types:
        dfs.append(build_csv(city_id,source))
        
    if not os.path.isdir(PROCESSED_FOLDER):
        os.makesirs(PROCESSED_FOLDER)
    
    file_name = "restaurants_" + str(city_id) + ".csv"
    output_path = os.path.join(PROCESSED_FOLDER, file_name)
    data = pd.concat(dfs)
    data.to_csv(output_path,encoding='utf-8-sig',index=False)
            

In [34]:
for city in city_ids["Zomato ID"]:
    process_restaurants_json(city)

# Data Preparation

In [77]:
data =load_data(PROCESSED_FOLDER,'restaurants_6')
data.shape

(4800, 17)

In [78]:
data.head(5)

Unnamed: 0,name,cuisines,aggregate_rating,rating_text,votes,currency,average_cost_for_two,price_range,locality,locality_verbose,city,zipcode,country_id,latitude,longitude,has_online_delivery,has_table_booking
0,Sahib’s Barbeque by Ohri’s,"Hyderabadi, Awadhi, BBQ",4.9,Excellent,7647,Rs.,1400,3,Hitech City,"Hitech City, Hyderabad",Hyderabad,,1,17.441403,78.376216,1,1
1,Chili's American Grill & Bar,"Mexican, American, Tex-Mex, Burger, Salad",4.9,Excellent,7422,Rs.,1400,3,Hitech City,"Hitech City, Hyderabad",Hyderabad,500081.0,1,17.433889,78.386653,1,1
2,Chili's American Grill & Bar,"Mexican, American, Tex-Mex, Burger, Salad",4.9,Excellent,5313,Rs.,1400,3,Banjara Hills,"Banjara Hills, Hyderabad",Hyderabad,,1,17.422829,78.449465,1,1
3,Ohri's Tansen,North Indian,4.9,Excellent,3076,Rs.,1500,3,Necklace Road,"Necklace Road, Hyderabad",Hyderabad,0.0,1,17.431671,78.465058,1,1
4,Ohri's Serengeti,North Indian,4.9,Excellent,2493,Rs.,1500,3,Banjara Hills,"Banjara Hills, Hyderabad",Hyderabad,0.0,1,17.408402,78.43888,1,1


# Remove Duplicates

Since we downloaded the top 100 and bottom 100 restauranSince we downloaded the top 100 and bottom 100 restuarants of each category, there is likely to be some duplication as certain categories contain less than 200 restaurants meaning that the same establishment could appear near the bottom of the top 100 or the top of the bottom 100. There are many restaurant franchises in Hyderabad, so we'll define a duplicate as a restaurant with the same name at the same locality.

In [79]:
data[data.duplicated(subset=["name", "cuisines", "aggregate_rating", "rating_text", "votes",
       "average_cost_for_two", "price_range", "locality", "city",
       "latitude", "longitude","has_online_delivery","has_table_booking"])]


Unnamed: 0,name,cuisines,aggregate_rating,rating_text,votes,currency,average_cost_for_two,price_range,locality,locality_verbose,city,zipcode,country_id,latitude,longitude,has_online_delivery,has_table_booking
42,Over The Moon Brew Company,"Asian, Continental, North Indian, Chinese, Med...",4.6,Excellent,3035,Rs.,1900,3,Gachibowli,"Gachibowli, Hyderabad",Hyderabad,,1,17.440206,78.362038,0,1
89,Mandi@36,Arabian,4.5,Excellent,1515,Rs.,600,2,Jubilee Hills,"Jubilee Hills, Hyderabad",Hyderabad,,1,17.430713,78.407921,1,0
100,Sahib’s Barbeque by Ohri’s,"Hyderabadi, Awadhi, BBQ",4.9,Excellent,7647,Rs.,1400,3,Hitech City,"Hitech City, Hyderabad",Hyderabad,,1,17.441403,78.376216,1,1
101,Chili's American Grill & Bar,"Mexican, American, Tex-Mex, Burger, Salad",4.9,Excellent,7422,Rs.,1400,3,Hitech City,"Hitech City, Hyderabad",Hyderabad,500081.0,1,17.433889,78.386653,1,1
102,Ohri's Tansen,North Indian,4.9,Excellent,3076,Rs.,1500,3,Necklace Road,"Necklace Road, Hyderabad",Hyderabad,0.0,1,17.431671,78.465058,1,1
103,Ohri's Serengeti,North Indian,4.9,Excellent,2493,Rs.,1500,3,Banjara Hills,"Banjara Hills, Hyderabad",Hyderabad,0.0,1,17.408402,78.438880,1,1
104,AB's - Absolute Barbecues,"European, Mediterranean, North Indian",4.9,Excellent,7164,Rs.,1200,3,Gachibowli,"Gachibowli, Hyderabad",Hyderabad,,1,17.442988,78.357400,0,1
105,AB's - Absolute Barbecues,"European, Mediterranean, North Indian",4.9,Excellent,4138,Rs.,1500,3,Banjara Hills,"Banjara Hills, Hyderabad",Hyderabad,,1,17.412350,78.449430,0,1
106,Chili's American Grill & Bar,"Mexican, American, Tex-Mex, Burger, Salad",4.9,Excellent,5313,Rs.,1400,3,Banjara Hills,"Banjara Hills, Hyderabad",Hyderabad,,1,17.422829,78.449465,1,1
107,Taaza Kitchen,South Indian,4.9,Excellent,328,Rs.,100,1,Madhapur,"Madhapur, Hyderabad",Hyderabad,,1,17.452452,78.391436,1,0


In [80]:
data.duplicated(subset=["name", "cuisines", "aggregate_rating", "rating_text", "votes",
       "average_cost_for_two", "price_range", "locality", "city",
       "latitude", "longitude","has_online_delivery","has_table_booking"]).sum()

4668

In [81]:
# duplicates are considerd those restaurants with the same name in the same location. Keep the first instance.
data = data.drop_duplicates(subset=["name", "cuisines", "aggregate_rating", "rating_text", "votes",
       "average_cost_for_two", "price_range", "locality", "city",
       "latitude", "longitude","has_online_delivery","has_table_booking"], keep='first')
data.shape

(132, 17)

# Removing Unneccessary Columns

In [82]:
#removing currency, country, locality_verbose and zipcode
cols = ['name', 'cuisines', 'aggregate_rating', 'rating_text', 'votes',
       'average_cost_for_two', 'price_range', 'locality', 'city',
       'latitude', 'longitude', 'has_online_delivery', 'has_table_booking']
data = data[cols]
data.head()

Unnamed: 0,name,cuisines,aggregate_rating,rating_text,votes,average_cost_for_two,price_range,locality,city,latitude,longitude,has_online_delivery,has_table_booking
0,Sahib’s Barbeque by Ohri’s,"Hyderabadi, Awadhi, BBQ",4.9,Excellent,7647,1400,3,Hitech City,Hyderabad,17.441403,78.376216,1,1
1,Chili's American Grill & Bar,"Mexican, American, Tex-Mex, Burger, Salad",4.9,Excellent,7422,1400,3,Hitech City,Hyderabad,17.433889,78.386653,1,1
2,Chili's American Grill & Bar,"Mexican, American, Tex-Mex, Burger, Salad",4.9,Excellent,5313,1400,3,Banjara Hills,Hyderabad,17.422829,78.449465,1,1
3,Ohri's Tansen,North Indian,4.9,Excellent,3076,1500,3,Necklace Road,Hyderabad,17.431671,78.465058,1,1
4,Ohri's Serengeti,North Indian,4.9,Excellent,2493,1500,3,Banjara Hills,Hyderabad,17.408402,78.43888,1,1


# Checking for null values

In [83]:
#checking for Null values In dataset 
data.isnull().sum(axis=0)

name                    0
cuisines                0
aggregate_rating        0
rating_text             0
votes                   0
average_cost_for_two    0
price_range             0
locality                0
city                    0
latitude                0
longitude               0
has_online_delivery     0
has_table_booking       0
dtype: int64

# Add Dummy Columns for Each Cuisine Type

We'd like to have a true/false value for each cuisine type. This will make it easier to count restaurants by specific cuisine types later. Zomato piles all the cuisines together in a single comma seperated field, so we need to break this up first and then create dummies.

In [84]:
# Values in cuisine columns may have multiple values with ',' seperator
# Extract each possible cuisine type then ensure each dummy column is prefixed with "cuisine_"

dummies = data['cuisines'].str.get_dummies(sep=", ")
dummies.columns = [str(col) for col in dummies.columns]
data = pd.concat([data, dummies], axis=1)

In [85]:
data.shape

(132, 59)

In [86]:
data.head()

Unnamed: 0,name,cuisines,aggregate_rating,rating_text,votes,average_cost_for_two,price_range,locality,city,latitude,...,Salad,Sandwich,Seafood,South Indian,Street Food,Sushi,Tex-Mex,Thai,Tibetan,Vietnamese
0,Sahib’s Barbeque by Ohri’s,"Hyderabadi, Awadhi, BBQ",4.9,Excellent,7647,1400,3,Hitech City,Hyderabad,17.441403,...,0,0,0,0,0,0,0,0,0,0
1,Chili's American Grill & Bar,"Mexican, American, Tex-Mex, Burger, Salad",4.9,Excellent,7422,1400,3,Hitech City,Hyderabad,17.433889,...,1,0,0,0,0,0,1,0,0,0
2,Chili's American Grill & Bar,"Mexican, American, Tex-Mex, Burger, Salad",4.9,Excellent,5313,1400,3,Banjara Hills,Hyderabad,17.422829,...,1,0,0,0,0,0,1,0,0,0
3,Ohri's Tansen,North Indian,4.9,Excellent,3076,1500,3,Necklace Road,Hyderabad,17.431671,...,0,0,0,0,0,0,0,0,0,0
4,Ohri's Serengeti,North Indian,4.9,Excellent,2493,1500,3,Banjara Hills,Hyderabad,17.408402,...,0,0,0,0,0,0,0,0,0,0


# Extract a Place of Interest from Locality and Store in New Column

Some restaurants are part of a place of interest such as a shopping mall or hotel. This is specified in the locality as a comma-seperated pair. We'll split these out into their own column and use "None" where it doesn't apply. This allows us to investigate rating trends at specific shopping malls, for example

In [87]:
# Go through each row and extract a place value from the locality if one is specified
# Store the place and locality seperately in order to filter more precisely

places = []
localities = []
def split_locality(row):
    items = row["locality"].split(',')
    if len(items) > 1:
        places.append(items[0])
        localities.append(items[1].strip())
    else:
        places.append("None")
        localities.append(items[0].strip())
        
       
data.apply(split_locality, axis=1)
data["places"] = places
data["locality"] = localities

In [88]:
data[["locality", "places"]].sample(5)

Unnamed: 0,locality,places
57,Shamirpet,Aalankrita Resort
84,Kukatpally,
19,Basheer Bagh,
31,Banjara Hills,
77,Jubilee Hills,


# Add Column to Store The Count of Cuisines Types Offered

In [89]:
data.columns

Index(['name', 'cuisines', 'aggregate_rating', 'rating_text', 'votes',
       'average_cost_for_two', 'price_range', 'locality', 'city', 'latitude',
       'longitude', 'has_online_delivery', 'has_table_booking', 'American',
       'Andhra', 'Arabian', 'Asian', 'Awadhi', 'BBQ', 'Bakery', 'Beverages',
       'Biryani', 'Burger', 'Burmese', 'Cafe', 'Chinese', 'Coffee',
       'Continental', 'Desserts', 'European', 'Fast Food', 'Gujarati',
       'Healthy Food', 'Hyderabadi', 'Ice Cream', 'Italian', 'Japanese',
       'Juices', 'Lebanese', 'Mediterranean', 'Mexican', 'Modern Indian',
       'Momos', 'Mughlai', 'North Indian', 'Parsi', 'Pizza', 'Rajasthani',
       'Rolls', 'Salad', 'Sandwich', 'Seafood', 'South Indian', 'Street Food',
       'Sushi', 'Tex-Mex', 'Thai', 'Tibetan', 'Vietnamese', 'places'],
      dtype='object')

In [90]:
def get_cuisine_cols():
    cols = ["name", "cuisines", "aggregate_rating", "rating_text", "votes",
       "average_cost_for_two", "price_range", "locality", "city",
       "latitude", "longitude","has_online_delivery","has_table_booking","places"]
    #Returns the column names of all the cuisine type column, not including the cuisine_count column
    cuisine_cols = [col for col in data.columns.tolist() if col not in cols]
    return cuisine_cols

def count_cuisines(row):
    count = len([col for col in data.columns.tolist() if col in get_cuisine_cols() if row[col] == True])
    return count
data["cuisine_count"] = data.apply(count_cuisines,axis=1)
data.sample(5)

Unnamed: 0,name,cuisines,aggregate_rating,rating_text,votes,average_cost_for_two,price_range,locality,city,latitude,...,Seafood,South Indian,Street Food,Sushi,Tex-Mex,Thai,Tibetan,Vietnamese,places,cuisine_count
96,Barbeque Pride,"North Indian, Chinese, Beverages",4.5,Excellent,2597,1200,3,Jubilee Hills,Hyderabad,17.434935,...,0,0,0,0,0,0,0,0,,3
6,AB's - Absolute Barbecues,"European, Mediterranean, North Indian",4.9,Excellent,4138,1500,3,Banjara Hills,Hyderabad,17.41235,...,0,0,0,0,0,0,0,0,,3
923,The Devil's Cut,"North Indian, Chinese, Continental",4.7,Excelente,399,1550,3,Jubilee Hills,Hyderabad,17.425642,...,0,0,0,0,0,0,0,0,,3
774,eat.fit,"Healthy Food, North Indian, Biryani, Continent...",4.5,Skvělé,371,400,1,Jubilee Hills,Hyderabad,17.437264,...,0,0,0,0,0,0,0,0,,5
1241,Coffee Cup,"Cafe, Continental, Italian",4.6,Excelente,4097,800,2,Sainikpuri,Secunderabad,17.483157,...,0,0,0,0,0,0,0,0,,3


In [91]:
data[["cuisines","cuisine_count"]].sample(10)

Unnamed: 0,cuisines,cuisine_count
876,"Bakery, Fast Food",2
927,North Indian,1
71,"Biryani, North Indian",2
12,"Desserts, Cafe, Italian",3
49,"Asian, Mediterranean, North Indian, Desserts",4
2172,"Ice Cream, Desserts",2
1487,"Continental, Chinese, Mughlai",3
696,"North Indian, Chinese, Beverages",3
1241,"Cafe, Continental, Italian",3
86,"Continental, Chinese, Mughlai",3


# Adding Place Type identifier based on Parsed Locality Field

Some of our localities were listed in Zomato as a comma-seperated value. This implied that the location was a place of interest such as a mall or hotel which contains multiple restaurants. We already seperated this into places and locality fields, now we'd like to further identify the place by type such as Hotel, Wine Estate etc. so that we can further filter and visualise this data.

In order to obtain these values, a manual google search will be done to properly classify the place type

In [92]:
places_type_mapping = {
    "Inorbit Mall":"Mall",
    "12th Square Building":"Mall",
    "The Golkonda Resorts & Spa":"Resort",
    "Aalankrita Resort":"Resort",
    "Hotel Baseraa":"Hotel",
    "GVK One Mall":"Mall",
    "The Park":"Hotel",
    "Taj Falaknuma":"Hotel"

}

In [93]:

def map_places_to_place_type(row):
    return places_type_mapping.get(row['places'], "None")

data["place_type"] = data.apply(map_places_to_place_type, axis=1)

In [94]:
data.place_type.value_counts()

None      120
Mall        6
Hotel       4
Resort      2
Name: place_type, dtype: int64

In [95]:
# Order data by rating desc and export to csv
data.sort_values("aggregate_rating", ascending=False, inplace=False).to_csv('datasets/processed/zomato-final.csv', 
                                                                            encoding='utf-8-sig', index=False)