<a href="https://colab.research.google.com/github/Devvrat53/Restaurant-Analysis/blob/main/Restaurant_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Installation

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
#!pip install --upgrade pip
#!pip install folium
#!pip install pyspark

## Import

In [3]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import format_number, avg, desc, count, asc
from pyspark.sql.types import * # Data types in PySpark
import matplotlib.pyplot as plt
import folium
import seaborn as sns

In [4]:
spark = SparkSession.builder.appName('Restaurant-Data-Analysis').getOrCreate()

In [5]:
df_path = '/content/drive/MyDrive/BDA-Mini-Project/Dataset/Zomato Kaggle/Zomato India Restaurants (2 Lakh + restaurants data)/data/indian_restaurants_details_cleaned_data.csv'
df = spark.read.format('csv').option('header', True).option('inferSchema', True).load(df_path)

In [6]:
df.show(5)
print("Total Records in the dataset= ", df.count())

+--------------------+--------------------+---------+------------+------+------------+--------------------+--------------------+------------+--------------------+--------------------+------------+-----------------+-------------+--------------------+-------------+-------------+
|          zomato_url|                name|     city|        area|rating|rating_count|           telephone|              cusine|cost_for_two|             address|             timings|online_order|table_reservation|delivery_only|         famous_food|    longitude|     latitude|
+--------------------+--------------------+---------+------------+------+------------+--------------------+--------------------+------------+--------------------+--------------------+------------+-----------------+-------------+--------------------+-------------+-------------+
|https://www.zomat...|         Sainik Food|Delhi NCR|Pandav Nagar|   3.2|        21.0|011 22486474 +91 ...|        North Indian|       300.0|C 4/1, Opposite M...|{'Mo

In [7]:
# Describing the Schema of the dataset
df.printSchema()

root
 |-- zomato_url: string (nullable = true)
 |-- name: string (nullable = true)
 |-- city: string (nullable = true)
 |-- area: string (nullable = true)
 |-- rating: string (nullable = true)
 |-- rating_count: string (nullable = true)
 |-- telephone: string (nullable = true)
 |-- cusine: string (nullable = true)
 |-- cost_for_two: string (nullable = true)
 |-- address: string (nullable = true)
 |-- timings: string (nullable = true)
 |-- online_order: string (nullable = true)
 |-- table_reservation: string (nullable = true)
 |-- delivery_only: string (nullable = true)
 |-- famous_food: string (nullable = true)
 |-- longitude: string (nullable = true)
 |-- latitude: string (nullable = true)



In [8]:
# Columns in the dataset
df.columns

['zomato_url',
 'name',
 'city',
 'area',
 'rating',
 'rating_count',
 'telephone',
 'cusine',
 'cost_for_two',
 'address',
 'timings',
 'online_order',
 'table_reservation',
 'delivery_only',
 'famous_food',
 'longitude',
 'latitude']

In [9]:
# Shape of the dataset i.e. the rows and the columns
shape = (df.count(), len(df.columns))
print("The Shape of the dataset= ", shape)

The Shape of the dataset=  (224854, 17)


In [10]:
# Selecting some columns for displaying
df.select('name', 'city', 'area', 'rating', 'cusine', 'cost_for_two', 'longitude', 'latitude').show()

+--------------------+---------+--------------------+------+--------------------+------------+-------------+-------------+
|                name|     city|                area|rating|              cusine|cost_for_two|    longitude|     latitude|
+--------------------+---------+--------------------+------+--------------------+------------+-------------+-------------+
|         Sainik Food|Delhi NCR|        Pandav Nagar|   3.2|        North Indian|       300.0|77.2848711535|28.6177324058|
|Kunal's Creamery ...|   Mumbai|           Ambernath|   3.6|Street Food, Chin...|       500.0|73.1842865422|19.2058869331|
|Brij Palace Resta...|Delhi NCR|              Jasola|  null|        North Indian|       250.0|77.2912229598|28.5630343606|
|         Sahib Hotel|Delhi NCR|           Paharganj|  null|        North Indian|       300.0|77.2182980552| 28.642410638|
|            Chunky's|  Kolkata|             Shibpur|   3.0|Italian, Pizza, C...|       500.0|88.3307084441|22.5777582163|
|      The Food 

## Pre-processing

In [11]:
# Drop unwanted column
df = df.drop('timings', 'famous_food')

In [12]:
# Statistical Properties of the dataframe
#df.select('rating', 'rating_count', 'cost_for_two', 'online_order', 'table_reservation', 'delivery_only').describe().show(5)

In [13]:
# Cleaned way
df.select(format_number(df['rating'].cast('float'), 2).alias('rating'),
         format_number(df['rating_count'].cast('float'), 2).alias('rating_count'),
         format_number(df['cost_for_two'].cast('float'), 2).alias('cost_for_two'),
         format_number(df['online_order'].cast('float'), 2).alias('online_order'),
         format_number(df['table_reservation'].cast('float'), 2).alias('table_reservation'),
         format_number(df['delivery_only'].cast('float'), 2).alias('delivery_only')).describe().show()

+-------+-------------------+------------------+------------------+------------------+-----------------+------------------+
|summary|             rating|      rating_count|      cost_for_two|      online_order|table_reservation|     delivery_only|
+-------+-------------------+------------------+------------------+------------------+-----------------+------------------+
|  count|             144736|            142398|            220989|                13|                8|                 4|
|   mean| 3.4880858943179107|101.55640338362659| 357.0730075769587|37.573076923076925|         31.31875|31.747500000000002|
| stddev|0.41998586322159115|161.58949123456605|179.71917421709884|30.744909981061742|28.89633809597927|37.322661011776745|
|    min|               0.00|              0.00|              0.00|              0.00|             0.00|              1.00|
|    max|               4.90|            999.00|            999.00|              9.96|            77.04|             85.09|
+-------

In [14]:
# Checking the Schema after deletion
df.printSchema()

root
 |-- zomato_url: string (nullable = true)
 |-- name: string (nullable = true)
 |-- city: string (nullable = true)
 |-- area: string (nullable = true)
 |-- rating: string (nullable = true)
 |-- rating_count: string (nullable = true)
 |-- telephone: string (nullable = true)
 |-- cusine: string (nullable = true)
 |-- cost_for_two: string (nullable = true)
 |-- address: string (nullable = true)
 |-- online_order: string (nullable = true)
 |-- table_reservation: string (nullable = true)
 |-- delivery_only: string (nullable = true)
 |-- longitude: string (nullable = true)
 |-- latitude: string (nullable = true)



In [15]:
# Counting the missing values from each feature
for col in df.columns:
    print(col, "\t", "with NULL values = ", df.filter(df[col].isNull()).count())

zomato_url 	 with NULL values =  5
name 	 with NULL values =  62
city 	 with NULL values =  79
area 	 with NULL values =  82
rating 	 with NULL values =  79867
rating_count 	 with NULL values =  82266
telephone 	 with NULL values =  1711
cusine 	 with NULL values =  1440
cost_for_two 	 with NULL values =  3839
address 	 with NULL values =  2016
online_order 	 with NULL values =  571
table_reservation 	 with NULL values =  578
delivery_only 	 with NULL values =  583
longitude 	 with NULL values =  594
latitude 	 with NULL values =  595


In [16]:
#df = df.fillna('0')

In [17]:
# Counting the missing values from each feature
'''for col in df.columns:
    print(col, "\t", "with NULL values = ", df.filter(df[col].isNull()).count())'''

'for col in df.columns:\n    print(col, "\t", "with NULL values = ", df.filter(df[col].isNull()).count())'

In [18]:
# Scripting in Python for changing datatypes
'''
df['rating'] = df['rating'].astype(float)
df['rating_count'] = df['rating_count'].astype(int)
df['telephone'] = df['telephone'].astype(int)
df['cost_for_two'] = df['cost_for_two'].astype(int)
df['longitude'] = df['longitude'].astype(double)
df['latitude] = df['latitude'].astype(double)
'''

"\ndf['rating'] = df['rating'].astype(float)\ndf['rating_count'] = df['rating_count'].astype(int)\ndf['telephone'] = df['telephone'].astype(int)\ndf['cost_for_two'] = df['cost_for_two'].astype(int)\ndf['longitude'] = df['longitude'].astype(double)\ndf['latitude] = df['latitude'].astype(double)\n"

In [19]:
# Changing the datatypes according to the PySpark
df = df.withColumn("rating", df['rating'].cast(FloatType()))
df = df.withColumn('rating_count', df['rating_count'].cast(IntegerType()))
df = df.withColumn('telephone', df['telephone'].cast(IntegerType()))
df = df.withColumn('cost_for_two', df['cost_for_two'].cast(IntegerType()))
df = df.withColumn('longitude', df['longitude'].cast(DoubleType()))
df = df.withColumn('latitude', df['latitude'].cast(DoubleType()))

In [20]:
df.printSchema()

root
 |-- zomato_url: string (nullable = true)
 |-- name: string (nullable = true)
 |-- city: string (nullable = true)
 |-- area: string (nullable = true)
 |-- rating: float (nullable = true)
 |-- rating_count: integer (nullable = true)
 |-- telephone: integer (nullable = true)
 |-- cusine: string (nullable = true)
 |-- cost_for_two: integer (nullable = true)
 |-- address: string (nullable = true)
 |-- online_order: string (nullable = true)
 |-- table_reservation: string (nullable = true)
 |-- delivery_only: string (nullable = true)
 |-- longitude: double (nullable = true)
 |-- latitude: double (nullable = true)



## Analysis

#### What are the top 10 rated restaurants in the dataset?

In [21]:
q1 = df.select('name', 'rating', 'rating_count').groupby('name', 'rating', 'rating_count').agg(count('*').alias('count')).orderBy(desc('rating_count')).limit(10)
q1.show()

+--------------------+------+------------+-----+
|                name|rating|rating_count|count|
+--------------------+------+------------+-----+
|            Bawarchi|   4.5|       42621|    1|
|Byg Brewski Brewi...|   4.9|       19305|    1|
|                Toit|   4.6|       15731|    1|
|            Truffles|   4.6|       15653|    1|
|    Hauz Khas Social|   4.7|       14936|    1|
|AB's - Absolute B...|   4.8|       13164|    1|
|            Paradise|   4.7|       13152|    1|
|     The Black Pearl|   4.9|       12686|    1|
|Shah Ghouse Hotel...|   4.2|       12514|    1|
|           Peter Cat|   4.2|       11917|    1|
+--------------------+------+------------+-----+



#### What are the top 10 locations for restaurants?

In [22]:
q2 = df.select('name', 'rating', 'rating_count', 'city').groupby('name', 'rating', 'rating_count', 'city').agg(count('*').alias('count')).orderBy(desc('rating_count')).limit(10)
q2.show()

+--------------------+------+------------+---------+-----+
|                name|rating|rating_count|     city|count|
+--------------------+------+------------+---------+-----+
|            Bawarchi|   4.5|       42621|Hyderabad|    1|
|Byg Brewski Brewi...|   4.9|       19305|Bengaluru|    1|
|                Toit|   4.6|       15731|Bengaluru|    1|
|            Truffles|   4.6|       15653|Bengaluru|    1|
|    Hauz Khas Social|   4.7|       14936|Delhi NCR|    1|
|AB's - Absolute B...|   4.8|       13164|Bengaluru|    1|
|            Paradise|   4.7|       13152|Hyderabad|    1|
|     The Black Pearl|   4.9|       12686|Bengaluru|    1|
|Shah Ghouse Hotel...|   4.2|       12514|Hyderabad|    1|
|           Peter Cat|   4.2|       11917|  Kolkata|    1|
+--------------------+------+------------+---------+-----+



So, mainly Hyderabad and Bengaluru has the highest rated restaurants in the entire country just by analyzing the data. 

#### What are the top 10 most served cusine in the dataset?

In [23]:
q3 = df.select('cusine').groupby('cusine').agg(count('*').alias('count')).orderBy(desc('count')).limit(10)
q3.show()

+--------------------+-----+
|              cusine|count|
+--------------------+-----+
|        North Indian|20549|
|           Fast Food|11455|
|North Indian, Chi...|11230|
|        South Indian| 6676|
|              Bakery| 5703|
|             Chinese| 5346|
|         Street Food| 4134|
|    Bakery, Desserts| 3297|
|             Biryani| 2791|
|Chinese, North In...| 2675|
+--------------------+-----+



So, here we can see the the *North Indian* cuisine is the most served cuisine in our dataset. Second to it is the *Fast Food* that we all love to it on the go. Other than that it has *South Indian* dishes in top 5 spot. It is not surprising to see the Indian's most preferred dish- *Biryani* to secure a place in th top 10.

#### In the top 10 restaurants, what is the most preferred cuisine?

In [24]:
q4 = df.select('name', 'rating', 'rating_count', 'cusine').groupby('name', 'rating', 'rating_count', 'cusine').agg(count('*').alias('count')).orderBy(desc('rating_count')).limit(10)
q4.show()

+--------------------+------+------------+--------------------+-----+
|                name|rating|rating_count|              cusine|count|
+--------------------+------+------------+--------------------+-----+
|            Bawarchi|   4.5|       42621|Biryani, Hyderaba...|    1|
|Byg Brewski Brewi...|   4.9|       19305|Continental, Nort...|    1|
|                Toit|   4.6|       15731|Italian, American...|    1|
|            Truffles|   4.6|       15653|Cafe, American, B...|    1|
|    Hauz Khas Social|   4.7|       14936|Kontinentálna, Am...|    1|
|AB's - Absolute B...|   4.8|       13164|European, Mediter...|    1|
|            Paradise|   4.7|       13152|Biryani, North In...|    1|
|     The Black Pearl|   4.9|       12686|North Indian, Eur...|    1|
|Shah Ghouse Hotel...|   4.2|       12514|Biryani, North In...|    1|
|           Peter Cat|   4.2|       11917|Continental, Nort...|    1|
+--------------------+------+------------+--------------------+-----+



#### Top 10 cuisine (Some major Cities)

In [25]:
q5_mumbai = df.select('name', 'rating', 'rating_count', 'city', 'area', 'cusine').filter(df.city == 'Mumbai').orderBy(desc('rating_count')).limit(10)
q5_mumbai.show()

+--------------------+------+------------+------+--------------------+--------------------+
|                name|rating|rating_count|  city|                area|              cusine|
+--------------------+------+------------+------+--------------------+--------------------+
|       Colaba Social|   4.9|       10311|Mumbai|              Colaba|Americká, Severoi...|
|             Candies|   4.1|       10300|Mumbai|Pali Hill, Bandra...|Cafe, Desserts, I...|
|        Joey's Pizza|   4.5|        9503|Mumbai|          Azad Nagar|               Pizza|
|        Prithvi Cafe|   4.4|        8401|Mumbai|                Juhu|      Cafe, Desserts|
|        Joey's Pizza|   4.5|        8022|Mumbai|          Malad West|               Pizza|
|Chili's American ...|   4.4|        7997|Mumbai|               Powai|American, Mexican...|
|Chili's American ...|   4.5|        7977|Mumbai|          Malad West|American, Mexican...|
|  Leopold Cafe & Bar|   4.0|        7474|Mumbai|              Colaba|Chinese, I

For Mumbai, the street food along with the exotic food style is the main cuisine that the today's generation like and are penchant towards.

In [26]:
q5_hyderabad = df.select('name', 'rating', 'rating_count', 'city', 'area', 'cusine').filter(df.city == 'Hyderabad').orderBy(desc('rating_count')).limit(10)
q5_hyderabad.show()

+--------------------+------+------------+---------+---------------+--------------------+
|                name|rating|rating_count|     city|           area|              cusine|
+--------------------+------+------------+---------+---------------+--------------------+
|            Bawarchi|   4.5|       42621|Hyderabad|     Nallakunta|Biryani, Hyderaba...|
|            Paradise|   4.7|       13152|Hyderabad|Paradise Circle|Biryani, North In...|
|Shah Ghouse Hotel...|   4.2|       12514|Hyderabad|     Gachibowli|Biryani, North In...|
|    Lucky Restaurant|   4.3|       11103|Hyderabad|         Nagole|Biryani, North In...|
|          Cafe Bahar|   4.6|       11000|Hyderabad|   Basheer Bagh|Biryani, North In...|
|AB's - Absolute B...|   4.9|       10086|Hyderabad|  Jubilee Hills|European, Mediter...|
|        Captain Cook|   4.1|        9494|Hyderabad|    Musheerabad|Biryani, North In...|
|            Flechazo|   4.6|        8768|Hyderabad|       Madhapur|Asian, Mediterran...|
|Shah Ghou

It is no brainer that the city of Hyderabad, which is famous for its Biryani dishes will have the most favorable cuisines in most of the restaurants as it.

In [27]:
q5_bengaluru = df.select('name', 'rating', 'rating_count', 'city', 'area', 'cusine').filter(df.city == 'Bengaluru').orderBy(desc('rating_count')).limit(10)
q5_bengaluru.show()

+--------------------+------+------------+---------+--------------------+--------------------+
|                name|rating|rating_count|     city|                area|              cusine|
+--------------------+------+------------+---------+--------------------+--------------------+
|Byg Brewski Brewi...|   4.9|       19305|Bengaluru|       Sarjapur Road|Continental, Nort...|
|                Toit|   4.6|       15731|Bengaluru|         Indiranagar|Italian, American...|
|            Truffles|   4.6|       15653|Bengaluru|Koramangala 5th B...|Cafe, American, B...|
|AB's - Absolute B...|   4.8|       13164|Bengaluru|        Marathahalli|European, Mediter...|
|     The Black Pearl|   4.9|       12686|Bengaluru|Koramangala 5th B...|North Indian, Eur...|
|      TBC Sky Lounge|   4.9|       10845|Bengaluru|                 HSR|Continental, Asia...|
|         Big Pitcher|   4.7|       10789|Bengaluru|    Old Airport Road|American, Contine...|
|              Onesta|   4.4|        9452|Bengalur

Banglore, otherwise known as Bengaluru has a mixture of dishes that the parochial people love and have them serve on their table. From its own country made dishes to the exotic European and American dishes, the people there loves to try new dishes.

In [28]:
q5_delhi = df.select('name', 'rating', 'rating_count', 'city', 'area', 'cusine').filter(df.city == 'Delhi NCR').orderBy(desc('rating_count')).limit(10)
q5_delhi.show()

+--------------------+------+------------+---------+-----------------+--------------------+
|                name|rating|rating_count|     city|             area|              cusine|
+--------------------+------+------------+---------+-----------------+--------------------+
|    Hauz Khas Social|   4.7|       14936|Delhi NCR|Hauz Khas Village|Kontinentálna, Am...|
|      Warehouse Cafe|   4.1|       10035|Delhi NCR|  Connaught Place|American, Contine...|
|  Lord Of The Drinks|   4.4|        9582|Delhi NCR|  Connaught Place|European, Chinese...|
|             Tamasha|   4.5|        8866|Delhi NCR|  Connaught Place|Finger Food, Nort...|
|     Saravana Bhavan|   4.4|        7906|Delhi NCR|  Connaught Place|South Indian, Des...|
|The Flying Saucer...|   4.3|        7903|Delhi NCR|      Nehru Place|Continental, Bar ...|
|               Local|   4.4|        6966|Delhi NCR|  Connaught Place|North Indian, Con...|
|             Karim's|   4.1|        6761|Delhi NCR|      Jama Masjid|Mughlai, N

Delhi also shows wide diversity in the tastes of the local people. They have South Indian and North Indian dishes to European as well as some American ones to. 

In [29]:
q5_kolkata = df.select('name', 'rating', 'rating_count', 'city', 'area', 'cusine').filter(df.city == 'Kolkata').orderBy(desc('rating_count')).limit(10)
q5_kolkata.show()

+-------------------+------+------------+-------+-------------------+--------------------+
|               name|rating|rating_count|   city|               area|              cusine|
+-------------------+------+------------+-------+-------------------+--------------------+
|          Peter Cat|   4.2|       11917|Kolkata|   Park Street Area|Continental, Nort...|
|    Barbeque Nation|   4.7|        8551|Kolkata|Sector 5, Salt Lake|North Indian, Chi...|
|              BarBQ|   4.4|        8052|Kolkata|   Park Street Area|Chinese, North In...|
|Chili's Grill & Bar|   4.7|        7615|Kolkata|         Ballygunge|American, Mexican...|
|            Mocambo|   4.2|        6466|Kolkata|   Park Street Area|         Continental|
|            Arsalan|   4.1|        5963|Kolkata|   Park Circus Area|Mughlai, North In...|
|          Oudh 1590|   4.5|        5953|Kolkata|     Desapriya Park|Biryani, Mughlai,...|
|        Spice Kraft|   4.8|        5243|Kolkata|         Ballygunge|Continental, Asia...|

Kolkata prefers Continential along with Chinese dishes along with a other offerings.

In [30]:
q5_chennai = df.select('name', 'rating', 'rating_count', 'city', 'area', 'cusine').filter(df.city == 'Chennai').orderBy(desc('rating_count')).limit(10)
q5_chennai.show()

+--------------------+------+------------+-------+------------+--------------------+
|                name|rating|rating_count|   city|        area|              cusine|
+--------------------+------+------------+-------+------------+--------------------+
|AB's - Absolute B...|   4.9|       10357|Chennai|    T. Nagar|BBQ, North Indian...|
|      Coal Barbecues|   4.9|        8171|Chennai|   Velachery|North Indian, Chi...|
|      Coal Barbecues|   4.9|        7348|Chennai|    T. Nagar|North Indian, Med...|
|     Barbeque Nation|   4.8|        5799|Chennai|    T. Nagar|North Indian, Con...|
|              Onesta|   4.9|        5126|Chennai| Semmancheri|Pizza, Italian, F...|
|            Paradise|   4.4|        5106|Chennai|   Perungudi|Biryani, North In...|
|Chili's American ...|   4.8|        4707|Chennai|  Royapettah|Mexican, American...|
|      Copper Kitchen|   4.6|        4351|Chennai|       Porur|Seafood, North In...|
|              Maplai|   4.4|        4279|Chennai|Nungambakkam|Ch

People in Chennai has varies restaurant ranging from BBQ, Seafood, Biryani, and some North Indian cuisines.

#### What are the top 10 costliest restaurants in the country?

In [31]:
q6 = df.select('name', 'rating', 'cost_for_two', 'city').orderBy(desc('cost_for_two')).limit(10)
q6.show()

+--------------------+------+------------+---------+
|                name|rating|cost_for_two|     city|
+--------------------+------+------------+---------+
|Ocean - The Priva...|   3.6|       30000|   Mumbai|
|Gol Bungalow - Ta...|   3.9|       15000|Hyderabad|
|              Bhairo|   2.9|       15000|  Udaipur|
|          Fly Dining|   3.8|       14000|Bengaluru|
|Pillars - Umaid B...|   3.7|       12000|  Jodhpur|
|Risala- Umaid Bha...|   4.1|       12000|  Jodhpur|
|Trophy Bar- Umaid...|   3.3|       12000|  Jodhpur|
|Wasabi By Morimot...|   4.3|       10000|   Mumbai|
|Orient Express - ...|   4.2|        8000|Delhi NCR|
|Yuuka - The St. R...|   4.1|        8000|   Mumbai|
+--------------------+------+------------+---------+



#### What is the top 10 costliest restaurants in specific city?

In [32]:
q7_mumbai = df.select('name', 'rating', 'cost_for_two', 'area', 'city').filter(df.city == 'Mumbai').orderBy(desc('cost_for_two')).limit(10)
q7_mumbai.show()

+--------------------+------+------------+--------------------+------+
|                name|rating|cost_for_two|                area|  city|
+--------------------+------+------------+--------------------+------+
|Ocean - The Priva...|   3.6|       30000|     Vile Parle East|Mumbai|
|Wasabi By Morimot...|   4.3|       10000|              Colaba|Mumbai|
|Yuuka - The St. R...|   4.1|        8000|         Lower Parel|Mumbai|
|              Masque|   4.3|        6500|           Mahalaxmi|Mumbai|
|Masala Kraft - Th...|   4.1|        6500|              Colaba|Mumbai|
|Le Cirque Signatu...|   4.0|        6000|             Chakala|Mumbai|
|Celini - Grand Hyatt|   4.2|        6000|      Santacruz East|Mumbai|
|Vista - Taj Lands...|   4.0|        5500|Bandstand, Bandra...|Mumbai|
|Souk - The Taj Ma...|   4.2|        5500|              Colaba|Mumbai|
|Bombay High - ITC...|   3.6|        5500|             Chakala|Mumbai|
+--------------------+------+------------+--------------------+------+



The most costliest hotel in the country is **Ocean - The Private Dining Room - Sahara Star** which is in Mumbai.

In [33]:
q7_hyderabad = df.select('name', 'rating', 'cost_for_two', 'area', 'city').filter(df.city == 'Hyderabad').orderBy(desc('cost_for_two')).limit(10)
q7_hyderabad.show()

+--------------------+------+------------+-------------+---------+
|                name|rating|cost_for_two|         area|     city|
+--------------------+------+------------+-------------+---------+
|Gol Bungalow - Ta...|   3.9|       15000|    Falaknuma|Hyderabad|
|Adaa - Taj Falakn...|   4.5|        8000|    Falaknuma|Hyderabad|
|Celeste - Taj Fal...|   4.4|        7000|    Falaknuma|Hyderabad|
|Vineela Yadlapall...|   3.5|        5000|Jubilee Hills|Hyderabad|
|Thai Pavilion - V...|   4.3|        4000|     Begumpet|Hyderabad|
|Altitude - Hydera...|   4.1|        4000|Necklace Road|Hyderabad|
|  Prego - The Westin|   4.2|        3500|  Hitech City|Hyderabad|
|Dakshin - ITC Kak...|   4.4|        3400|     Begumpet|Hyderabad|
|Kebabs & Kurries ...|   4.2|        3400|     Begumpet|Hyderabad|
|   Ohm - Hotel Avasa|   4.2|        3000|  Hitech City|Hyderabad|
+--------------------+------+------------+-------------+---------+



For the city of Hyderabad, the most costly restaurant is the **Gol Bungalow - Taj Falaknuma Palace** 

In [34]:
q7_bengaluru = df.select('name', 'rating', 'cost_for_two', 'area', 'city').filter(df.city == 'Bengaluru').orderBy(desc('cost_for_two')).limit(10)
q7_bengaluru.show()

+--------------------+------+------------+-----------------+---------+
|                name|rating|cost_for_two|             area|     city|
+--------------------+------+------------+-----------------+---------+
|          Fly Dining|   3.8|       14000|         Nagawara|Bengaluru|
|Le Cirque Signatu...|   4.2|        6000| Old Airport Road|Bengaluru|
|Royal Afghan - IT...|   4.3|        6000|      Sankey Road|Bengaluru|
|         Grasshopper|   4.3|        5000|Bannerghatta Road|Bengaluru|
|Dakshin - ITC Win...|   4.3|        5000|      Sankey Road|Bengaluru|
|Dum Pukht Jolly N...|   4.3|        5000|      Sankey Road|Bengaluru|
|Malties - Radisso...|   4.2|        4500|     Marathahalli|Bengaluru|
|La Brasserie - Le...|   4.1|        4100|      Sankey Road|Bengaluru|
|Edo Restaurant & ...|   4.3|        4000|    Richmond Road|Bengaluru|
|Riwaz - The Ritz-...|   4.1|        4000|   Residency Road|Bengaluru|
+--------------------+------+------------+-----------------+---------+



**Fly Dining** in Bangalore is the most expensive restaurant in that city.

In [35]:
q7_delhi = df.select('name', 'rating', 'cost_for_two', 'area', 'city').filter(df.city == 'Delhi NCR').orderBy(desc('cost_for_two')).limit(10)
q7_delhi.show()

+--------------------+------+------------+-------------+---------+
|                name|rating|cost_for_two|         area|     city|
+--------------------+------+------------+-------------+---------+
|Orient Express - ...|   4.2|        8000| Chanakyapuri|Delhi NCR|
|Tian - Asian Cuis...|   4.1|        7000| Chanakyapuri|Delhi NCR|
|Ottimo At West Vi...|   4.1|        6600| Chanakyapuri|Delhi NCR|
|Bukhara - ITC Maurya|   4.3|        6500| Chanakyapuri|Delhi NCR|
|Nostalgia at 1911...|   3.5|        6000|      Janpath|Delhi NCR|
|The Theatre Club ...|   3.8|        6000|Paschim Vihar|Delhi NCR|
|Amaranta - The Ob...|   4.1|        6000|  Udyog Vihar|Delhi NCR|
|The Spice Route -...|   4.1|        6000|      Janpath|Delhi NCR|
|House of Ming - T...|   4.1|        5500|Mansingh Road|Delhi NCR|
|San Gimignano - T...|   4.0|        5500|      Janpath|Delhi NCR|
+--------------------+------+------------+-------------+---------+



**Orient Express - Taj Palace** is the exorbitant restaurant in the capital.

In [36]:
q7_kolkata = df.select('name', 'rating', 'cost_for_two', 'area', 'city').filter(df.city == 'Kolkata').orderBy(desc('cost_for_two')).limit(10)
q7_kolkata.show()

+--------------------+------+------------+-------------------+-------+
|                name|rating|cost_for_two|               area|   city|
+--------------------+------+------------+-------------------+-------+
|Chinoiserie - Taj...|   4.3|        5000|            Alipore|Kolkata|
|Guchhi - Hyatt Re...|   4.0|        4600|Sector 3, Salt Lake|Kolkata|
|Eden Pavilion - I...|   4.0|        4500|  Science City Area|Kolkata|
|West View Bar & G...|   3.8|        4500|  Science City Area|Kolkata|
|     Seasonal Tastes|   4.3|        4200|           New Town|Kolkata|
|The Junction - Ta...|   3.8|        4200|            Alipore|Kolkata|
|     Seasonal Tastes|   4.4|        4200|           New Town|Kolkata|
|      Zen - The Park|   4.1|        4000|   Park Street Area|Kolkata|
|The Legacy Grill ...|   3.9|        4000|          Esplanade|Kolkata|
|Baan Thai - The O...|   4.2|        4000|    New Market Area|Kolkata|
+--------------------+------+------------+-------------------+-------+



**Chinoiserie - Taj Bengal** is the most expensive restaurant in the city of Kolkata

In [37]:
q7_chennai = df.select('name', 'rating', 'cost_for_two', 'area', 'city').filter(df.city == 'Chennai').orderBy(desc('cost_for_two')).limit(10)
q7_chennai.show()

+--------------------+------+------------+---------------+-------+
|                name|rating|cost_for_two|           area|   city|
+--------------------+------+------------+---------------+-------+
|         Tao of Peng|   4.5|        5500|  Mahabalipuram|Chennai|
|Royal Vega - ITC ...|   4.4|        5500|         Guindy|Chennai|
|Blend - Taj Club ...|   4.0|        5000|Thousand Lights|Chennai|
|Ottimo Cucina Ita...|   4.1|        5000|         Guindy|Chennai|
|Salt. Co. 531 - R...|   4.1|        4800|       GST Road|Chennai|
|Peshawri - ITC Gr...|   4.4|        4500|         Guindy|Chennai|
|Avartana - ITC Gr...|   4.7|        4500|         Guindy|Chennai|
|Pan Asian - ITC G...|   4.5|        4200|         Guindy|Chennai|
|Madras Pavilion -...|   4.5|        4000|         Guindy|Chennai|
|The Great Kabab F...|   4.0|        4000|       GST Road|Chennai|
+--------------------+------+------------+---------------+-------+



**Tao of Peng** is the costliest restaurant in the city of Chennai.

#### Top 10 cheapest restaurant

In [38]:
q8 = df.select('name', 'rating', 'cost_for_two', 'city').filter(df.cost_for_two > 50).orderBy(asc('cost_for_two')).limit(10)
q8.show()

+--------------------+------+------------+---------+
|                name|rating|cost_for_two|     city|
+--------------------+------+------------+---------+
|            18 Dosas|   2.7|          60|Hyderabad|
|       SV Idli House|  null|          60|Bengaluru|
|  Indian Coffee Shop|  null|          60|    Kochi|
|         Lemon Drops|  null|          60|Bengaluru|
|          Mamee Soup|   4.1|          60|  Chennai|
|     Sanskriti Chaat|   3.8|          60|   Jaipur|
|Shri Gokul Prasad...|   3.2|          60|  Lucknow|
|       Hotel Mandala|  null|          60|  Gangtok|
|   Chak De Fast Food|   3.3|          60|   Kanpur|
|Shri Kalila Chat ...|  null|          60|  Lucknow|
+--------------------+------+------------+---------+



#### What are the top 10 cheapest restaurants in the specific cities?

In [39]:
q9_mumbai = df.select('name', 'rating', 'cost_for_two', 'city', 'area').filter((df.cost_for_two >= 40) & (df.city == 'Mumbai')).orderBy(asc('cost_for_two')).limit(10)
q9_mumbai.show()

+--------------------+------+------------+------+--------------------+
|                name|rating|cost_for_two|  city|                area|
+--------------------+------+------------+------+--------------------+
|     Shegaon Kachori|   3.3|          50|Mumbai|  Khopat, Thane West|
|Kullad Chai Coffe...|   3.1|          50|Mumbai|Hiranandani Estat...|
|        More Vadapav|  null|          50|Mumbai|      Kopar Khairane|
|           Ho5 Store|   3.3|          50|Mumbai|        Matunga West|
|Cool Katta Kulfie...|  null|          50|Mumbai|        Dahisar West|
|     Akshar Soda Pub|  null|          50|Mumbai|      Dombivali East|
|Jai Ambika Pan Bh...|   3.2|          50|Mumbai|          Ulhasnagar|
|Batulz- Cakes N More|   2.9|          50|Mumbai|             Kamothe|
|              Nuskha|  null|          50|Mumbai|               Vasai|
|Sanjay Vadapav Stall|  null|          50|Mumbai|               Marol|
+--------------------+------+------------+------+--------------------+



In [40]:
q9_hyderabad = df.select('name', 'rating', 'cost_for_two', 'city', 'area').filter((df.cost_for_two >= 40) & (df.city == 'Hyderabad')).orderBy(asc('cost_for_two')).limit(10)
q9_hyderabad.show()

+--------------------+------+------------+---------+-----------+
|                name|rating|cost_for_two|     city|       area|
+--------------------+------+------------+---------+-----------+
|         Sohail Cafe|   3.3|          50|Hyderabad|Musheerabad|
|  Zar Zari Zar Baksh|   3.0|          50|Hyderabad|Musheerabad|
|Sri Gayatri Dabel...|   3.3|          50|Hyderabad|   Kothapet|
|     Naidu Tea Stall|   3.1|          50|Hyderabad| Kukatpally|
|   K.G.N Veg Biryani|   2.9|          50|Hyderabad|  Charminar|
|        Chai Deewane|   3.2|          50|Hyderabad| Kukatpally|
|     Khaliq Pan Shop|  null|          50|Hyderabad|  Falaknuma|
|  Indian Parata Shop|  null|          50|Hyderabad|  Kacheguda|
| Chennai Coffee Shop|  null|          50|Hyderabad|   Madhapur|
|    City Grand Hotel|  null|          50|Hyderabad|   Nampally|
+--------------------+------+------------+---------+-----------+



In [41]:
q9_bengaluru = df.select('name', 'rating', 'cost_for_two', 'city', 'area').filter((df.cost_for_two >= 30) & (df.city == 'Bengaluru')).orderBy(asc('cost_for_two')).limit(10)
q9_bengaluru.show()

+--------------------+------+------------+---------+-----------------+
|                name|rating|cost_for_two|     city|             area|
+--------------------+------+------------+---------+-----------------+
|Shree Vishnu That...|   3.9|          40|Bengaluru|           Domlur|
|      Indira Canteen|  null|          50|Bengaluru|         RT Nagar|
|            VarieTea|   3.7|          50|Bengaluru|     New BEL Road|
|           Tea Vibes|  null|          50|Bengaluru|        Banaswadi|
|      Indira Canteen|  null|          50|Bengaluru|     Sanjay Nagar|
|     Black Pekoe Tea|  null|          50|Bengaluru|              BTM|
|      Bread & Better|  null|          50|Bengaluru|Bannerghatta Road|
|  Maghai Paan Center|  null|          50|Bengaluru|      Indiranagar|
|         Lemon Drops|  null|          60|Bengaluru|  Electronic City|
|       SV Idli House|  null|          60|Bengaluru|     Marathahalli|
+--------------------+------+------------+---------+-----------------+



In [42]:
q9_delhi = df.select('name', 'rating', 'cost_for_two', 'city', 'area').filter((df.cost_for_two >= 40) & (df.city == 'Delhi NCR')).orderBy(asc('cost_for_two')).limit(10)
q9_delhi.show()

+--------------------+------+------------+---------+-------------------+
|                name|rating|cost_for_two|     city|               area|
+--------------------+------+------------+---------+-------------------+
|           Sata Paan|  null|          50|Delhi NCR|          Janakpuri|
|Aggarwal Jalebi Wale|   2.7|          50|Delhi NCR|        Uttam Nagar|
| Shri Saheb Ji Dairy|   3.7|          50|Delhi NCR|     Dilshad Garden|
|  Gopal Kachori Wala|   3.3|          50|Delhi NCR|       Punjabi Bagh|
|       Duggal Snacks|   3.9|          50|Delhi NCR|Mayur Vihar Phase 2|
|Pandit Ved Prakas...|   3.9|          50|Delhi NCR|      Chandni Chowk|
|         Snacks Time|  null|          50|Delhi NCR|          Jail Road|
|  The Paratha Corner|  null|          50|Delhi NCR|         Trilokpuri|
| Shri Ram Poori Wale|   2.8|          50|Delhi NCR|        Kirti Nagar|
|          Chaat Star|  null|          50|Delhi NCR|        Gaur City 1|
+--------------------+------+------------+---------

In [43]:
q9_kolkata = df.select('name', 'rating', 'cost_for_two', 'city', 'area').filter((df.cost_for_two >= 40) & (df.city == 'Kolkata')).orderBy(asc('cost_for_two')).limit(10)
q9_kolkata.show()

+--------------------+------+------------+-------+------------------+
|                name|rating|cost_for_two|   city|              area|
+--------------------+------+------------+-------+------------------+
|    Debu's Shonpapri|  null|          50|Kolkata|           Kalyani|
|    Ramji Ghugniwala|   2.9|          80|Kolkata|  Park Street Area|
|  Ramesh Chilla Wala|   3.1|          80|Kolkata|  Park Street Area|
|      Tea And Tiffin|  null|         100|Kolkata|      Kona Exp Way|
|Sri Krishna Hot C...|   3.1|         100|Kolkata| Camac Street Area|
|               Mahua|   3.8|         100|Kolkata|            Sinthi|
|        Ghosh Sweets|   3.3|         100|Kolkata|       Kankurgachi|
|Shree  Hari Mista...|   3.3|         100|Kolkata|          Gariahat|
|           Abhilasha|  null|         100|Kolkata|Dalhousie BBD Bagh|
|Laxmi Narayan Mis...|   3.2|         100|Kolkata|         Baguihati|
+--------------------+------+------------+-------+------------------+



In [44]:
q9_chennai = df.select('name', 'rating', 'cost_for_two', 'city', 'area').filter((df.cost_for_two >= 30) & (df.city == 'Chennai')).orderBy(asc('cost_for_two')).limit(10)
q9_chennai.show()

+--------------------+------+------------+-------+-------------+
|                name|rating|cost_for_two|   city|         area|
+--------------------+------+------------+-------+-------------+
|            Soda Hub|   3.3|          40|Chennai|     Navallur|
|         Planet Soda|  null|          50|Chennai|    Perungudi|
|            Tea Talk|  null|          50|Chennai|       Guindy|
| Indian Coffee House|  null|          50|Chennai|       Vepery|
|     Saravana Coffee|   3.1|          50|Chennai|  Kodambakkam|
|            Kaafemaa|  null|          50|Chennai|     Ambattur|
|Jai Sri Prathyang...|  null|          50|Chennai|     T. Nagar|
|          Mamee Soup|   4.1|          60|Chennai|West Mambalam|
|              REFUEL|  null|          80|Chennai|  Kelambakkam|
|Perambur Mangala ...|   3.3|         100|Chennai|     Perambur|
+--------------------+------+------------+-------+-------------+

