<a href="https://colab.research.google.com/github/Devvrat53/Restaurant-Analysis/blob/main/Restaurant_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Installation

In [1]:
 from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
#!pip install --upgrade pip
#!pip install folium
#!pip install plotly
#!pip install pyspark

## Import

In [3]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import format_number, avg, desc, count, asc
from pyspark.sql.types import * # Data types in PySpark
import matplotlib.pyplot as plt
import seaborn as sns
import folium
import plotly.express as px

In [4]:
spark = SparkSession.builder.appName('Restaurant-Data-Analysis').getOrCreate()

In [5]:
df_path = '/content/drive/MyDrive/BDA-Mini-Project/Dataset/Zomato Kaggle/Zomato India Restaurants (2 Lakh + restaurants data)/data/indian_restaurants_details_cleaned_data.csv'
df = spark.read.format('csv').option('header', True).option('inferSchema', True).load(df_path)

In [6]:
df.show(5)
print("Total Records in the dataset= ", df.count())

+--------------------+--------------------+---------+------------+------+------------+--------------------+--------------------+------------+--------------------+--------------------+------------+-----------------+-------------+--------------------+-------------+-------------+
|          zomato_url|                name|     city|        area|rating|rating_count|           telephone|              cusine|cost_for_two|             address|             timings|online_order|table_reservation|delivery_only|         famous_food|    longitude|     latitude|
+--------------------+--------------------+---------+------------+------+------------+--------------------+--------------------+------------+--------------------+--------------------+------------+-----------------+-------------+--------------------+-------------+-------------+
|https://www.zomat...|         Sainik Food|Delhi NCR|Pandav Nagar|   3.2|        21.0|011 22486474 +91 ...|        North Indian|       300.0|C 4/1, Opposite M...|{'Mo

In [7]:
# Describing the Schema of the dataset
df.printSchema()

root
 |-- zomato_url: string (nullable = true)
 |-- name: string (nullable = true)
 |-- city: string (nullable = true)
 |-- area: string (nullable = true)
 |-- rating: string (nullable = true)
 |-- rating_count: string (nullable = true)
 |-- telephone: string (nullable = true)
 |-- cusine: string (nullable = true)
 |-- cost_for_two: string (nullable = true)
 |-- address: string (nullable = true)
 |-- timings: string (nullable = true)
 |-- online_order: string (nullable = true)
 |-- table_reservation: string (nullable = true)
 |-- delivery_only: string (nullable = true)
 |-- famous_food: string (nullable = true)
 |-- longitude: string (nullable = true)
 |-- latitude: string (nullable = true)



In [8]:
# Columns in the dataset
df.columns

['zomato_url',
 'name',
 'city',
 'area',
 'rating',
 'rating_count',
 'telephone',
 'cusine',
 'cost_for_two',
 'address',
 'timings',
 'online_order',
 'table_reservation',
 'delivery_only',
 'famous_food',
 'longitude',
 'latitude']

In [9]:
# Shape of the dataset i.e. the rows and the columns
shape = (df.count(), len(df.columns))
print("The Shape of the dataset= ", shape)

The Shape of the dataset=  (224854, 17)


In [10]:
# Selecting some columns for displaying
df.select('name', 'city', 'area', 'rating', 'cusine', 'cost_for_two', 'longitude', 'latitude').show()

+--------------------+---------+--------------------+------+--------------------+------------+-------------+-------------+
|                name|     city|                area|rating|              cusine|cost_for_two|    longitude|     latitude|
+--------------------+---------+--------------------+------+--------------------+------------+-------------+-------------+
|         Sainik Food|Delhi NCR|        Pandav Nagar|   3.2|        North Indian|       300.0|77.2848711535|28.6177324058|
|Kunal's Creamery ...|   Mumbai|           Ambernath|   3.6|Street Food, Chin...|       500.0|73.1842865422|19.2058869331|
|Brij Palace Resta...|Delhi NCR|              Jasola|  null|        North Indian|       250.0|77.2912229598|28.5630343606|
|         Sahib Hotel|Delhi NCR|           Paharganj|  null|        North Indian|       300.0|77.2182980552| 28.642410638|
|            Chunky's|  Kolkata|             Shibpur|   3.0|Italian, Pizza, C...|       500.0|88.3307084441|22.5777582163|
|      The Food 

## Pre-processing

In [11]:
# Drop unwanted column
df = df.drop('timings', 'famous_food')

In [12]:
# Statistical Properties of the dataframe
#df.select('rating', 'rating_count', 'cost_for_two', 'online_order', 'table_reservation', 'delivery_only').describe().show(5)

In [13]:
# Cleaned way
df.select(format_number(df['rating'].cast('float'), 2).alias('rating'),
         format_number(df['rating_count'].cast('float'), 2).alias('rating_count'),
         format_number(df['cost_for_two'].cast('float'), 2).alias('cost_for_two'),
         format_number(df['online_order'].cast('float'), 2).alias('online_order'),
         format_number(df['table_reservation'].cast('float'), 2).alias('table_reservation'),
         format_number(df['delivery_only'].cast('float'), 2).alias('delivery_only')).describe().show()

+-------+-------------------+------------------+------------------+------------------+-----------------+------------------+
|summary|             rating|      rating_count|      cost_for_two|      online_order|table_reservation|     delivery_only|
+-------+-------------------+------------------+------------------+------------------+-----------------+------------------+
|  count|             144736|            142398|            220989|                13|                8|                 4|
|   mean| 3.4880858943179107|101.55640338362659| 357.0730075769587|37.573076923076925|         31.31875|31.747500000000002|
| stddev|0.41998586322159115|161.58949123456605|179.71917421709884|30.744909981061742|28.89633809597927|37.322661011776745|
|    min|               0.00|              0.00|              0.00|              0.00|             0.00|              1.00|
|    max|               4.90|            999.00|            999.00|              9.96|            77.04|             85.09|
+-------

In [14]:
# Checking the Schema after deletion
df.printSchema()

root
 |-- zomato_url: string (nullable = true)
 |-- name: string (nullable = true)
 |-- city: string (nullable = true)
 |-- area: string (nullable = true)
 |-- rating: string (nullable = true)
 |-- rating_count: string (nullable = true)
 |-- telephone: string (nullable = true)
 |-- cusine: string (nullable = true)
 |-- cost_for_two: string (nullable = true)
 |-- address: string (nullable = true)
 |-- online_order: string (nullable = true)
 |-- table_reservation: string (nullable = true)
 |-- delivery_only: string (nullable = true)
 |-- longitude: string (nullable = true)
 |-- latitude: string (nullable = true)



In [15]:
# Counting the missing values from each feature
for col in df.columns:
    print(col, "\t", "with NULL values = ", df.filter(df[col].isNull()).count())

zomato_url 	 with NULL values =  5
name 	 with NULL values =  62
city 	 with NULL values =  79
area 	 with NULL values =  82
rating 	 with NULL values =  79867
rating_count 	 with NULL values =  82266
telephone 	 with NULL values =  1711
cusine 	 with NULL values =  1440
cost_for_two 	 with NULL values =  3839
address 	 with NULL values =  2016
online_order 	 with NULL values =  571
table_reservation 	 with NULL values =  578
delivery_only 	 with NULL values =  583
longitude 	 with NULL values =  594
latitude 	 with NULL values =  595


In [16]:
df = df.dropna(how= 'any')

In [17]:
# Shape of the dataset i.e. the rows and the columns
shape = (df.count(), len(df.columns))
print("The Shape of the dataset= ", shape)

The Shape of the dataset=  (139478, 15)


In [18]:
# Counting the missing values from each feature
for col in df.columns:
    print(col, "\t", "with NULL values = ", df.filter(df[col].isNull()).count())

zomato_url 	 with NULL values =  0
name 	 with NULL values =  0
city 	 with NULL values =  0
area 	 with NULL values =  0
rating 	 with NULL values =  0
rating_count 	 with NULL values =  0
telephone 	 with NULL values =  0
cusine 	 with NULL values =  0
cost_for_two 	 with NULL values =  0
address 	 with NULL values =  0
online_order 	 with NULL values =  0
table_reservation 	 with NULL values =  0
delivery_only 	 with NULL values =  0
longitude 	 with NULL values =  0
latitude 	 with NULL values =  0


In [19]:
# Scripting in Python for changing datatypes
'''
df['rating'] = df['rating'].astype(float)
df['rating_count'] = df['rating_count'].astype(int)
df['telephone'] = df['telephone'].astype(int)
df['cost_for_two'] = df['cost_for_two'].astype(int)
df['longitude'] = df['longitude'].astype(double)
df['latitude] = df['latitude'].astype(double)
'''

"\ndf['rating'] = df['rating'].astype(float)\ndf['rating_count'] = df['rating_count'].astype(int)\ndf['telephone'] = df['telephone'].astype(int)\ndf['cost_for_two'] = df['cost_for_two'].astype(int)\ndf['longitude'] = df['longitude'].astype(double)\ndf['latitude] = df['latitude'].astype(double)\n"

In [20]:
# Changing the datatypes according to the PySpark
df = df.withColumn("rating", df['rating'].cast(FloatType()))
df = df.withColumn('rating_count', df['rating_count'].cast(IntegerType()))
df = df.withColumn('telephone', df['telephone'].cast(IntegerType()))
df = df.withColumn('cost_for_two', df['cost_for_two'].cast(IntegerType()))
df = df.withColumn('longitude', df['longitude'].cast(DoubleType()))
df = df.withColumn('latitude', df['latitude'].cast(DoubleType()))

In [21]:
df.printSchema()

root
 |-- zomato_url: string (nullable = true)
 |-- name: string (nullable = true)
 |-- city: string (nullable = true)
 |-- area: string (nullable = true)
 |-- rating: float (nullable = true)
 |-- rating_count: integer (nullable = true)
 |-- telephone: integer (nullable = true)
 |-- cusine: string (nullable = true)
 |-- cost_for_two: integer (nullable = true)
 |-- address: string (nullable = true)
 |-- online_order: string (nullable = true)
 |-- table_reservation: string (nullable = true)
 |-- delivery_only: string (nullable = true)
 |-- longitude: double (nullable = true)
 |-- latitude: double (nullable = true)



In [22]:
df.show()

+--------------------+--------------------+---------+-------------------+------+------------+---------+--------------------+------------+--------------------+------------+-----------------+-------------+-------------+-------------+
|          zomato_url|                name|     city|               area|rating|rating_count|telephone|              cusine|cost_for_two|             address|online_order|table_reservation|delivery_only|    longitude|     latitude|
+--------------------+--------------------+---------+-------------------+------+------------+---------+--------------------+------------+--------------------+------------+-----------------+-------------+-------------+-------------+
|https://www.zomat...|         Sainik Food|Delhi NCR|       Pandav Nagar|   3.2|          21|     null|        North Indian|         300|C 4/1, Opposite M...|       False|            False|        False|77.2848711535|28.6177324058|
|https://www.zomat...|Kunal's Creamery ...|   Mumbai|          Ambernath

In [30]:
india_map = folium.Map(location= [20.5937, 78.9629], zoom_start= 5)
india_map

## Analysis

#### What are the top 10 rated restaurants in the dataset?

In [24]:
q1 = df.select('name', 'rating', 'rating_count', 'latitude', 'longitude').groupby('name', 'rating', 'rating_count', 'latitude', 'longitude').agg(count('*').alias('count')).orderBy(desc('rating_count')).limit(10)
q1.show()

+--------------------+------+------------+-------------+-------------+-----+
|                name|rating|rating_count|     latitude|    longitude|count|
+--------------------+------+------------+-------------+-------------+-----+
|            Bawarchi|   4.5|       42621|17.4058834625|78.4988379479|    1|
|Byg Brewski Brewi...|   4.9|       19305|12.9129344258|77.6829059422|    1|
|                Toit|   4.6|       15731| 12.978888098| 77.640921995|    1|
|            Truffles|   4.6|       15653| 12.933426714|77.6143838838|    1|
|AB's - Absolute B...|   4.8|       13164|   12.9497983|   77.6993079|    1|
|            Paradise|   4.7|       13152|17.4419196322|78.4872524813|    1|
|     The Black Pearl|   4.9|       12686|12.9344733575|77.6159777865|    1|
|Shah Ghouse Hotel...|   4.2|       12514|17.4269242726|78.3765272051|    1|
|           Peter Cat|   4.2|       11917| 22.552630386|88.3527230099|    1|
|    Lucky Restaurant|   4.3|       11103|    17.365502|    78.557579|    1|

In [45]:
top_10_restaurant_india = folium.Map(location= [20.5937, 78.9629], zoom_start= 5)
top_10_restaurant = folium.map.FeatureGroup()

for name, lat, lng, in zip(q1.toPandas()['name'].values.tolist(), q1.toPandas()['latitude'].values.tolist(), q1.toPandas()['longitude'].values.tolist()):
  top_10_restaurant.add_child(
      folium.Marker(
          [lat, lng],
          popup= name,
      )
  )

top_10_restaurant_india.add_child(top_10_restaurant)

#### What are the top 10 most served cusine in the dataset?

In [52]:
q2 = df.select('cusine').groupby('cusine').agg(count('*').alias('count')).orderBy(desc('count')).limit(10)
q2.show()

+--------------------+-----+
|              cusine|count|
+--------------------+-----+
|        North Indian|10401|
|North Indian, Chi...| 7164|
|           Fast Food| 5880|
|        South Indian| 3496|
|              Bakery| 2850|
|             Chinese| 2697|
|         Street Food| 2311|
|    Bakery, Desserts| 2253|
|    Pizza, Fast Food| 2185|
|Chinese, North In...| 1656|
+--------------------+-----+



In [72]:
q2_py = q2.toPandas()
fig = px.bar(q2_py, x= 'cusine', y= 'count', text= 'count', color= 'cusine')
fig.update_traces(texttemplate= '%{text:.2s}', textposition= 'outside')
fig.update_layout(xaxis_tickangle= -45)
fig.update_layout(title= 'Top 10 Most Served Cusines', xaxis_title= 'Cusine', yaxis_title= 'Count')

fig

So, here we can see the the *North Indian* cuisine is the most served cuisine in our dataset. Second to it is the *Fast Food* that we all love to it on the go. Other than that it has *South Indian* dishes in top 5 spot. It is not surprising to see the Indian's most preferred dish- *Biryani* to secure a place in th top 10.

#### In the top 10 restaurants, what is the most preferred cuisine?

In [58]:
q3 = df.select('name', 'rating', 'rating_count', 'cusine').groupby('name', 'rating', 'rating_count', 'cusine').agg(count('*').alias('count')).orderBy(desc('rating_count')).limit(10)
q3.show()

+--------------------+------+------------+--------------------+-----+
|                name|rating|rating_count|              cusine|count|
+--------------------+------+------------+--------------------+-----+
|            Bawarchi|   4.5|       42621|Biryani, Hyderaba...|    1|
|Byg Brewski Brewi...|   4.9|       19305|Continental, Nort...|    1|
|                Toit|   4.6|       15731|Italian, American...|    1|
|            Truffles|   4.6|       15653|Cafe, American, B...|    1|
|AB's - Absolute B...|   4.8|       13164|European, Mediter...|    1|
|            Paradise|   4.7|       13152|Biryani, North In...|    1|
|     The Black Pearl|   4.9|       12686|North Indian, Eur...|    1|
|Shah Ghouse Hotel...|   4.2|       12514|Biryani, North In...|    1|
|           Peter Cat|   4.2|       11917|Continental, Nort...|    1|
|    Lucky Restaurant|   4.3|       11103|Biryani, North In...|    1|
+--------------------+------+------------+--------------------+-----+



In [63]:
q3_py = q3.toPandas()
fig = px.bar(q3_py, x= 'cusine', y= 'rating_count', text= 'rating_count', color= 'cusine')
fig.update_traces(texttemplate= '%{text:.2s}', textposition= 'outside')
fig.update_layout(xaxis_tickangle= -45)
fig.update_layout(title= 'Most Preferred Cusines in Top 10 Restaurants in India', xaxis_title= 'Cusine', yaxis_title= 'Rating Count')

fig

#### Top 10 cuisine (Some major Cities)

In [68]:
q4_mumbai = df.select('name', 'rating', 'rating_count', 'city', 'area', 'cusine', 'latitude', 'longitude').filter(df.city == 'Mumbai').orderBy(desc('rating_count')).limit(10)
q4_mumbai.show()

+--------------------+------+------------+------+--------------------+--------------------+-------------+-------------+
|                name|rating|rating_count|  city|                area|              cusine|     latitude|    longitude|
+--------------------+------+------------+------+--------------------+--------------------+-------------+-------------+
|             Candies|   4.1|       10300|Mumbai|Pali Hill, Bandra...|Cafe, Desserts, I...|19.0607755266|72.8269354999|
|        Joey's Pizza|   4.5|        9503|Mumbai|          Azad Nagar|               Pizza|19.1267929038|72.8299902007|
|        Prithvi Cafe|   4.4|        8401|Mumbai|                Juhu|      Cafe, Desserts|19.1060585385| 72.825872004|
|        Joey's Pizza|   4.5|        8022|Mumbai|          Malad West|               Pizza|19.1781877095|72.8346662968|
|Chili's American ...|   4.4|        7997|Mumbai|               Powai|American, Mexican...|19.1163373457|72.9093983397|
|Chili's American ...|   4.5|        797

In [71]:
top_10_cusine_mumbai = folium.Map(location= [19.0760, 72.8777], zoom_start= 12)
top_10_cusine_mum = folium.map.FeatureGroup()

for name, lat, lng, in zip(q4_mumbai.toPandas()['name'].values.tolist(), q4_mumbai.toPandas()['latitude'].values.tolist(), q4_mumbai.toPandas()['longitude'].values.tolist()):
  top_10_cusine_mum.add_child(
      folium.Marker(
          [lat, lng],
          popup= name,
      )
  )

top_10_cusine_mumbai.add_child(top_10_cusine_mum)

In [76]:
q4_mumbai_py = q4_mumbai.toPandas()
fig = px.bar(q4_mumbai_py, x= 'cusine', y= 'rating_count', text= 'rating_count', color= 'cusine')
fig.update_traces(texttemplate= '%{text:.2s}', textposition= 'outside')
fig.update_layout(xaxis_tickangle= -45)
fig.update_layout(title= 'Most Preferred Cusines in Mumbai', xaxis_title= 'Cusine', yaxis_title= 'Rating Count')

fig


For Mumbai, the street food along with the exotic food style is the main cuisine that the today's generation like and are penchant towards.

In [77]:
q4_hyderabad = df.select('name', 'rating', 'rating_count', 'city', 'area', 'cusine', 'latitude', 'longitude').filter(df.city == 'Hyderabad').orderBy(desc('rating_count')).limit(10)
q4_hyderabad.show()

+--------------------+------+------------+---------+---------------+--------------------+-------------+-------------+
|                name|rating|rating_count|     city|           area|              cusine|     latitude|    longitude|
+--------------------+------+------------+---------+---------------+--------------------+-------------+-------------+
|            Bawarchi|   4.5|       42621|Hyderabad|     Nallakunta|Biryani, Hyderaba...|17.4058834625|78.4988379479|
|            Paradise|   4.7|       13152|Hyderabad|Paradise Circle|Biryani, North In...|17.4419196322|78.4872524813|
|Shah Ghouse Hotel...|   4.2|       12514|Hyderabad|     Gachibowli|Biryani, North In...|17.4269242726|78.3765272051|
|    Lucky Restaurant|   4.3|       11103|Hyderabad|         Nagole|Biryani, North In...|    17.365502|    78.557579|
|          Cafe Bahar|   4.6|       11000|Hyderabad|   Basheer Bagh|Biryani, North In...|17.3996227683|78.4786600247|
|AB's - Absolute B...|   4.9|       10086|Hyderabad|  Ju

In [79]:
top_10_cusine_hyderabad = folium.Map(location= [17.3850, 78.4867], zoom_start= 12)
top_10_cusine_hyd = folium.map.FeatureGroup()

for name, lat, lng, in zip(q4_hyderabad.toPandas()['name'].values.tolist(), q4_hyderabad.toPandas()['latitude'].values.tolist(), q4_hyderabad.toPandas()['longitude'].values.tolist()):
  top_10_cusine_hyd.add_child(
      folium.Marker(
          [lat, lng],
          popup= name,
      )
  )

top_10_cusine_hyderabad.add_child(top_10_cusine_hyd)

In [78]:
q4_hyderabad_py = q4_hyderabad.toPandas()
fig = px.bar(q4_hyderabad_py, x= 'cusine', y= 'rating_count', text= 'rating_count', color= 'cusine')
fig.update_traces(texttemplate= '%{text:.2s}', textposition= 'outside')
fig.update_layout(xaxis_tickangle= -45)
fig.update_layout(title= 'Most Preferred Cusines in Hyderabad', xaxis_title= 'Cusine', yaxis_title= 'Rating Count')

fig

It is no brainer that the city of Hyderabad, which is famous for its Biryani dishes will have the most favorable cuisines in most of the restaurants as it.

In [80]:
q4_bengaluru = df.select('name', 'rating', 'rating_count', 'city', 'area', 'cusine', 'latitude', 'longitude').filter(df.city == 'Bengaluru').orderBy(desc('rating_count')).limit(10)
q4_bengaluru.show()

+--------------------+------+------------+---------+--------------------+--------------------+-------------+-------------+
|                name|rating|rating_count|     city|                area|              cusine|     latitude|    longitude|
+--------------------+------+------------+---------+--------------------+--------------------+-------------+-------------+
|Byg Brewski Brewi...|   4.9|       19305|Bengaluru|       Sarjapur Road|Continental, Nort...|12.9129344258|77.6829059422|
|                Toit|   4.6|       15731|Bengaluru|         Indiranagar|Italian, American...| 12.978888098| 77.640921995|
|            Truffles|   4.6|       15653|Bengaluru|Koramangala 5th B...|Cafe, American, B...| 12.933426714|77.6143838838|
|AB's - Absolute B...|   4.8|       13164|Bengaluru|        Marathahalli|European, Mediter...|   12.9497983|   77.6993079|
|     The Black Pearl|   4.9|       12686|Bengaluru|Koramangala 5th B...|North Indian, Eur...|12.9344733575|77.6159777865|
|      TBC Sky L

In [82]:
top_10_cusine_bengaluru = folium.Map(location= [12.9716, 77.5946], zoom_start= 12)
top_10_cusine_ben = folium.map.FeatureGroup()

for name, lat, lng, in zip(q4_bengaluru.toPandas()['name'].values.tolist(), q4_bengaluru.toPandas()['latitude'].values.tolist(), q4_bengaluru.toPandas()['longitude'].values.tolist()):
  top_10_cusine_ben.add_child(
      folium.Marker(
          [lat, lng],
          popup= name,
      )
  )

top_10_cusine_bengaluru.add_child(top_10_cusine_ben)

In [84]:
q4_bengaluru_py = q4_bengaluru.toPandas()
fig = px.bar(q4_bengaluru_py, x= 'cusine', y= 'rating_count', text= 'rating_count', color= 'cusine')
fig.update_traces(texttemplate= '%{text:.2s}', textposition= 'outside')
fig.update_layout(xaxis_tickangle= -45)
fig.update_layout(title= 'Most Preferred Cusines in Bangalore', xaxis_title= 'Cusine', yaxis_title= 'Rating Count')

fig

Banglore, otherwise known as Bengaluru has a mixture of dishes that the parochial people love and have them serve on their table. From its own country made dishes to the exotic European and American dishes, the people there loves to try new dishes.

In [85]:
q4_delhi = df.select('name', 'rating', 'rating_count', 'city', 'area', 'cusine', 'latitude', 'longitude').filter(df.city == 'Delhi NCR').orderBy(desc('rating_count')).limit(10)
q4_delhi.show()

+--------------------+------+------------+---------+---------------+--------------------+-------------+-------------+
|                name|rating|rating_count|     city|           area|              cusine|     latitude|    longitude|
+--------------------+------+------------+---------+---------------+--------------------+-------------+-------------+
|      Warehouse Cafe|   4.1|       10035|Delhi NCR|Connaught Place|American, Contine...|28.6337385011|77.2209410369|
|  Lord Of The Drinks|   4.4|        9582|Delhi NCR|Connaught Place|European, Chinese...|28.6317444939|77.2166890651|
|             Tamasha|   4.5|        8866|Delhi NCR|Connaught Place|Finger Food, Nort...|28.6296624581|77.2218600288|
|     Saravana Bhavan|   4.4|        7906|Delhi NCR|Connaught Place|South Indian, Des...|28.6321402968|77.2164647654|
|The Flying Saucer...|   4.3|        7903|Delhi NCR|    Nehru Place|Continental, Bar ...|28.5520208928|77.2508771718|
|               Local|   4.4|        6966|Delhi NCR|Conn

In [86]:
top_10_cusine_delhi = folium.Map(location= [28.7041, 77.1025], zoom_start= 12)
top_10_cusine_del = folium.map.FeatureGroup()

for name, lat, lng, in zip(q4_delhi.toPandas()['name'].values.tolist(), q4_delhi.toPandas()['latitude'].values.tolist(), q4_delhi.toPandas()['longitude'].values.tolist()):
  top_10_cusine_del.add_child(
      folium.Marker(
          [lat, lng],
          popup= name,
      )
  )

top_10_cusine_delhi.add_child(top_10_cusine_del)

In [87]:
q4_delhi_py = q4_delhi.toPandas()
fig = px.bar(q4_delhi_py, x= 'cusine', y= 'rating_count', text= 'rating_count', color= 'cusine')
fig.update_traces(texttemplate= '%{text:.2s}', textposition= 'outside')
fig.update_layout(xaxis_tickangle= -45)
fig.update_layout(title= 'Most Preferred Cusines in Delhi', xaxis_title= 'Cusine', yaxis_title= 'Rating Count')

fig

Delhi also shows wide diversity in the tastes of the local people. They have South Indian and North Indian dishes to European as well as some American ones to. 

In [88]:
q4_kolkata = df.select('name', 'rating', 'rating_count', 'city', 'area', 'cusine', 'latitude', 'longitude').filter(df.city == 'Kolkata').orderBy(desc('rating_count')).limit(10)
q4_kolkata.show()

+-------------------+------+------------+-------+-------------------+--------------------+-------------+-------------+
|               name|rating|rating_count|   city|               area|              cusine|     latitude|    longitude|
+-------------------+------+------------+-------+-------------------+--------------------+-------------+-------------+
|          Peter Cat|   4.2|       11917|Kolkata|   Park Street Area|Continental, Nort...| 22.552630386|88.3527230099|
|    Barbeque Nation|   4.7|        8551|Kolkata|Sector 5, Salt Lake|North Indian, Chi...|22.5691537814|88.4333170205|
|              BarBQ|   4.4|        8052|Kolkata|   Park Street Area|Chinese, North In...|22.5527511443|88.3525563776|
|Chili's Grill & Bar|   4.7|        7615|Kolkata|         Ballygunge|American, Mexican...| 22.538751149| 88.365656957|
|            Mocambo|   4.2|        6466|Kolkata|   Park Street Area|         Continental|22.5529654125|88.3531799912|
|            Arsalan|   4.1|        5963|Kolkata

In [89]:
top_10_cusine_kolkata = folium.Map(location= [22.5726, 88.3639], zoom_start= 12)
top_10_cusine_kol = folium.map.FeatureGroup()

for name, lat, lng, in zip(q4_kolkata.toPandas()['name'].values.tolist(), q4_kolkata.toPandas()['latitude'].values.tolist(), q4_kolkata.toPandas()['longitude'].values.tolist()):
  top_10_cusine_kol.add_child(
      folium.Marker(
          [lat, lng],
          popup= name,
      )
  )

top_10_cusine_kolkata.add_child(top_10_cusine_kol)

In [90]:
q4_kolkata_py = q4_kolkata.toPandas()
fig = px.bar(q4_delhi_py, x= 'cusine', y= 'rating_count', text= 'rating_count', color= 'cusine')
fig.update_traces(texttemplate= '%{text:.2s}', textposition= 'outside')
fig.update_layout(xaxis_tickangle= -45)
fig.update_layout(title= 'Most Preferred Cusines in Kolkata', xaxis_title= 'Cusine', yaxis_title= 'Rating Count')

fig

Kolkata prefers Continential along with Chinese dishes along with a other offerings.

In [91]:
q4_chennai = df.select('name', 'rating', 'rating_count', 'city', 'area', 'cusine', 'latitude', 'longitude').filter(df.city == 'Chennai').orderBy(desc('rating_count')).limit(10)
q4_chennai.show()

+--------------------+------+------------+-------+------------+--------------------+-------------+-------------+
|                name|rating|rating_count|   city|        area|              cusine|     latitude|    longitude|
+--------------------+------+------------+-------+------------+--------------------+-------------+-------------+
|AB's - Absolute B...|   4.9|       10357|Chennai|    T. Nagar|BBQ, North Indian...|13.0452596662|80.2412599325|
|      Coal Barbecues|   4.9|        8171|Chennai|   Velachery|North Indian, Chi...|12.9860573362|80.2182241157|
|      Coal Barbecues|   4.9|        7348|Chennai|    T. Nagar|North Indian, Med...|13.0468091625|80.2370213717|
|     Barbeque Nation|   4.8|        5799|Chennai|    T. Nagar|North Indian, Con...|13.0467614758|80.2348900214|
|              Onesta|   4.9|        5126|Chennai| Semmancheri|Pizza, Italian, F...|12.8750884413|80.2273225039|
|            Paradise|   4.4|        5106|Chennai|   Perungudi|Biryani, North In...|12.972456712

In [93]:
top_10_cusine_chennai = folium.Map(location= [13.0827, 80.2707], zoom_start= 12)
top_10_cusine_chen = folium.map.FeatureGroup()

for name, lat, lng, in zip(q4_chennai.toPandas()['name'].values.tolist(), q4_chennai.toPandas()['latitude'].values.tolist(), q4_chennai.toPandas()['longitude'].values.tolist()):
  top_10_cusine_chen.add_child(
      folium.Marker(
          [lat, lng],
          popup= name,
      )
  )

top_10_cusine_chennai.add_child(top_10_cusine_chen)

In [94]:
q4_chennai_py = q4_chennai.toPandas()
fig = px.bar(q4_chennai_py, x= 'cusine', y= 'rating_count', text= 'rating_count', color= 'cusine')
fig.update_traces(texttemplate= '%{text:.2s}', textposition= 'outside')
fig.update_layout(xaxis_tickangle= -45)
fig.update_layout(title= 'Most Preferred Cusines in Chennai', xaxis_title= 'Cusine', yaxis_title= 'Rating Count')

fig

People in Chennai has varies restaurant ranging from BBQ, Seafood, Biryani, and some North Indian cuisines.

#### What are the top 10 costliest restaurants in the country?

In [95]:
q5 = df.select('name', 'rating', 'cost_for_two', 'city').orderBy(desc('cost_for_two')).limit(10)
q5.show()

+--------------------+------+------------+---------+
|                name|rating|cost_for_two|     city|
+--------------------+------+------------+---------+
|Ocean - The Priva...|   3.6|       30000|   Mumbai|
|Gol Bungalow - Ta...|   3.9|       15000|Hyderabad|
|              Bhairo|   2.9|       15000|  Udaipur|
|          Fly Dining|   3.8|       14000|Bengaluru|
|Pillars - Umaid B...|   3.7|       12000|  Jodhpur|
|Risala- Umaid Bha...|   4.1|       12000|  Jodhpur|
|Trophy Bar- Umaid...|   3.3|       12000|  Jodhpur|
|Wasabi By Morimot...|   4.3|       10000|   Mumbai|
|Orient Express - ...|   4.2|        8000|Delhi NCR|
|Yuuka - The St. R...|   4.1|        8000|   Mumbai|
+--------------------+------+------------+---------+



In [104]:
q5_py = q5.toPandas()
fig = px.bar(q5_py, x= 'name', y= 'cost_for_two', text= 'cost_for_two', color= 'name')
fig.update_traces(texttemplate= '%{text:.2s}', textposition= 'outside')
fig.update_layout(xaxis_tickangle= -45)
fig.update_layout(title= 'Top 10 Costliest Restaurants in the India', xaxis_title= 'Restaurants', yaxis_title= 'Cost for Two')

fig

#### What is the top 10 costliest restaurants in specific city?

In [106]:
q6_mumbai = df.select('name', 'rating', 'cost_for_two', 'area', 'city', 'latitude', 'longitude').filter(df.city == 'Mumbai').orderBy(desc('cost_for_two')).limit(10)
q6_mumbai.show()

+--------------------+------+------------+--------------------+------+-------------+-------------+
|                name|rating|cost_for_two|                area|  city|     latitude|    longitude|
+--------------------+------+------------+--------------------+------+-------------+-------------+
|Ocean - The Priva...|   3.6|       30000|     Vile Parle East|Mumbai|19.0947516196|72.8542259708|
|Wasabi By Morimot...|   4.3|       10000|              Colaba|Mumbai|18.9221397481|72.8337563574|
|Yuuka - The St. R...|   4.1|        8000|         Lower Parel|Mumbai|18.9945723959|72.8237218782|
|              Masque|   4.3|        6500|           Mahalaxmi|Mumbai|18.9898008573|72.8257948905|
|Masala Kraft - Th...|   4.1|        6500|              Colaba|Mumbai|18.9221397481|72.8337566927|
|Le Cirque Signatu...|   4.0|        6000|             Chakala|Mumbai|19.1099045333|72.8741343319|
|Celini - Grand Hyatt|   4.2|        6000|      Santacruz East|Mumbai|19.0785225332|72.8512286022|
|Vista - T

In [109]:
top_10_costliest_mumbai = folium.Map(location= [19.0760, 72.8777], zoom_start= 12)
top_10_costliest_mum = folium.map.FeatureGroup()

for name, lat, lng, in zip(q6_mumbai.toPandas()['name'].values.tolist(), q6_mumbai.toPandas()['latitude'].values.tolist(), q6_mumbai.toPandas()['longitude'].values.tolist()):
  top_10_costliest_mum.add_child(
      folium.Marker(
          [lat, lng],
          popup= name,
      )
  )

top_10_costliest_mumbai.add_child(top_10_costliest_mum)

In [107]:
q6_mumbai_py = q6_mumbai.toPandas()
fig = px.bar(q6_mumbai_py, x= 'name', y= 'cost_for_two', text= 'cost_for_two', color= 'name')
fig.update_traces(texttemplate= '%{text:.2s}', textposition= 'outside')
fig.update_layout(xaxis_tickangle= -45)
fig.update_layout(title= 'Top 10 Costliest Restaurants in the Mumbai', xaxis_title= 'Restaurants', yaxis_title= 'Cost for Two')

fig

The most costliest hotel in the country is **Ocean - The Private Dining Room - Sahara Star** which is in Mumbai.

In [110]:
q6_hyderabad = df.select('name', 'rating', 'cost_for_two', 'area', 'city', 'latitude', 'longitude').filter(df.city == 'Hyderabad').orderBy(desc('cost_for_two')).limit(10)
q6_hyderabad.show()

+--------------------+------+------------+-------------+---------+-------------+-------------+
|                name|rating|cost_for_two|         area|     city|     latitude|    longitude|
+--------------------+------+------------+-------------+---------+-------------+-------------+
|Gol Bungalow - Ta...|   3.9|       15000|    Falaknuma|Hyderabad|  17.33433806|78.4676032886|
|Adaa - Taj Falakn...|   4.5|        8000|    Falaknuma|Hyderabad|17.3342833316|78.4675684199|
|Celeste - Taj Fal...|   4.4|        7000|    Falaknuma|Hyderabad| 17.334312136|78.4675623849|
|Vineela Yadlapall...|   3.5|        5000|Jubilee Hills|Hyderabad|     17.43328|    78.404894|
|Thai Pavilion - V...|   4.3|        4000|     Begumpet|Hyderabad|17.4436660625|78.4609145299|
|Altitude - Hydera...|   4.1|        4000|Necklace Road|Hyderabad|17.4252295042|78.4868296981|
|  Prego - The Westin|   4.2|        3500|  Hitech City|Hyderabad|17.4419548168| 78.381065838|
|Dakshin - ITC Kak...|   4.4|        3400|     Beg

In [111]:
top_10_costliest_hyderabad = folium.Map(location= [17.3850, 78.4867], zoom_start= 12)
top_10_costliest_hyd = folium.map.FeatureGroup()

for name, lat, lng, in zip(q6_hyderabad.toPandas()['name'].values.tolist(), q6_hyderabad.toPandas()['latitude'].values.tolist(), q6_hyderabad.toPandas()['longitude'].values.tolist()):
  top_10_costliest_hyd.add_child(
      folium.Marker(
          [lat, lng],
          popup= name,
      )
  )

top_10_costliest_hyderabad.add_child(top_10_costliest_hyd)

In [112]:
q6_hyderabad_py = q6_hyderabad.toPandas()
fig = px.bar(q6_hyderabad_py, x= 'name', y= 'cost_for_two', text= 'cost_for_two', color= 'name')
fig.update_traces(texttemplate= '%{text:.2s}', textposition= 'outside')
fig.update_layout(xaxis_tickangle= -45)
fig.update_layout(title= 'Top 10 Costliest Restaurants in the Hyderabad', xaxis_title= 'Restaurants', yaxis_title= 'Cost for Two')

fig

For the city of Hyderabad, the most costly restaurant is the **Gol Bungalow - Taj Falaknuma Palace** 

In [113]:
q6_bengaluru = df.select('name', 'rating', 'cost_for_two', 'area', 'city', 'latitude', 'longitude').filter(df.city == 'Bengaluru').orderBy(desc('cost_for_two')).limit(10)
q6_bengaluru.show()

+--------------------+------+------------+-----------------+---------+-------------+-------------+
|                name|rating|cost_for_two|             area|     city|     latitude|    longitude|
+--------------------+------+------------+-----------------+---------+-------------+-------------+
|          Fly Dining|   3.8|       14000|         Nagawara|Bengaluru| 13.047513357|77.6103280485|
|Le Cirque Signatu...|   4.2|        6000| Old Airport Road|Bengaluru|12.9596221967|77.6487148181|
|Royal Afghan - IT...|   4.3|        6000|      Sankey Road|Bengaluru|12.9946448598|77.5852289423|
|         Grasshopper|   4.3|        5000|Bannerghatta Road|Bengaluru|12.8641891753|77.5894108415|
|Dakshin - ITC Win...|   4.3|        5000|      Sankey Road|Bengaluru|12.9946461666|77.5852343068|
|Dum Pukht Jolly N...|   4.3|        5000|      Sankey Road|Bengaluru|12.9946693616|77.5853553414|
|Malties - Radisso...|   4.2|        4500|     Marathahalli|Bengaluru|12.9498869046|77.6992553473|
|La Brasse

In [114]:
top_10_costliest_bengaluru = folium.Map(location= [12.9716, 77.5946], zoom_start= 12)
top_10_costliest_ben = folium.map.FeatureGroup()

for name, lat, lng, in zip(q6_bengaluru.toPandas()['name'].values.tolist(), q6_bengaluru.toPandas()['latitude'].values.tolist(), q6_bengaluru.toPandas()['longitude'].values.tolist()):
  top_10_costliest_ben.add_child(
      folium.Marker(
          [lat, lng],
          popup= name,
      )
  )

top_10_costliest_bengaluru.add_child(top_10_costliest_ben)

In [115]:
q6_bengaluru_py = q6_bengaluru.toPandas()
fig = px.bar(q6_bengaluru_py, x= 'name', y= 'cost_for_two', text= 'cost_for_two', color= 'name')
fig.update_traces(texttemplate= '%{text:.2s}', textposition= 'outside')
fig.update_layout(xaxis_tickangle= -45)
fig.update_layout(title= 'Top 10 Costliest Restaurants in the Bangalore', xaxis_title= 'Restaurants', yaxis_title= 'Cost for Two')

fig

**Fly Dining** in Bangalore is the most expensive restaurant in that city.

In [116]:
q6_delhi = df.select('name', 'rating', 'cost_for_two', 'area', 'city', 'latitude', 'longitude').filter(df.city == 'Delhi NCR').orderBy(desc('cost_for_two')).limit(10)
q6_delhi.show()

+--------------------+------+------------+-------------+---------+-------------+-------------+
|                name|rating|cost_for_two|         area|     city|     latitude|    longitude|
+--------------------+------+------------+-------------+---------+-------------+-------------+
|Orient Express - ...|   4.2|        8000| Chanakyapuri|Delhi NCR| 28.595005434|77.1718254313|
|Tian - Asian Cuis...|   4.1|        7000| Chanakyapuri|Delhi NCR|28.5979671567|77.1733117104|
|Ottimo At West Vi...|   4.1|        6600| Chanakyapuri|Delhi NCR|28.5979530268|77.1732996404|
|Bukhara - ITC Maurya|   4.3|        6500| Chanakyapuri|Delhi NCR|28.5979218233|77.1732453257|
|Nostalgia at 1911...|   3.5|        6000|      Janpath|Delhi NCR|28.6250391609|77.2191154584|
|The Theatre Club ...|   3.8|        6000|Paschim Vihar|Delhi NCR|28.6675290399|77.0921118557|
|Amaranta - The Ob...|   4.1|        6000|  Udyog Vihar|Delhi NCR|28.5017066348|77.0867534727|
|The Spice Route -...|   4.1|        6000|      Ja

In [117]:
top_10_costliest_delhi = folium.Map(location= [28.7041, 77.1025], zoom_start= 12)
top_10_costliest_del = folium.map.FeatureGroup()

for name, lat, lng, in zip(q6_delhi.toPandas()['name'].values.tolist(), q6_delhi.toPandas()['latitude'].values.tolist(), q6_delhi.toPandas()['longitude'].values.tolist()):
  top_10_costliest_del.add_child(
      folium.Marker(
          [lat, lng],
          popup= name,
      )
  )

top_10_costliest_delhi.add_child(top_10_costliest_del)

In [118]:
q6_delhi_py = q6_delhi.toPandas()
fig = px.bar(q6_delhi_py, x= 'name', y= 'cost_for_two', text= 'cost_for_two', color= 'name')
fig.update_traces(texttemplate= '%{text:.2s}', textposition= 'outside')
fig.update_layout(xaxis_tickangle= -45)
fig.update_layout(title= 'Top 10 Costliest Restaurants in the Delhi', xaxis_title= 'Restaurants', yaxis_title= 'Cost for Two')

fig

**Orient Express - Taj Palace** is the exorbitant restaurant in the capital.

In [120]:
q6_kolkata = df.select('name', 'rating', 'cost_for_two', 'area', 'city', 'latitude', 'longitude').filter(df.city == 'Kolkata').orderBy(desc('cost_for_two')).limit(10)
q6_kolkata.show()

+--------------------+------+------------+-------------------+-------+-------------+-------------+
|                name|rating|cost_for_two|               area|   city|     latitude|    longitude|
+--------------------+------+------------+-------------------+-------+-------------+-------------+
|Chinoiserie - Taj...|   4.3|        5000|            Alipore|Kolkata|22.5351267483|88.3337970078|
|Guchhi - Hyatt Re...|   4.0|        4600|Sector 3, Salt Lake|Kolkata|22.5716872122|88.4054357931|
|Eden Pavilion - I...|   4.0|        4500|  Science City Area|Kolkata|22.5460092642|88.3990199491|
|West View Bar & G...|   3.8|        4500|  Science City Area|Kolkata|22.5460092642|88.3990199491|
|     Seasonal Tastes|   4.3|        4200|           New Town|Kolkata| 22.591442226|88.4738874435|
|The Junction - Ta...|   3.8|        4200|            Alipore|Kolkata|22.5351323225|88.3338060603|
|     Seasonal Tastes|   4.4|        4200|           New Town|Kolkata| 22.591442226|88.4738874435|
|      Zen

In [121]:
top_10_costliest_kolkata = folium.Map(location= [22.5726, 88.3639], zoom_start= 12)
top_10_costliest_kol = folium.map.FeatureGroup()

for name, lat, lng, in zip(q6_kolkata.toPandas()['name'].values.tolist(), q6_kolkata.toPandas()['latitude'].values.tolist(), q6_kolkata.toPandas()['longitude'].values.tolist()):
  top_10_costliest_kol.add_child(
      folium.Marker(
          [lat, lng],
          popup= name,
      )
  )

top_10_costliest_kolkata.add_child(top_10_costliest_kol)

In [122]:
q6_kolkata_py = q6_kolkata.toPandas()
fig = px.bar(q6_kolkata_py, x= 'name', y= 'cost_for_two', text= 'cost_for_two', color= 'name')
fig.update_traces(texttemplate= '%{text:.2s}', textposition= 'outside')
fig.update_layout(xaxis_tickangle= -45)
fig.update_layout(title= 'Top 10 Costliest Restaurants in the Kolkata', xaxis_title= 'Restaurants', yaxis_title= 'Cost for Two')

fig

**Chinoiserie - Taj Bengal** is the most expensive restaurant in the city of Kolkata

In [123]:
q6_chennai = df.select('name', 'rating', 'cost_for_two', 'area', 'city', 'latitude', 'longitude').filter(df.city == 'Chennai').orderBy(desc('cost_for_two')).limit(10)
q6_chennai.show()

+--------------------+------+------------+---------------+-------+-------------+-------------+
|                name|rating|cost_for_two|           area|   city|     latitude|    longitude|
+--------------------+------+------------+---------------+-------+-------------+-------------+
|         Tao of Peng|   4.5|        5500|  Mahabalipuram|Chennai|12.7367982233|80.2356574684|
|Royal Vega - ITC ...|   4.4|        5500|         Guindy|Chennai|13.0115629085|80.2200607583|
|Blend - Taj Club ...|   4.0|        5000|Thousand Lights|Chennai|13.0614315653|80.2641633153|
|Ottimo Cucina Ita...|   4.1|        5000|         Guindy|Chennai| 13.011537755|80.2200694755|
|Salt. Co. 531 - R...|   4.1|        4800|       GST Road|Chennai|12.9947863166| 80.187372677|
|Peshawri - ITC Gr...|   4.4|        4500|         Guindy|Chennai|13.0115109683| 80.220127143|
|Avartana - ITC Gr...|   4.7|        4500|         Guindy|Chennai|13.0115446151|80.2201690525|
|Pan Asian - ITC G...|   4.5|        4200|        

In [124]:
top_10_costliest_chennai = folium.Map(location= [13.0827, 80.2707], zoom_start= 12)
top_10_costliest_chen = folium.map.FeatureGroup()

for name, lat, lng, in zip(q6_chennai.toPandas()['name'].values.tolist(), q6_chennai.toPandas()['latitude'].values.tolist(), q6_chennai.toPandas()['longitude'].values.tolist()):
  top_10_costliest_chen.add_child(
      folium.Marker(
          [lat, lng],
          popup= name,
      )
  )

top_10_costliest_chennai.add_child(top_10_costliest_chen)

In [125]:
q6_chennai_py = q6_chennai.toPandas()
fig = px.bar(q6_chennai_py, x= 'name', y= 'cost_for_two', text= 'cost_for_two', color= 'name')
fig.update_traces(texttemplate= '%{text:.2s}', textposition= 'outside')
fig.update_layout(xaxis_tickangle= -45)
fig.update_layout(title= 'Top 10 Costliest Restaurants in the Chennai', xaxis_title= 'Restaurants', yaxis_title= 'Cost for Two')

fig

**Tao of Peng** is the costliest restaurant in the city of Chennai.

#### Top 10 cheapest restaurant

In [None]:
q8 = df.select('name', 'rating', 'cost_for_two', 'city').filter(df.cost_for_two > 50).orderBy(asc('cost_for_two')).limit(10)
q8.show()

+--------------------+------+------------+---------+
|                name|rating|cost_for_two|     city|
+--------------------+------+------------+---------+
|            18 Dosas|   2.7|          60|Hyderabad|
|          Mamee Soup|   4.1|          60|  Chennai|
|     Sanskriti Chaat|   3.8|          60|   Jaipur|
|Shri Gokul Prasad...|   3.2|          60|  Lucknow|
|   Chak De Fast Food|   3.3|          60|   Kanpur|
|          Vas Bakery|   3.4|          60|Mangalore|
|       Ashoka Bakery|   2.9|          60|   Kanpur|
|     Shankar's Chaat|   4.0|          60|    Ajmer|
|     Student's Lassi|   2.9|          60|Gorakhpur|
| Makhija Enterprises|   3.2|          60|  Gwalior|
+--------------------+------+------------+---------+



#### What are the top 10 cheapest restaurants in the specific cities?

In [None]:
q9_mumbai = df.select('name', 'rating', 'cost_for_two', 'city', 'area').filter((df.cost_for_two >= 40) & (df.city == 'Mumbai')).orderBy(asc('cost_for_two')).limit(10)
q9_mumbai.show()

+--------------------+------+------------+------+--------------------+
|                name|rating|cost_for_two|  city|                area|
+--------------------+------+------------+------+--------------------+
|     Shegaon Kachori|   3.3|          50|Mumbai|  Khopat, Thane West|
|Kullad Chai Coffe...|   3.1|          50|Mumbai|Hiranandani Estat...|
|           Ho5 Store|   3.3|          50|Mumbai|        Matunga West|
|Jai Ambika Pan Bh...|   3.2|          50|Mumbai|          Ulhasnagar|
|Batulz- Cakes N More|   2.9|          50|Mumbai|             Kamothe|
|   Ornamental Mithai|   4.3|         100|Mumbai|         Mulund West|
|   Hotel Maharashtra|   3.3|         100|Mumbai|       Borivali West|
|          J.J Jalebi|   3.8|         100|Mumbai|               Kurla|
| Jhama Silver Sweets|   3.7|         100|Mumbai|      Kopar Khairane|
|          Misal Ghar|   3.2|         100|Mumbai| Naupada, Thane West|
+--------------------+------+------------+------+--------------------+



In [None]:
q9_hyderabad = df.select('name', 'rating', 'cost_for_two', 'city', 'area').filter((df.cost_for_two >= 40) & (df.city == 'Hyderabad')).orderBy(asc('cost_for_two')).limit(10)
q9_hyderabad.show()

+--------------------+------+------------+---------+------------+
|                name|rating|cost_for_two|     city|        area|
+--------------------+------+------------+---------+------------+
|         Sohail Cafe|   3.3|          50|Hyderabad| Musheerabad|
|  Zar Zari Zar Baksh|   3.0|          50|Hyderabad| Musheerabad|
|Sri Gayatri Dabel...|   3.3|          50|Hyderabad|    Kothapet|
|     Naidu Tea Stall|   3.1|          50|Hyderabad|  Kukatpally|
|   K.G.N Veg Biryani|   2.9|          50|Hyderabad|   Charminar|
|        Chai Deewane|   3.2|          50|Hyderabad|  Kukatpally|
|            18 Dosas|   2.7|          60|Hyderabad|   L B Nagar|
|      Delicious Dosa|   3.9|          80|Hyderabad|Dilsukhnagar|
|Jai Durga Chinese...|   3.1|         100|Hyderabad| Marredpally|
|     Laxman ki Bandi|   4.2|         100|Hyderabad|Begum Bazaar|
+--------------------+------+------------+---------+------------+



In [None]:
q9_bengaluru = df.select('name', 'rating', 'cost_for_two', 'city', 'area').filter((df.cost_for_two >= 30) & (df.city == 'Bengaluru')).orderBy(asc('cost_for_two')).limit(10)
q9_bengaluru.show()

+--------------------+------+------------+---------+-------------+
|                name|rating|cost_for_two|     city|         area|
+--------------------+------+------------+---------+-------------+
|Shree Vishnu That...|   3.9|          40|Bengaluru|       Domlur|
|            VarieTea|   3.7|          50|Bengaluru| New BEL Road|
|          Lassi Shop|   3.8|          60|Bengaluru|   Nagarbhavi|
|   Nellore Dosa Camp|   3.3|          70|Bengaluru|          BTM|
|          Lassi Shop|   3.8|          80|Bengaluru|     JP Nagar|
|Om Ganesha Fruit ...|   3.4|          80|Bengaluru|Vasanth Nagar|
|           Dosa Cafe|   3.4|          80|Bengaluru|  Vijay Nagar|
|     Rajastani Rasoi|   3.2|         100|Bengaluru| Basavanagudi|
|         Cake Palace|   3.4|         100|Bengaluru|  Indiranagar|
|         Coffee Time|   4.1|         100|Bengaluru|      Kengeri|
+--------------------+------+------------+---------+-------------+



In [None]:
q9_delhi = df.select('name', 'rating', 'cost_for_two', 'city', 'area').filter((df.cost_for_two >= 40) & (df.city == 'Delhi NCR')).orderBy(asc('cost_for_two')).limit(10)
q9_delhi.show()

+--------------------+------+------------+---------+-------------------+
|                name|rating|cost_for_two|     city|               area|
+--------------------+------+------------+---------+-------------------+
|  Gopal Kachori Wala|   3.3|          50|Delhi NCR|       Punjabi Bagh|
| Shri Saheb Ji Dairy|   3.7|          50|Delhi NCR|     Dilshad Garden|
|       Duggal Snacks|   3.9|          50|Delhi NCR|Mayur Vihar Phase 2|
|Pandit Ved Prakas...|   3.9|          50|Delhi NCR|      Chandni Chowk|
| Shri Ram Poori Wale|   2.8|          50|Delhi NCR|        Kirti Nagar|
|Aggarwal Jalebi Wale|   2.7|          50|Delhi NCR|        Uttam Nagar|
| India Coffee Centre|   2.9|          50|Delhi NCR|    Connaught Place|
|        Bansal Sweet|   2.6|          50|Delhi NCR|Mayur Vihar Phase 1|
|         Ram Kachori|   3.6|          50|Delhi NCR|      Kashmiri Gate|
|Shree Ram Poori Wale|   3.0|          50|Delhi NCR|          Jail Road|
+--------------------+------+------------+---------

In [None]:
q9_kolkata = df.select('name', 'rating', 'cost_for_two', 'city', 'area').filter((df.cost_for_two >= 40) & (df.city == 'Kolkata')).orderBy(asc('cost_for_two')).limit(10)
q9_kolkata.show()

+--------------------+------+------------+-------+-----------------+
|                name|rating|cost_for_two|   city|             area|
+--------------------+------+------------+-------+-----------------+
|    Ramji Ghugniwala|   2.9|          80|Kolkata| Park Street Area|
|  Ramesh Chilla Wala|   3.1|          80|Kolkata| Park Street Area|
|Narendra Mistanna...|   3.5|         100|Kolkata|       Tollygunge|
|        Ghosh Sweets|   3.1|         100|Kolkata|          Shibpur|
|Shib Namkin & Sweets|   3.3|         100|Kolkata|    Picnic Garden|
|       Shankar Cabin|   3.8|         100|Kolkata|          Golpark|
|Laxmi Narayan Mis...|   3.2|         100|Kolkata|        Baguihati|
|Shree  Hari Mista...|   3.3|         100|Kolkata|         Gariahat|
|Sri Krishna Hot C...|   3.1|         100|Kolkata|Camac Street Area|
|Haldiram & Sons B...|   3.3|         100|Kolkata|        Bow Bazar|
+--------------------+------+------------+-------+-----------------+



In [None]:
q9_chennai = df.select('name', 'rating', 'cost_for_two', 'city', 'area').filter((df.cost_for_two >= 30) & (df.city == 'Chennai')).orderBy(asc('cost_for_two')).limit(10)
q9_chennai.show()

+--------------------+------+------------+-------+---------------+
|                name|rating|cost_for_two|   city|           area|
+--------------------+------+------------+-------+---------------+
|            Soda Hub|   3.3|          40|Chennai|       Navallur|
|     Saravana Coffee|   3.1|          50|Chennai|    Kodambakkam|
|          Mamee Soup|   4.1|          60|Chennai|  West Mambalam|
|             WN Cafe|   3.3|         100|Chennai|    Nanganallur|
|             Mango's|   3.9|         100|Chennai|          Adyar|
|        Mumbai Kulfi|   3.8|         100|Chennai| Sholinganallur|
|Chapati Parantha ...|   3.6|         100|Chennai|  Thiruvanmiyur|
|      Lassi Junction|   3.7|         100|Chennai|Anna Nagar West|
|      Beeda And Soda|   3.4|         100|Chennai|          Adyar|
|  Madurai Sri Bhavan|   3.5|         100|Chennai|  Thiruvanmiyur|
+--------------------+------+------------+-------+---------------+

