#  📊 Zomato Bangalore Restaurant Trends

###🎯 ObjectiveThe aim of this hackathon is to perform data-driven analysis of restaurant trends in Bangalore using Zomato’s real-world dataset. Participants are expected to:

- **Clean and preprocess the data**


- **Perform exploratory data analysis (EDA)**


- **Merge location data for mapping**


- **Extract actionable insights**


- **Answer MCQ-style reasoning questions based on analysis**


In [212]:
# Importing required Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from IPython.core.display import display, HTML
import time
from collections import Counter


In [148]:
# Loading the data
zomato = pd.read_csv(r'/content/drive/MyDrive/Files.csv/zomato_data.csv')
geo = pd.read_csv(r'/content/drive/MyDrive/Files.csv/Geographical Coordinates.csv')
print(zomato.shape)
print(geo.shape)

(51717, 10)
(26, 3)


### Statistical Analysis

In [149]:
zomato.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51717 entries, 0 to 51716
Data columns (total 10 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   online_order               51717 non-null  object
 1   book_table                 51717 non-null  object
 2   rate                       43942 non-null  object
 3   votes                      51717 non-null  int64 
 4   rest_type                  51490 non-null  object
 5   dish_liked                 23639 non-null  object
 6   cuisines                   51672 non-null  object
 7   approx_costfor_two_people  51371 non-null  object
 8   listed_intype              51717 non-null  object
 9   listed_incity              51717 non-null  object
dtypes: int64(1), object(9)
memory usage: 3.9+ MB


In [150]:
zomato.head()

Unnamed: 0,online_order,book_table,rate,votes,rest_type,dish_liked,cuisines,approx_costfor_two_people,listed_intype,listed_incity
0,Yes,Yes,4.1/5,775,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,Buffet,Banashankari
1,Yes,No,4.1/5,787,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,Buffet,Banashankari
2,Yes,No,3.8/5,918,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,Buffet,Banashankari
3,No,No,3.7/5,88,Quick Bites,Masala Dosa,"South Indian, North Indian",300,Buffet,Banashankari
4,No,No,3.8/5,166,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,Buffet,Banashankari


In [213]:
zomato.tail()

Unnamed: 0,online_order,book_table,rate,votes,rest_type,dish_liked,cuisines,approx_costfor_two_people,listed_intype,listed_incity,revenue_estimate
51712,0,0,3.6,27,Bar,Not Available,Continental,1500,Pubs and bars,Whitefield,40500
51713,0,0,3.7,41,Bar,Not Available,Finger Food,600,Pubs and bars,Whitefield,24600
51714,0,0,3.7,41,Bar,Not Available,Finger Food,2000,Pubs and bars,Whitefield,82000
51715,0,1,4.3,236,Bar,"Cocktails, Pizza, Buttermilk",Finger Food,2500,Pubs and bars,Whitefield,590000
51716,0,0,3.4,13,"Bar, Casual Dining",Not Available,"Finger Food, North Indian, Continental",1500,Pubs and bars,Whitefield,19500


In [214]:
zomato.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
online_order,51717.0,0.588665,0.4920804,0.0,0.0,1.0,1.0,1.0
book_table,51717.0,0.124698,0.3303792,0.0,0.0,0.0,0.0,1.0
rate,51717.0,3.700362,0.3953908,1.8,3.5,3.7,3.9,4.9
votes,51717.0,291.646693,801.1924,1.0,29.0,41.0,198.0,16832.0
approx_costfor_two_people,51717.0,554.391689,437.5637,40.0,300.0,400.0,650.0,6000.0
revenue_estimate,51717.0,294113.852679,1081025.0,350.0,8200.0,20500.0,110400.0,26931200.0


## Data Cleaning & Preprocessing

In [151]:
zomato[['rate']].isna().sum()

Unnamed: 0,0
rate,7775


In [152]:
zomato['rate'].value_counts()

Unnamed: 0_level_0,count
rate,Unnamed: 1_level_1
NEW,2208
3.9/5,2098
3.8/5,2022
3.7/5,2011
3.9 /5,1874
...,...
2.2 /5,7
2.0 /5,7
2.0/5,4
1.8 /5,3


In [153]:
# Creating a function for cleaning th erating column.
def clean_rating_column(df, column='rate'):
    df[column] = df[column].replace(['-', 'NEW', 'nan', 'NaN'], np.nan)
    df[column] = df[column].astype(str).str.replace('/5', '', regex=False)
    df[column] = pd.to_numeric(df[column], errors='coerce')
    median_rating = df[column].median()
    df[column] = df[column].fillna(median_rating)

    return df



In [154]:
zomato = clean_rating_column(zomato, 'rate')

In [155]:
zomato['rate'].isna().sum()

np.int64(0)

In [156]:
zomato['rate'].value_counts()

Unnamed: 0_level_0,count
rate,Unnamed: 1_level_1
3.7,13873
3.9,3972
3.8,3873
3.6,3316
4.0,3183
4.1,2948
3.5,2784
3.4,2476
3.3,2310
4.2,2184


In [157]:
zomato.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51717 entries, 0 to 51716
Data columns (total 10 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   online_order               51717 non-null  object 
 1   book_table                 51717 non-null  object 
 2   rate                       51717 non-null  float64
 3   votes                      51717 non-null  int64  
 4   rest_type                  51490 non-null  object 
 5   dish_liked                 23639 non-null  object 
 6   cuisines                   51672 non-null  object 
 7   approx_costfor_two_people  51371 non-null  object 
 8   listed_intype              51717 non-null  object 
 9   listed_incity              51717 non-null  object 
dtypes: float64(1), int64(1), object(8)
memory usage: 3.9+ MB


In [158]:
# creating a function for cleaning the cost column
def clean_cost_column(df, column='approx_costfor_two_people'):
    df[column] = df[column].astype(str).str.replace(',', '', regex=False)
    df[column] = pd.to_numeric(df[column], errors='coerce')

    median_cost = df[column].median()
    df[column] = df[column].fillna(median_cost)

    return df



In [159]:
zomato = clean_cost_column(zomato, 'approx_costfor_two_people')

In [160]:
zomato['approx_costfor_two_people'].isna().sum()

np.int64(0)

In [211]:
zomato['approx_costfor_two_people'].median()

400.0

In [161]:
zomato['approx_costfor_two_people'].value_counts()

Unnamed: 0_level_0,count
approx_costfor_two_people,Unnamed: 1_level_1
300.0,7576
400.0,6908
500.0,4980
200.0,4857
600.0,3714
...,...
469.0,1
60.0,1
560.0,1
3700.0,1


In [162]:
# Imputing the missing Values
zomato['dish_liked'] = zomato['dish_liked'].replace(['NaN', np.nan], 'Not Available')
zomato['cuisines'] = zomato['cuisines'].replace(['NaN', np.nan], 'Other')
zomato['rest_type'] = zomato['rest_type'].replace(['NaN', np.nan], 'Unknown')

In [163]:
zomato.isna().sum()

Unnamed: 0,0
online_order,0
book_table,0
rate,0
votes,0
rest_type,0
dish_liked,0
cuisines,0
approx_costfor_two_people,0
listed_intype,0
listed_incity,0


In [164]:
zomato.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51717 entries, 0 to 51716
Data columns (total 10 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   online_order               51717 non-null  object 
 1   book_table                 51717 non-null  object 
 2   rate                       51717 non-null  float64
 3   votes                      51717 non-null  int64  
 4   rest_type                  51717 non-null  object 
 5   dish_liked                 51717 non-null  object 
 6   cuisines                   51717 non-null  object 
 7   approx_costfor_two_people  51717 non-null  float64
 8   listed_intype              51717 non-null  object 
 9   listed_incity              51717 non-null  object 
dtypes: float64(2), int64(1), object(7)
memory usage: 3.9+ MB


In [165]:
# Imputing the missing values with Median votes
median_vote = zomato['votes'].median()
zomato['votes'] = zomato['votes'].replace(0, median_vote)

In [166]:
median_vote

41.0

In [167]:
zomato['votes'].value_counts()

Unnamed: 0_level_0,count
votes,Unnamed: 1_level_1
41,10266
4,1140
6,992
7,872
9,738
...,...
4957,1
2382,1
4119,1
4048,1


In [168]:
# Converting catogiries into binary
zomato['online_order'] = zomato['online_order'].map({'Yes': 1, 'No': 0})
zomato['book_table'] = zomato['book_table'].map({'Yes': 1, 'No': 0})

In [169]:
zomato['book_table'].value_counts()

Unnamed: 0_level_0,count
book_table,Unnamed: 1_level_1
0,45268
1,6449


In [170]:
# Type Converstion
zomato['rate'] = zomato['rate'].astype(float)
zomato['votes'] = zomato['votes'].astype(int)
zomato['approx_costfor_two_people'] = zomato['approx_costfor_two_people'].astype(int)

In [171]:
zomato.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51717 entries, 0 to 51716
Data columns (total 10 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   online_order               51717 non-null  int64  
 1   book_table                 51717 non-null  int64  
 2   rate                       51717 non-null  float64
 3   votes                      51717 non-null  int64  
 4   rest_type                  51717 non-null  object 
 5   dish_liked                 51717 non-null  object 
 6   cuisines                   51717 non-null  object 
 7   approx_costfor_two_people  51717 non-null  int64  
 8   listed_intype              51717 non-null  object 
 9   listed_incity              51717 non-null  object 
dtypes: float64(1), int64(4), object(5)
memory usage: 3.9+ MB


In [174]:
zomato.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
online_order,51717.0,0.588665,0.49208,0.0,0.0,1.0,1.0,1.0
book_table,51717.0,0.124698,0.330379,0.0,0.0,0.0,0.0,1.0
rate,51717.0,3.700362,0.395391,1.8,3.5,3.7,3.9,4.9
votes,51717.0,291.646693,801.192379,1.0,29.0,41.0,198.0,16832.0
approx_costfor_two_people,51717.0,554.391689,437.563723,40.0,300.0,400.0,650.0,6000.0


In [175]:
# Creating a another Dataframe after Cleaning
cle_df = zomato
cle_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51717 entries, 0 to 51716
Data columns (total 10 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   online_order               51717 non-null  int64  
 1   book_table                 51717 non-null  int64  
 2   rate                       51717 non-null  float64
 3   votes                      51717 non-null  int64  
 4   rest_type                  51717 non-null  object 
 5   dish_liked                 51717 non-null  object 
 6   cuisines                   51717 non-null  object 
 7   approx_costfor_two_people  51717 non-null  int64  
 8   listed_intype              51717 non-null  object 
 9   listed_incity              51717 non-null  object 
dtypes: float64(1), int64(4), object(5)
memory usage: 3.9+ MB


In [176]:
geo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 3 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   listed_incity  26 non-null     object 
 1   Latitude       26 non-null     float64
 2   Longitude      26 non-null     float64
dtypes: float64(2), object(1)
memory usage: 756.0+ bytes


In [177]:
# Merging the Two Dataframes (Zomato & geographical coordinates)
merged_df = pd.merge(cle_df, geo, on='listed_incity', how='left')
merged_df

Unnamed: 0,online_order,book_table,rate,votes,rest_type,dish_liked,cuisines,approx_costfor_two_people,listed_intype,listed_incity,Latitude,Longitude
0,1,1,4.1,775,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,Buffet,Banashankari,12.939333,77.553982
1,1,0,4.1,787,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,Buffet,Banashankari,12.939333,77.553982
2,1,0,3.8,918,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,Buffet,Banashankari,12.939333,77.553982
3,0,0,3.7,88,Quick Bites,Masala Dosa,"South Indian, North Indian",300,Buffet,Banashankari,12.939333,77.553982
4,0,0,3.8,166,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,Buffet,Banashankari,12.939333,77.553982
...,...,...,...,...,...,...,...,...,...,...,...,...
51712,0,0,3.6,27,Bar,Not Available,Continental,1500,Pubs and bars,Whitefield,,
51713,0,0,3.7,41,Bar,Not Available,Finger Food,600,Pubs and bars,Whitefield,,
51714,0,0,3.7,41,Bar,Not Available,Finger Food,2000,Pubs and bars,Whitefield,,
51715,0,1,4.3,236,Bar,"Cocktails, Pizza, Buttermilk",Finger Food,2500,Pubs and bars,Whitefield,,


In [181]:
print(merged_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51717 entries, 0 to 51716
Data columns (total 12 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   online_order               51717 non-null  int64  
 1   book_table                 51717 non-null  int64  
 2   rate                       51717 non-null  float64
 3   votes                      51717 non-null  int64  
 4   rest_type                  51717 non-null  object 
 5   dish_liked                 51717 non-null  object 
 6   cuisines                   51717 non-null  object 
 7   approx_costfor_two_people  51717 non-null  int64  
 8   listed_intype              51717 non-null  object 
 9   listed_incity              51717 non-null  object 
 10  Latitude                   46137 non-null  float64
 11  Longitude                  46137 non-null  float64
dtypes: float64(3), int64(4), object(5)
memory usage: 4.7+ MB
None


In [186]:
# 1. What is the shape of the given dataset?
cle_df.shape

(51717, 10)

In [218]:
# 2. How many restaurants serve North Indian cuisine?

north_indian_count = zomato[zomato['cuisines'].str.contains('North Indian')].shape[0]
print(f'Number of restaurants serving North Indian cuisine are : {north_indian_count}')

Number of restaurants serving North Indian cuisine are : 21085


In [219]:
# 3. What cuisine is most commonly offered by restaurants in Bangalore?

cuisine_series = zomato['cuisines'].dropna().str.split(', ')
cuisine_list = cuisine_series.explode()
most_common_cuisine = cuisine_list.value_counts().idxmax()

print(f'the most commonly offered cuisine is {most_common_cuisine}')


the most commonly offered cuisine is North Indian


In [220]:
# 4. Which locality in Bangalore has the highest average cost for dining for two people?

highest_avg_cost = zomato.groupby('listed_incity')['approx_costfor_two_people'].mean().sort_values(ascending=False)
top_locality = highest_avg_cost.idxmax()


print(f'the locality with the highest average dining cost for two people is {top_locality}')


the locality with the highest average dining cost for two people is Church Street


In [221]:
# 5. Which restaurant type has the top rating with over 1000 votes?

filtered = zomato[zomato['votes'] > 1000]
top_rest_type = filtered.groupby('rest_type')['rate'].mean().sort_values(ascending=False)

top_type = top_rest_type.idxmax()

print(f'The restaurant type with the highest average rating (with >1000 votes) is {top_type}')


The restaurant type with the highest average rating (with >1000 votes) is Bakery


In [225]:
# 6. How much does it cost at minimum to eat out in Bangalore?

min_cost = zomato['approx_costfor_two_people'].mode()
print(f'The minimum cost to eat out in Bangalore is ₹{int(min_cost)} for two people.')


The minimum cost to eat out in Bangalore is ₹300 for two people.


  print(f'The minimum cost to eat out in Bangalore is ₹{int(min_cost)} for two people.')


In [193]:
#7. What percentage of total online orders is received by restaurants in Banashankari?

total_online_orders = zomato[zomato['online_order'] == 1].shape[0]
banashankari_online_orders = zomato[(zomato['listed_incity'] == 'Banashankari') & (zomato['online_order'] == 1)].shape[0]

# Calculate percentage
percentage = (banashankari_online_orders / total_online_orders) * 100

print(f"Restaurants in Banashankari receive {percentage:.2f}% of all online orders in Bangalore")


Restaurants in Banashankari receive 1.79% of all online orders in Bangalore.


In [227]:
# 8. Which locality has the most restaurants with over 500 votes and a rating below 3.0

filtered = zomato[(zomato['votes'] > 500) & (zomato['rate'] < 3.0)]
location_counts = filtered['listed_incity'].value_counts()

top_location = location_counts.idxmax()
print(f'The locality with the most such restaurants is {top_location}')

The locality with the most such restaurants is Brookefield


In [228]:
# 9. Which locality in Bangalore should Zomato target for expansion based on restaurant type diversity?

rest_type_diversity = zomato.groupby('listed_incity')['rest_type'].nunique().sort_values(ascending=False)

top_diverse_location = rest_type_diversity.idxmax()
print(f'Zomato should consider expanding in {top_diverse_location}')


Zomato should consider expanding in BTM


In [235]:
# 10. What's the average cost difference between buffet and delivery restaurants?

buf_df = zomato[zomato['listed_intype'].str.contains('Buffet')]
del_df = zomato[zomato['listed_intype'].str.contains('Delivery')]

buf_avg = buf_df['approx_costfor_two_people'].mean()
del_avg = del_df['approx_costfor_two_people'].mean()

cost_difference = buf_avg - del_avg
print(f"Average cost difference is RS {int(cost_difference)}")


Average cost difference is RS 831


In [232]:
# 11 . What is the maximum number of votes received by any restaurant with online ordering?
max_votes_online = zomato[zomato['online_order'] == 1]['votes'].max()
print(f"The maximum number of votes received by a restaurant with online ordering is {max_votes_online}.")


The maximum number of votes received by a restaurant with online ordering is 16832.


In [238]:
# 12. What is the average rating of restaurants that serve both North Indian and Chinese cuisines?
north_chaina_cuisines = zomato[zomato['cuisines'].str.contains('North Indian', na=False) &
                       zomato['cuisines'].str.contains('Chinese', na=False)]

# Calculate average rating
avg_rating = north_chaina_cuisines['rate'].mean()

print(f"The average rating of restaurants that serve both North Indian and Chinese cuisines is {round(avg_rating, 2)}.")


The average rating of restaurants that serve both North Indian and Chinese cuisines is 3.59.


In [240]:
# 13. What is the most profitable area for Zomato based on potential revenue estimation?

# HINT: strike rate = (Total Runs / Total Balls Faced) * 100

zomato['revenue_estimate'] = zomato['approx_costfor_two_people'] / zomato['votes'] * 100


area_revenue = zomato.groupby('listed_incity')['revenue_estimate'].sum().sort_values(ascending=False)

top_area = area_revenue.idxmax()


print(f'The most profitable area for Zomato based on potential revenue estimation is {top_area}')


The most profitable area for Zomato based on potential revenue estimation is BTM


In [241]:
# 14. If Zomato wants to reduce customer complaints, which restaurant type should they focus on?

rest_type_votes = zomato.groupby('rest_type')['votes'].sum()
popular_rest_types = rest_type_votes[rest_type_votes > 500].index

filtered = zomato[zomato['rest_type'].isin(popular_rest_types)]


avg_ratings = filtered.groupby('rest_type')['rate'].mean().sort_values()

worst_type = avg_ratings.idxmin()
print(f'Zomato should focus on {worst_type}')


Zomato should focus on Quick Bites, Food Court


In [242]:
# 15. In which area should Zomato invest by considering high rating (rate > 4.2), high number of votes (> 500) and including online orders?
invest_filter = zomato[(zomato['rate'] > 4.2) &(zomato['votes'] > 500) &(zomato['online_order'] == 1)]


invest_areas = invest_filter['listed_incity'].value_counts()
top_area = invest_areas.idxmax()


print(f'Zomato should consider investing in {top_area}')


Zomato should consider investing in MG Road


# Cuisine-Specific Map - Zomato Bangalore Restaurant Trends

In [243]:
! pip install folium



In [252]:
merged_df

Unnamed: 0,online_order,book_table,rate,votes,rest_type,dish_liked,cuisines,approx_costfor_two_people,listed_intype,listed_incity,Latitude,Longitude
0,1,1,4.1,775,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,Buffet,Banashankari,12.939333,77.553982
1,1,0,4.1,787,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,Buffet,Banashankari,12.939333,77.553982
2,1,0,3.8,918,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,Buffet,Banashankari,12.939333,77.553982
3,0,0,3.7,88,Quick Bites,Masala Dosa,"South Indian, North Indian",300,Buffet,Banashankari,12.939333,77.553982
4,0,0,3.8,166,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,Buffet,Banashankari,12.939333,77.553982
...,...,...,...,...,...,...,...,...,...,...,...,...
51712,0,0,3.6,27,Bar,Not Available,Continental,1500,Pubs and bars,Whitefield,,
51713,0,0,3.7,41,Bar,Not Available,Finger Food,600,Pubs and bars,Whitefield,,
51714,0,0,3.7,41,Bar,Not Available,Finger Food,2000,Pubs and bars,Whitefield,,
51715,0,1,4.3,236,Bar,"Cocktails, Pizza, Buttermilk",Finger Food,2500,Pubs and bars,Whitefield,,


In [253]:
merged_df.dropna(inplace=True)


In [244]:
# Importing libraries
import folium
from folium.plugins import MarkerCluster
from IPython.display import IFrame


In [254]:
bnglr = merged_df[merged_df['cuisines'].str.contains('Italian',na=False)]

map = folium.Map(location=[12.9393328,77.5539819],zoom_start=12)
for i , row in bnglr.iterrows():
  folium.Marker(
      location=[row['Latitude'],row['Longitude']],
      popup=f"{row['listed_incity']}<br>Cuisine: {row['cuisines']}",
      icon =folium.Icon(color='purple')
  ).add_to(map)

map.save('bnglr_cuisines_map.html')

In [259]:
map