In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from IPython.core.display import display, HTML
import time

In [5]:
# Load the Zomato restaurant listings from the CSV export into the working frame.
DATA_PATH = "/content/zomato_data.csv"
df = pd.read_csv(DATA_PATH)

In [6]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(51717, 10)

In [7]:
# Per-column dtype and non-null count, to spot missing values and text-typed columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51717 entries, 0 to 51716
Data columns (total 10 columns):
 #   Column                     Non-Null Count  Dtype 
---  ------                     --------------  ----- 
 0   online_order               51717 non-null  object
 1   book_table                 51717 non-null  object
 2   rate                       43942 non-null  object
 3   votes                      51717 non-null  int64 
 4   rest_type                  51490 non-null  object
 5   dish_liked                 23639 non-null  object
 6   cuisines                   51672 non-null  object
 7   approx_costfor_two_people  51371 non-null  object
 8   listed_intype              51717 non-null  object
 9   listed_incity              51717 non-null  object
dtypes: int64(1), object(9)
memory usage: 3.9+ MB


In [8]:
# Summary statistics; only numeric columns are included by default.
df.describe()

Unnamed: 0,votes
count,51717.0
mean,283.697527
std,803.838853
min,0.0
25%,7.0
50%,41.0
75%,198.0
max,16832.0


In [9]:
# Column labels available for the analysis below.
df.columns

Index(['online_order', 'book_table', 'rate', 'votes', 'rest_type',
       'dish_liked', 'cuisines', 'approx_costfor_two_people', 'listed_intype',
       'listed_incity'],
      dtype='object')

In [10]:
# How many restaurants serve North Indian cuisine?
# Rows with a missing `cuisines` value are treated as non-matches (na=False).
serves_north_indian = df['cuisines'].str.contains('North Indian', na=False)
int(serves_north_indian.sum())

21085

In [11]:
# Which cuisine is most commonly offered by restaurants in Bangalore?
# A single `cuisines` entry is a comma-separated list, so split every entry into
# individual, whitespace-trimmed cuisine names before counting.
cuisine_series = df['cuisines'].dropna()
cuisine_exploded = (
    cuisine_series
    .str.split(',')
    .explode()
    .str.strip()
)
cuisine_counts = cuisine_exploded.value_counts()
top_cuisines = cuisine_counts.head(10)
print("Top 10 Most Common Cuisines in Bangalore:")
print(top_cuisines)

Top 10 Most Common Cuisines in Bangalore:
cuisines
North Indian    21085
Chinese         15547
South Indian     8644
Fast Food        8096
Biryani          6492
Continental      5765
Desserts         5633
Cafe             5303
Beverages        4747
Italian          3389
Name: count, dtype: int64


In [14]:
# Which locality in Bangalore has the highest average dining cost (for two people)?

# The cost column is loaded as text (object dtype): strip the commas, then convert to
# numbers in place, turning anything unparseable into NaN. Later cells read this
# column as numeric.
cost_without_commas = df['approx_costfor_two_people'].replace(',', '', regex=True)
df['approx_costfor_two_people'] = pd.to_numeric(cost_without_commas, errors='coerce')

avg_cost_by_locality = (
    df.groupby('listed_incity')['approx_costfor_two_people']
    .mean()
    .sort_values(ascending=False)
)
avg_cost_by_locality.head(10)


Unnamed: 0_level_0,approx_costfor_two_people
listed_incity,Unnamed: 1_level_1
Church Street,771.990104
Brigade Road,767.091115
MG Road,763.987696
Lavelle Road,753.584873
Residency Road,742.960723
Indiranagar,654.753655
Old Airport Road,610.877698
Whitefield,579.159925
Malleshwaram,574.789762
Frazer Town,558.237288


In [None]:
# Which restaurant type has the top average rating among restaurants with over 1000 votes?

# Parse the rating into a separate Series instead of overwriting df['rate']:
# other cells use the .str accessor on the raw text column, and overwriting it with
# floats would make them (and a re-run of this cell) raise AttributeError.
# If the column has already been converted to numbers, use it as is.
rate_values = df['rate']
if not pd.api.types.is_numeric_dtype(rate_values):
    # Keep only the part before the '/' separator; non-numeric entries become NaN below.
    rate_values = rate_values.str.split('/').str[0]
rate_numeric = pd.to_numeric(rate_values, errors='coerce')

popular = df['votes'] > 1000
(
    rate_numeric[popular]
    .groupby(df.loc[popular, 'rest_type'])
    .mean()
    .sort_values(ascending=False)
    .head(1)
)

Unnamed: 0_level_0,rate
rest_type,Unnamed: 1_level_1
Bakery,4.8


In [15]:
# What is the minimum cost (for two people) to eat out in Bangalore?
# Relies on approx_costfor_two_people having already been converted to numbers by the
# average-cost-by-locality cell above.

min_cost = df['approx_costfor_two_people'].min()
min_cost

40.0

In [17]:
# What percentage of all restaurants that accept online orders are located in Banashankari?
# (The data records whether a restaurant accepts online orders, so this is a share of
# restaurants.)
accepts_online = df['online_order'] == 'Yes'
in_banashankari = df['listed_incity'] == 'Banashankari'

banashankari_online = df[in_banashankari & accepts_online]
banashankari_online_count = len(banashankari_online)
total_online_orders = int(accepts_online.sum())

percentage_banashankari_online = (banashankari_online_count / total_online_orders) * 100
percentage_banashankari_online

1.7934568387859677

In [18]:
# Which locality has the most restaurants with over 500 votes and a rating below 3.0?

# Build a numeric rating column. While df['rate'] is still text, keep only the first
# number found in each entry; entries with no digits (such as 'NEW' or '-') have no
# match and become NaN, so they need no separate replacement step. If another cell has
# already converted df['rate'] to numbers, use it as is, because the .str accessor
# raises AttributeError on a numeric column.
raw_rate = df['rate']
if not pd.api.types.is_numeric_dtype(raw_rate):
    raw_rate = raw_rate.str.extract(r'(\d+\.?\d*)', expand=False)
df['rate_clean'] = pd.to_numeric(raw_rate, errors='coerce')

# Rows with a NaN rating compare False and are excluded.
filtered_df = df[(df['votes'] > 500) & (df['rate_clean'] < 3.0)]

low_rated_popular_by_locality = filtered_df['listed_incity'].value_counts()

low_rated_popular_by_locality.head(10)

Unnamed: 0_level_0,count
listed_incity,Unnamed: 1_level_1
Brookefield,8
Whitefield,6
Indiranagar,4
Old Airport Road,4
Marathahalli,3
Bellandur,3
Banashankari,2
Koramangala 7th Block,2
Koramangala 6th Block,2
Koramangala 5th Block,2


In [19]:
# Which locality in Bangalore should Zomato target for expansion, based on restaurant-type diversity?
# Diversity here is the number of distinct `rest_type` values listed in each locality.
rest_type_diversity = df.groupby('listed_incity')['rest_type'].nunique()
rest_type_diversity_sorted = rest_type_diversity.sort_values(ascending=False)

# Most diverse localities are printed; the least diverse ones are the cell's output.
most_diverse = rest_type_diversity_sorted.head()
least_diverse = rest_type_diversity_sorted.tail()

print(most_diverse)

least_diverse


listed_incity
BTM              61
Brigade Road     60
Church Street    60
Lavelle Road     58
MG Road          58
Name: rest_type, dtype: int64


Unnamed: 0_level_0,rest_type
listed_incity,Unnamed: 1_level_1
Kalyan Nagar,41
Basavanagudi,39
Banashankari,37
Electronic City,34
New BEL Road,34


In [20]:
# What is the average cost difference between buffet and delivery restaurants?
# Uses the numeric cost-for-two column; NaN costs are skipped by mean().
cost_for_two = df['approx_costfor_two_people']
listing_type = df['listed_intype']

buffet_cost = cost_for_two[listing_type == 'Buffet'].mean()
delivery_cost = cost_for_two[listing_type == 'Delivery'].mean()
cost_difference = buffet_cost - delivery_cost

print(f"Buffet Avg Cost: ₹{buffet_cost:.2f}")
print(f"Delivery Avg Cost: ₹{delivery_cost:.2f}")
print(f"Difference: ₹{cost_difference:.2f}")


Buffet Avg Cost: ₹1306.66
Delivery Avg Cost: ₹464.49
Difference: ₹842.17


In [22]:
# What is the maximum number of votes received by any restaurant with online ordering?
offers_online_order = df['online_order'] == 'Yes'
online_order_restaurants = df.loc[offers_online_order]
max_votes_online = online_order_restaurants['votes'].max()
max_votes_online


16832

In [23]:
# What is the average rating of restaurants that serve both North Indian and Chinese cuisines?

# Store the first number found in each rating in its own column; entries with no
# digits become NaN. If df['rate'] has already been converted to numbers by another
# cell, use it directly, because the .str accessor raises AttributeError on a numeric
# column. (The previous unused `df_clean = df.dropna()` assignment was removed: nothing
# read it before it was reassigned.)
rating_source = df['rate']
if not pd.api.types.is_numeric_dtype(rating_source):
    rating_source = rating_source.str.extract(r'(\d+\.?\d*)', expand=False)
df['rate_cleaned'] = pd.to_numeric(rating_source, errors='coerce')
def serves_north_indian_and_chinese(cuisines):
    """Return True when a comma-separated cuisine string lists both cuisines.

    Each comma-separated entry is trimmed and lower-cased before comparison, so
    matching is case-insensitive and requires whole entries equal to
    'north indian' and 'chinese'. A null value (per pd.isnull) yields False.
    """
    if pd.isnull(cuisines):
        return False
    offered = {name.strip().lower() for name in cuisines.split(',')}
    return {'north indian', 'chinese'} <= offered

# Keep only restaurants whose cuisine list contains both cuisines, then average
# their cleaned ratings (NaN ratings are skipped by mean()).
serves_both = df['cuisines'].map(serves_north_indian_and_chinese)
df_filtered = df.loc[serves_both]
average_rating = df_filtered['rate_cleaned'].mean()

print(f"Average rating of restaurants serving both North Indian and Chinese cuisines: {average_rating:.2f}")

Average rating of restaurants serving both North Indian and Chinese cuisines: 3.57


In [24]:
# What is the most profitable area for Zomato, based on a potential-revenue estimate?

# Revenue proxy per restaurant: votes multiplied by the cost for two. Rows with a
# missing cost yield NaN, which the per-locality sum skips.
df['potential_revenue'] = df['votes'].mul(df['approx_costfor_two_people'])
revenue_by_locality = (
    df.groupby('listed_incity')['potential_revenue']
    .sum()
    .sort_values(ascending=False)
)

revenue_by_locality.head(10)

Unnamed: 0_level_0,potential_revenue
listed_incity,Unnamed: 1_level_1
Koramangala 7th Block,1001584000.0
Koramangala 5th Block,983043100.0
MG Road,925832100.0
BTM,923668000.0
Koramangala 4th Block,915520500.0
Koramangala 6th Block,901388000.0
Church Street,894655800.0
Indiranagar,874599800.0
Brigade Road,832403000.0
Residency Road,755886000.0


In [None]:
# If Zomato wants to reduce customer complaints, which restaurant type should it focus on?

# Convert df['rate'] to numbers in place. Testing "not numeric" instead of
# "dtype == object" also covers pandas string dtypes, which would otherwise skip the
# extraction step and be coerced to all-NaN by to_numeric below.
if not pd.api.types.is_numeric_dtype(df['rate']):
    df['rate'] = df['rate'].str.extract(r'(\d+\.?\d*)', expand=False)

df['rate'] = pd.to_numeric(df['rate'], errors='coerce')
# Only rows with both a restaurant type and a parsed rating take part in the averages.
df_clean = df.dropna(subset=['rest_type', 'rate'])
avg_rating_by_rest_type = df_clean.groupby('rest_type')['rate'].mean().sort_values()
print("Restaurant types with lowest average ratings (potential high complaints):")
print(avg_rating_by_rest_type.head(5))

Restaurant types with lowest average ratings (potential high complaints):
rest_type
Dessert Parlor, Kiosk        3.000000
Bakery, Food Court           3.100000
Bakery, Beverage Shop        3.200000
Food Court, Casual Dining    3.240000
Bhojanalya                   3.266667
Name: rate, dtype: float64


In [25]:
# In which areas should Zomato invest, considering high ratings (> 4.2), a high number
# of votes (> 500) and availability of online ordering?

# Make sure df['rate'] is numeric before comparing it with a float. Testing
# "not numeric" instead of "dtype == object" also covers pandas string dtypes, for
# which the comparison below would otherwise be attempted on unparsed text.
if not pd.api.types.is_numeric_dtype(df['rate']):
    df['rate'] = pd.to_numeric(
        df['rate'].str.extract(r'(\d+\.?\d*)', expand=False), errors='coerce'
    )
filtered_df = df[
    (df['rate'] > 4.2)
    & (df['votes'] > 500)
    & (df['online_order'].str.lower() == 'yes')
]

# Number of qualifying listings per locality, largest first.
top_areas = filtered_df['listed_incity'].value_counts().head(10)
print("Top areas where Zomato should consider investing:")
print(top_areas)

Top areas where Zomato should consider investing:
listed_incity
MG Road                  97
Koramangala 7th Block    97
Church Street            95
Koramangala 4th Block    95
Brigade Road             94
Koramangala 5th Block    92
BTM                      91
Koramangala 6th Block    90
Lavelle Road             82
Residency Road           81
Name: count, dtype: int64
