In [40]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [41]:
# Load the Zomato restaurant export from the working directory and preview
# the first five rows.
zo = pd.read_csv(filepath_or_buffer="zomato.csv")
zo.head(n=5)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,restaurant name,restaurant type,rate (out of 5),num of ratings,avg cost (two people),online_order,table booking,cuisines type,area,local address
0,0,0,#FeelTheROLL,Quick Bites,3.4,7,200.0,No,No,Fast Food,Bellandur,Bellandur
1,1,1,#L-81 Cafe,Quick Bites,3.9,48,400.0,Yes,No,"Fast Food, Beverages","Byresandra,Tavarekere,Madiwala",HSR
2,2,2,#refuel,Cafe,3.7,37,400.0,Yes,No,"Cafe, Beverages",Bannerghatta Road,Bannerghatta Road
3,3,3,'@ Biryani Central,Casual Dining,2.7,135,550.0,Yes,No,"Biryani, Mughlai, Chinese",Marathahalli,Marathahalli
4,4,4,'@ The Bbq,Casual Dining,2.8,40,700.0,Yes,No,"BBQ, Continental, North Indian, Chinese, Bever...",Bellandur,Bellandur


In [42]:
# Print each column's dtype and non-null count to spot columns with missing values.
zo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7105 entries, 0 to 7104
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Unnamed: 0.1           7105 non-null   int64  
 1   Unnamed: 0             7105 non-null   int64  
 2   restaurant name        7105 non-null   object 
 3   restaurant type        7105 non-null   object 
 4   rate (out of 5)        7037 non-null   float64
 5   num of ratings         7105 non-null   int64  
 6   avg cost (two people)  7048 non-null   float64
 7   online_order           7105 non-null   object 
 8   table booking          7105 non-null   object 
 9   cuisines type          7105 non-null   object 
 10  area                   7105 non-null   object 
 11  local address          7105 non-null   object 
dtypes: float64(2), int64(3), object(7)
memory usage: 666.2+ KB


In [43]:
# (rows, columns) of the loaded frame.
zo.shape

(7105, 12)

In [44]:
# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
zo.describe()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,rate (out of 5),num of ratings,avg cost (two people)
count,7105.0,7105.0,7037.0,7105.0,7048.0
mean,3552.0,3552.0,3.514253,188.921042,540.286464
std,2051.181164,2051.181164,0.463249,592.171049,462.902305
min,0.0,0.0,1.8,1.0,40.0
25%,1776.0,1776.0,3.2,16.0,300.0
50%,3552.0,3552.0,3.5,40.0,400.0
75%,5328.0,5328.0,3.8,128.0,600.0
max,7104.0,7104.0,4.9,16345.0,6000.0


# Dropping the unnamed columns

In [45]:
# Drop the leftover index column by name rather than by position: a positional
# drop of zo.columns[0] removes a different (real) column each time the cell is
# re-run. errors='ignore' makes the cell a no-op once the column is gone.
zo = zo.drop(columns=['Unnamed: 0.1'], errors='ignore')

In [46]:
# Drop the second leftover index column. errors='ignore' keeps the cell
# idempotent, so re-running it does not raise KeyError once the column is gone.
zo = zo.drop(columns=['Unnamed: 0'], errors='ignore')

# Correlation Heatmap

In [58]:
# Pairwise correlation between the numeric columns only. The frame also holds
# text columns (restaurant name, area, ...), and DataFrame.corr() raises on
# non-numeric data in pandas >= 2.0, so select the numeric dtypes explicitly.
corr_matrix = zo.select_dtypes(include='number').corr()
fig = px.imshow(corr_matrix, color_continuous_scale='Plasma')
fig.update_layout(title='Correlation Heatmap')
fig.show()

# Average Cost Distribution

In [48]:
# Histogram of the average cost for two people, with a rug plot in the margin.
fig = px.histogram(zo, x='avg cost (two people)', nbins=20, marginal='rug',
                  labels={'avg cost (two people)': 'Average Cost(Two People)'},
                  title='Average Cost Distribution')
# px `labels` only maps column names; the aggregated y-axis is always titled
# "count" by px.histogram, so set the 'Frequency' title on the layout instead.
fig.update_layout(bargap=0.2, yaxis_title_text='Frequency')
fig.show()

# Online order vs Rating

In [50]:
# Box plot of the rating, split by whether the restaurant accepts online
# orders.
online_rating_labels = {
    'online_order': 'Online Order',
    'rate (out of 5)': 'Rating(out of 5)',
}
fig = px.box(
    data_frame=zo,
    x='online_order',
    y='rate (out of 5)',
    title='Online Order vs Rating',
    labels=online_rating_labels,
)
fig.show()

# Table booking vs Rating

In [51]:
# Box plot of the rating, split by whether the restaurant offers table booking.
# Label keys must match the column names exactly, otherwise px silently
# falls back to the raw column name on that axis.
fig = px.box(zo, x='table booking', y='rate (out of 5)',
            labels={'table booking': 'Table Booking', 'rate (out of 5)': 'Rating (Out of 5)'},
            title= 'Table Booking Vs Rating')
fig.show()

# Cuisine Type Distribution

In [53]:
# Number of restaurants per cuisine-type string, most frequent first.
cuisine_counts = zo['cuisines type'].value_counts()
# Tidy two-column frame with explicit column names, so the axis labels below
# are keyed on real columns instead of on names px infers for bare
# index/array arguments (which vary between pandas versions).
cuisine_counts_df = (
    cuisine_counts
    .rename_axis('cuisines type')
    .reset_index(name='count')
)
fig = px.area(cuisine_counts_df, x='cuisines type', y='count',
             labels={'cuisines type': 'Cuisine Type', 'count': 'Count'},
             title='Cuisine Type of Distribution')
fig.update_layout(width=1200, height=800)
fig.show()

# Area-wise Average Rating

In [54]:
# Mean rating per area, kept as a flat two-column frame for plotting.
area_rating = zo.groupby('area', as_index=False)['rate (out of 5)'].mean()
fig = px.bar(
    data_frame=area_rating,
    x='area',
    y='rate (out of 5)',
    title='Area-Wise Average rating',
    labels={'area':'Area', 'rate (out of 5)': 'Average Rating'},
)
fig.show()