In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from warnings import filterwarnings
filterwarnings('ignore')

In [None]:
df=pd.read_csv('zomato.csv')
df.head()

In [None]:
df.columns

In [None]:
df.dtypes

In [None]:
df.shape

In [None]:
df.isnull().sum()

In [None]:
# Names of the columns that contain at least one missing value, in column order.
has_missing = (df.isnull().sum() > 0).values
feature_na = list(df.columns[has_missing])
feature_na

In [None]:
# For every column listed in feature_na, print the share of rows (in percent,
# rounded to 4 decimals) whose value in that column is missing.
for feature in feature_na:
    print('{} has {} % missing values'.format(feature,np.round(df[feature].isnull().sum()/len(df)*100,4)))

In [None]:
df['rate'].unique()

In [None]:
df.dropna(axis = 'index',subset=['rate'],inplace=True)

In [None]:
def split(x):
    """Return the part of a rating string that precedes the first '/'.

    For example '4.1/5' -> '4.1' and '3.9 /5' -> '3.9'. Surrounding
    whitespace is stripped so the same rating is not reported as two
    different values. Strings without a '/' (such as 'NEW' or '-') come
    back unchanged apart from the strip.

    Non-string input (e.g. a NaN from a missing rating) is returned as-is
    instead of raising AttributeError.
    """
    if not isinstance(x, str):
        return x
    return x.split('/')[0].strip()

In [None]:
df['rate']=df['rate'].apply(split)
df.head()

In [None]:
df['rate'].unique()

In [None]:
# Map the 'NEW' placeholder (presumably "not rated yet") to 0 so the column can be cast to float.
# Only the 'rate' column is touched: a frame-wide replace would also rewrite
# any cell in any other column whose value is exactly 'NEW'.
df['rate']=df['rate'].replace('NEW',0)

In [None]:
# Map the '-' placeholder to 0 so the column can be cast to float.
# Only the 'rate' column is touched: a frame-wide replace would also rewrite
# any cell in any other column whose value is exactly '-'.
df['rate']=df['rate'].replace('-',0)

In [None]:
df['rate']=df['rate'].astype(float)

### Calculate the average rating of each restaurant

In [None]:
df.groupby('name')['rate'].mean().nlargest(20).plot.bar()
plt.show()

In [None]:
# Mean rating per restaurant name as a two-column frame: restaurant, rating.
df_rate = (
    df.groupby('name')['rate']
      .mean()
      .rename_axis('restaurant')
      .reset_index(name='rating')
)
df_rate.head(20)

In [None]:
df_rate.shape

In [None]:
# Rating distribution of the per-restaurant mean ratings held in df_rate.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases (histplot/displot
# are the replacements) — confirm the installed seaborn version before upgrading.
sns.set_style(style='whitegrid')
sns.distplot(df_rate['rating'])
plt.show()

#### Which are the top restaurant chains in Bengaluru?

In [None]:
plt.figure(figsize=(10,7))
chains=df['name'].value_counts()[0:20]
sns.barplot(x=chains,y=chains.index,palette='deep')
plt.title("Most famous restaurants chains in Bangaluru")
plt.xlabel("Number of outlets") 
plt.show()

#### How many of the restaurants do not accept online orders?

In [None]:
x=df["online_order"].value_counts()
x

In [None]:
labels=['accepted','Not_accepted']

In [None]:
import plotly.express as px

In [None]:
# In plotly express `labels` is a dict for renaming axis/legend titles; the slice
# names belong in `names`. The counts are passed as a plain array, so no data_frame
# argument is needed (a 2-element `values` does not line up with the rows of df).
# `labels` must be listed in the same order as the value_counts() result in `x`.
px.pie(values=x.values,names=labels,title='Pie chart')

#### What is the ratio between restaurants that do and do not provide table booking?

In [None]:
x=df['book_table'].value_counts()
labels=['not book','book']

In [None]:
import plotly.graph_objs as go
from plotly.offline import iplot

In [None]:
trace=go.Pie(labels=labels,values=x,hoverinfo='label+percent',textinfo='value')
iplot([trace])

#### How many types of restaurants we have?

In [None]:
df['rest_type'].isna().sum()

In [None]:
# Drop the rows whose restaurant type is missing. Calling dropna(inplace=True) on
# the selected column only affects that temporary Series and leaves df unchanged,
# so the rows are dropped on the frame itself (same pattern as the 'rate' cleanup).
df.dropna(axis='index',subset=['rest_type'],inplace=True)

In [None]:
# nunique() ignores NaN, so a missing rest_type is not counted as a restaurant type.
df['rest_type'].nunique()

In [None]:
# The 20 most frequent restaurant types; computed once and reused for both axes.
top_rest_types=df['rest_type'].value_counts().nlargest(20)
trace1=go.Bar(x=top_rest_types.index,
      y=top_rest_types
      )

In [None]:
iplot([trace1])

#### highest voted restaurant

In [None]:
df.groupby('name')['votes'].sum().nlargest(20).plot.bar()
plt.show()

In [None]:
# Total votes per restaurant name, top 20; the groupby runs once and feeds both axes.
top_voted=df.groupby('name')['votes'].sum().nlargest(20)
trace1=go.Bar(x=top_voted.index,
       y=top_voted)
iplot([trace1])


### total restaurants at different locations of Bangalore

In [None]:
# Number of distinct restaurant names in each location (groups are sorted by location).
# dropna=False keeps the count identical to len(Series.unique()), which includes NaN.
names_per_location = df.groupby('location')['name'].nunique(dropna=False)
location = list(names_per_location.index)
restaurant = [int(count) for count in names_per_location]

In [None]:
# Two-column frame built from the parallel lists: location, restaurant (distinct-name count).
df_total = pd.DataFrame({'location': location, 'restaurant': restaurant})
df_total.head()

In [None]:
df_total.set_index('location',inplace=True)
df_total.head()

In [None]:
df_total.sort_values(by='restaurant').tail(10).plot.bar()
plt.show()

### Most common cuisine combinations (e.g. North Indian, South Indian)

In [None]:
cuisines=df['cuisines'].value_counts()[0:10]
cuisines

In [None]:
# The 10 most frequent values of the 'cuisines' column; counted once and reused for both axes.
top_cuisines=df['cuisines'].value_counts()[0:10]
trace1=go.Bar(
x=top_cuisines.index,
y=top_cuisines
)

In [None]:
iplot([trace1])

In [None]:
df.columns

### Analyse Approx cost for 2 people

In [None]:
df['approx_cost(for two people)'].isna().sum()

In [None]:
df.dropna(axis='index',subset=['approx_cost(for two people)'],inplace=True)

In [None]:
df['approx_cost(for two people)'].unique()

In [None]:
# Remove the thousands separator (e.g. '1,200' -> '1200') so the column can be cast to int.
# astype(str) keeps this cell re-runnable after the column has already been converted
# to int by the next cell; the vectorised .str accessor replaces the per-row lambda.
df['approx_cost(for two people)']=df['approx_cost(for two people)'].astype(str).str.replace(',','',regex=False)
df['approx_cost(for two people)'].unique()

In [None]:
df['approx_cost(for two people)']=df['approx_cost(for two people)'].astype(int)

In [None]:
df['approx_cost(for two people)'].dtype

In [None]:
sns.distplot(df['approx_cost(for two people)'])

#### cost vs rating

In [None]:
sns.scatterplot(x='rate',y='approx_cost(for two people)',hue='online_order',data=df)

### Is there any difference between the votes of restaurants that accept and do not accept online orders?

In [None]:
sns.boxplot(x='online_order',y='votes',data=df)

In [None]:
px.box(df,x='online_order',y='votes')

### Is there any difference between the prices of restaurants that accept and do not accept online orders?

In [None]:
px.box(df,x='online_order',y='approx_cost(for two people)')

### Cheapest Rate for 2 people

In [None]:
df['approx_cost(for two people)'].min()

### Most costly for 2 people

In [None]:
df['approx_cost(for two people)'].max()

In [None]:
# Look the maximum up instead of hard-coding it, so the cell stays correct if the data changes.
max_cost=df['approx_cost(for two people)'].max()
df[df['approx_cost(for two people)']==max_cost]['name']

In [None]:
data=df.copy()

In [None]:
data.set_index('name',inplace=True)

In [None]:
data.head()

### Which restaurant has the highest cost for two people, and what are its cuisines and most-liked dishes?

In [None]:
# Rows with the highest cost for two (looked up, not hard-coded), with their cuisines and liked dishes.
max_cost=df['approx_cost(for two people)'].max()
df[df['approx_cost(for two people)']==max_cost][['name','cuisines','dish_liked']]

### Top 10 Most Expensive restaurant with approx cost for 2 people

In [None]:
data['approx_cost(for two people)'].nlargest(10).plot.bar()

In [None]:
# The 10 highest cost-for-two rows (index = restaurant name); selected once for both axes.
most_expensive = data['approx_cost(for two people)'].nlargest(10)
trace1 = go.Bar(
        x = most_expensive.index,
        y = most_expensive,
        name= 'Priority')
iplot([trace1])

### Top 10 Cheapest restaurant with approx cost for 2 people

In [None]:
data['approx_cost(for two people)'].nsmallest(10).plot.bar()

In [None]:
# The 10 lowest cost-for-two rows (index = restaurant name); selected once for both axes.
cheapest = data['approx_cost(for two people)'].nsmallest(10)
trace1 = go.Bar(
        x = cheapest.index,
        y = cheapest,
        name= 'Priority')
iplot([trace1])

### All the restaurants that cost 500 or less for two people (budget hotels)

In [None]:
data[data['approx_cost(for two people)']<=500]

In [None]:
# Cost-for-two of every row that costs 500 or less, with the restaurant name
# moved out of the index into a regular column.
is_budget = data['approx_cost(for two people)'] <= 500
df_budget = data.loc[is_budget, 'approx_cost(for two people)'].reset_index()
df_budget.head()

### Restaurants that have a good rating (4 or above) and are within budget too (500 or less)

In [None]:
df[(df['rate']>=4) & (df['approx_cost(for two people)']<=500)].shape


### Total number of distinct restaurants that have a good rating (4 or above) and are within budget too (500 or less)

In [None]:
df_new=df[(df['rate']>=4) & (df['approx_cost(for two people)']<=500)]
len(df_new['name'].unique())

In [None]:
df_new.head()

### Number of such affordable restaurants at each location

In [None]:
# Number of distinct restaurant names per location within df_new (groups sorted by location).
# dropna=False keeps the count identical to len(Series.unique()), which includes NaN.
affordable_per_location = df_new.groupby('location')['name'].nunique(dropna=False)
location = list(affordable_per_location.index)
total = [int(count) for count in affordable_per_location]

In [None]:
location_df=pd.DataFrame(zip(location,total))
location_df.head()

In [None]:
location_df.columns=['location','restaurant']
location_df.head()

### Finding Best budget Restaurants in any location

In [None]:
def return_budget(location,restaurant,max_cost=400,min_rate=4,data=None):
    """Return the distinct names of well-rated budget restaurants in one location.

    Parameters
    ----------
    location : value compared for equality against the 'location' column.
    restaurant : value compared for equality against the 'rest_type' column.
    max_cost : upper bound (inclusive) on 'approx_cost(for two people)'.
        Defaults to 400, the threshold previously hard-coded here.
    min_rate : lower bound (exclusive) on 'rate'. Defaults to 4, the
        threshold previously hard-coded here.
    data : DataFrame to search. Defaults to the notebook-level ``df``.

    Returns
    -------
    numpy.ndarray of the unique 'name' values of the matching rows, in
    order of first appearance.
    """
    if data is None:
        # Fall back to the frame prepared by the earlier cells.
        data=df
    matches=((data['approx_cost(for two people)']<=max_cost) & (data['location']==location) &
             (data['rate']>min_rate) & (data['rest_type']==restaurant))
    return data.loc[matches,'name'].unique()

In [None]:
return_budget('BTM',"Quick Bites")

### Which are the foodie areas?

In [None]:
plt.figure(figsize=(10,7))
# The 20 locations with the most listings.
Restaurant_locations=df['location'].value_counts()[:20]
# x/y are passed by keyword (as in the restaurant-chains plot): newer seaborn
# releases no longer accept them as positional arguments.
sns.barplot(x=Restaurant_locations,y=Restaurant_locations.index)

In [None]:
Restaurant_locations=df['location'].value_counts()[:20]
trace1 = go.Bar( 
        x = Restaurant_locations.index,
        y = Restaurant_locations,
        name= 'Priority')
iplot([trace1])

### Geographical Analysis

In [None]:
locations=pd.DataFrame({'Name':df['location'].unique()})
locations.head()

In [None]:
from geopy.geocoders import Nominatim

In [None]:
geolocator=Nominatim(user_agent='app')

In [None]:
from geopy.extra.rate_limiter import RateLimiter

# The public Nominatim service permits at most one request per second; RateLimiter
# spaces the calls out and retries transient service errors instead of aborting the loop.
geocode=RateLimiter(geolocator.geocode,min_delay_seconds=1)

# One entry per row of `locations`: a (latitude, longitude) tuple, or NaN when
# the name is missing or could not be resolved.
lat_lon=[]
for place_name in locations['Name']:
    # A distinct loop variable keeps the `location` list built earlier intact.
    place=None if pd.isna(place_name) else geocode(place_name)
    if place is None:
        lat_lon.append(np.nan)
    else:
        lat_lon.append((place.latitude,place.longitude))

In [None]:
locations['geo_loc']=lat_lon

In [None]:
locations.head()

In [None]:
locations.shape

In [None]:
Rest_locations=pd.DataFrame(df['location'].value_counts().reset_index())
Rest_locations.head()

In [None]:
Rest_locations.columns=['Name','count']
Rest_locations.head()

In [None]:
Restaurant_locations=Rest_locations.merge(locations,on='Name',how='left').dropna()
Restaurant_locations.head()

In [None]:
np.array(Restaurant_locations['geo_loc'])

In [None]:
lat,lon=zip(*np.array(Restaurant_locations['geo_loc']))

In [None]:
type(lat)

In [None]:
Restaurant_locations['lat']=lat
Restaurant_locations['lon']=lon

In [None]:
Restaurant_locations.head()

In [None]:
Restaurant_locations.drop('geo_loc',axis=1,inplace=True)
Restaurant_locations.head()

In [None]:
import folium
from folium.plugins import HeatMap

In [None]:
def generatebasemap(default_location=[13.29,77.75],default_zoom_start=12):
    """Build and return a new folium map.

    default_location is passed to folium.Map as the map centre and
    default_zoom_start as its initial zoom level.
    """
    return folium.Map(location=default_location,zoom_start=default_zoom_start)

In [None]:
basemap=generatebasemap()

In [None]:
basemap

In [None]:
HeatMap(Restaurant_locations[['lat','lon','count']].values.tolist(),zoom=20,radius=15).add_to(basemap)

In [None]:
basemap

### Heatmap of North Indian restaurants

In [None]:
df.head()

In [None]:
df2=df[df['cuisines']=='North Indian']
df2.head()

In [None]:
north_india=df2.groupby(['location'],as_index=False)['url'].agg('count')
north_india.columns=['Name','count']
north_india.head()

In [None]:
north_india=north_india.merge(locations,on="Name",how='left').dropna()

In [None]:
north_india.head(10)

In [None]:
north_india['lat'],north_india['lon']=zip(*north_india['geo_loc'].values)
north_india.head()

In [None]:
north_india.drop('geo_loc',axis=1,inplace=True)
north_india.head()

In [None]:
basemap=generatebasemap()
HeatMap(north_india[['lat','lon','count']].values.tolist(),zoom=20,radius=15).add_to(basemap)
basemap

In [None]:
# Rows whose cuisines value is exactly 'South Indian'.
df3= df[df['cuisines']=='South Indian']
# Count listings per location from the South Indian subset (df3). The previous
# version grouped df2, i.e. the North Indian subset, by mistake.
south_india=df3.groupby(['location'],as_index=False)['url'].agg('count')
south_india.columns=['Name','count']
# Attach the geocoded coordinates and drop locations that could not be resolved.
south_india=south_india.merge(locations,on="Name",how='left').dropna()
# NOTE: the latitude column is named 'lan' (not 'lat') because the heatmap cell
# further down reads it under that name.
south_india['lan'],south_india['lon']=zip(*south_india['geo_loc'].values)

In [None]:
south_india=south_india.drop(['geo_loc'],axis=1)
south_india.head()

In [None]:
basemap=generatebasemap()
HeatMap(south_india[['lan','lon','count']].values.tolist(),zoom=20,radius=15).add_to(basemap)
basemap

In [None]:
df_1=df.groupby(['rest_type','name']).agg('count')
df_1

In [None]:
# Top 3 restaurant names per restaurant type, ranked by listing count.
# df_1 holds per-(rest_type, name) non-null counts for every column; 'url' is used as the count.
# Steps: sort by the 'url' count descending -> group by rest_type -> within each
# group keep the 3 rows with the largest count -> keep only the 'url' column ->
# turn the index levels back into columns -> rename 'url' to 'count'.
datas=df_1.sort_values(['url'],ascending=False).groupby(['rest_type'],
                as_index=False).apply(lambda x : x.sort_values(by="url",ascending=False).head(3))['url'].reset_index().rename(columns={'url':'count'})

In [None]:
datas

In [None]:
df_1=df.groupby(['rest_type','name']).agg('count')
df_1

In [None]:
df_1.sort_values(['url'],ascending=False)

In [None]:
df_1.sort_values(['url'],ascending=False).groupby(['rest_type'],as_index=False).apply(lambda x : x.sort_values(by="url",ascending=False))

In [None]:
df_1.sort_values(['url'],ascending=False).groupby(['rest_type'],as_index=False).apply(lambda x : x.sort_values(by="url",ascending=False))['url']

In [None]:
df_1.sort_values(['url'],ascending=False).groupby(['rest_type'],as_index=False).apply(lambda x : x.sort_values(by="url",ascending=False))['url'].reset_index()

In [None]:
# All restaurant names per restaurant type, ordered by listing count within each type.
# Same pipeline as the top-3 version above but without the head(3) cut:
# sort by the 'url' count descending -> group by rest_type -> re-sort each group ->
# keep only the 'url' column -> turn the index levels back into columns ->
# rename 'url' to 'count'.
dataset=df_1.sort_values(['url'],ascending=False).groupby(['rest_type'],
                as_index=False).apply(lambda x : x.sort_values(by="url",ascending=False))['url'].reset_index().rename(columns={'url':'count'})

In [None]:
dataset

In [None]:
casual=dataset[dataset['rest_type']=='Casual Dining']
casual