In [None]:
#import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('dark_background')

In [None]:
# Read zomato.csv (path relative to the notebook's working directory) into df.
df = pd.read_csv('zomato.csv')
# Preview the first rows to check that the file parsed as expected.
df.head()

In [None]:
# (rows, columns) of the frame as loaded.
df.shape

In [None]:
# Column labels available before any columns are dropped.
df.columns

In [None]:
# Remove the columns that the rest of this notebook does not use.
unused_columns = ['url', 'address', 'phone', 'menu_item', 'dish_liked', 'reviews_list']
df = df.drop(columns=unused_columns)
df.head()

In [None]:
# Summary of the remaining columns after the drop above.
df.info()

In [None]:
# Keep only the first occurrence of rows that are identical in every column.
df = df.drop_duplicates()
df.shape

In [None]:
# Distinct raw values in 'rate', to see which formats the cleaning step must handle.
df['rate'].unique()

In [None]:
#function to clean the rate column
def fixrate(value):
    """Convert one raw ``rate`` cell to a float.

    Parameters
    ----------
    value : object
        Raw cell value, e.g. ``'4.1/5'``, ``'3.9 /5'``, ``'NEW'``, ``'-'``,
        a float NaN, or ``None``.

    Returns
    -------
    float
        The number before the first ``'/'``; ``np.nan`` for the placeholders
        ``'NEW'`` and ``'-'`` (surrounding whitespace ignored), for ``None``
        and for NaN input.

    Raises
    ------
    ValueError
        If the text before the slash is not a valid number, so unexpected
        formats are still reported rather than silently hidden.
    """
    if value is None:
        return np.nan
    # Strip first so padded placeholders such as ' NEW ' are still recognised.
    text = str(value).strip()
    if text in ('NEW', '-'):
        return np.nan
    # A float NaN stringifies to 'nan', which float() parses back to NaN.
    return float(text.split('/')[0])

# Replace every raw rating with its cleaned numeric value, then preview the result.
cleaned_rate = df['rate'].apply(fixrate)
df['rate'] = cleaned_rate
df['rate'].head()

In [None]:
# How many ratings are missing after cleaning.
df['rate'].isna().sum()

In [None]:
# Fill missing ratings with the column mean.
# Assign the result back instead of calling fillna(inplace=True) on df['rate']:
# the in-place form works on a temporary object, which warns in pandas 2.x and
# leaves df unchanged when Copy-on-Write is enabled.
df['rate'] = df['rate'].fillna(df['rate'].mean())
# Should now be zero.
df.rate.isnull().sum()

In [None]:
# Re-check the column summary now that 'rate' has been filled.
df.info()

In [None]:
# Discard every row that still has a missing value in any column.
df = df.dropna()
df.head()

In [None]:
# Column summary after rows with missing values were dropped.
df.info()

In [None]:
# Give the two long column names shorter aliases.
column_aliases = {
    'approx_cost(for two people)': 'cost2plates',
    'listed_in(type)': 'type',
}
df = df.rename(columns=column_aliases)
df.head()

In [None]:
# Distinct values of 'location'.
df['location'].unique()

In [None]:
# Distinct values of 'listed_in(city)', for comparison with 'location' above.
df['listed_in(city)'].unique()

In [None]:
# Drop 'listed_in(city)'; the location analysis below uses 'location' instead.
df = df.drop(columns=['listed_in(city)'])

In [None]:
# Preview the frame after removing 'listed_in(city)'.
df.head()

In [None]:
# Distinct raw values of 'cost2plates', to see what the cleaning step must handle.
df['cost2plates'].unique()

In [None]:
#cleaning cost2plates
def fixcomma(value):
    """Return ``value`` as a float, ignoring any commas in its text form.

    The value is stringified first, so both strings such as ``'1,200'`` and
    plain numbers are accepted. Raises ``ValueError`` when the comma-free text
    is not a valid number.
    """
    return float(str(value).replace(',', ''))
    
# Convert every cost to a float, then list the distinct cleaned values.
cleaned_cost = df['cost2plates'].apply(fixcomma)
df['cost2plates'] = cleaned_cost
df['cost2plates'].unique()

In [None]:
# Preview the frame with the numeric cost column.
df.head()

In [None]:
# Number of rows per restaurant type.
df['rest_type'].value_counts()

In [None]:
# Rows per restaurant type, most frequent first (descending is the value_counts default).
rest_types = df['rest_type'].value_counts()
rest_types

In [None]:
# Restaurant types that occur in fewer than 1000 rows.
rest_types_less1000 = rest_types.loc[rest_types.lt(1000)]
rest_types_less1000

In [None]:
#cleaning rest_types
def fixrest_type(value):
    """Map an infrequent restaurant type to ``'others'``.

    Returns ``'others'`` when ``value`` is one of the index labels of the
    notebook-level Series ``rest_types_less1000``; otherwise returns ``value``
    unchanged.
    """
    # `in` on a pandas Series tests its index labels, not the stored counts.
    is_rare = value in rest_types_less1000
    return 'others' if is_rare else value
    
# Pool the rare restaurant types, then show the reduced set of categories.
rest_type_bucketed = df['rest_type'].apply(fixrest_type)
df['rest_type'] = rest_type_bucketed
df['rest_type'].value_counts()

In [None]:
# Number of rows per location.
df['location'].value_counts()

In [None]:
# Rows per location, most frequent first (descending is the value_counts default).
locations = df['location'].value_counts()
locations

In [None]:
# Locations that occur in fewer than 300 rows.
location_less300 = locations.loc[locations.lt(300)]
location_less300

In [None]:
#cleaning location
def fixlocation(value):
    """Map an infrequent location to ``'others'``.

    Returns ``'others'`` when ``value`` is one of the index labels of the
    notebook-level Series ``location_less300``; otherwise returns ``value``
    unchanged.
    """
    # `in` on a pandas Series tests its index labels, not the stored counts.
    is_rare = value in location_less300
    return 'others' if is_rare else value

# Pool the rare locations, then show the reduced set of categories.
location_bucketed = df['location'].apply(fixlocation)
df['location'] = location_bucketed
df['location'].value_counts()

In [None]:
# Number of rows per cuisines value.
df['cuisines'].value_counts()

In [None]:
# Rows per cuisines value, most frequent first (descending is the value_counts default).
cuisine = df['cuisines'].value_counts()
cuisine

In [None]:
# Cuisines values that occur in fewer than 100 rows.
cuisine_less100 = cuisine.loc[cuisine.lt(100)]
cuisine_less100

In [None]:
#cleaning cuisine
def fixcuisine(value):
    """Map an infrequent cuisines value to ``'others'``.

    Returns ``'others'`` when ``value`` is one of the index labels of the
    notebook-level Series ``cuisine_less100``; otherwise returns ``value``
    unchanged.
    """
    # `in` on a pandas Series tests its index labels, not the stored counts.
    is_rare = value in cuisine_less100
    return 'others' if is_rare else value
    
# Pool the rare cuisines values, then show the reduced set of categories.
cuisines_bucketed = df['cuisines'].apply(fixcuisine)
df['cuisines'] = cuisines_bucketed
df['cuisines'].value_counts()

In [None]:
# Number of rows per listing type.
df['type'].value_counts()

In [None]:
# Plotting location, online_order, book_table, etc.
# Count of rows per location.
plt.figure(figsize=(16,10))
# Pass the column by keyword: seaborn >= 0.12 treats the first positional
# argument as `data`, so a positional Series no longer means "x".
ax = sns.countplot(x='location', data=df)
plt.xticks(rotation = 90)

In [None]:
# Count of rows with and without online ordering.
plt.figure(figsize=(6,6))
# Column passed by keyword; a positional Series is read as `data` by seaborn >= 0.12.
sns.countplot(x='online_order', data=df, palette='inferno')

In [None]:
# Count of rows with and without table booking.
plt.figure(figsize=(6,6))
# Column passed by keyword; a positional Series is read as `data` by seaborn >= 0.12.
sns.countplot(x='book_table', data=df, palette='rainbow')

In [None]:
# Rating distribution split by the online_order flag.
online_axes = plt.figure(figsize=(6, 6)).gca()
sns.boxplot(data=df, x='online_order', y='rate', ax=online_axes)

In [None]:
# Rating distribution split by the book_table flag.
booking_axes = plt.figure(figsize=(6, 6)).gca()
sns.boxplot(data=df, x='book_table', y='rate', ax=booking_axes)

In [None]:
#visualizing location vs order
# Rows per (location, online_order) pair.
df_loc = df.groupby(['location','online_order'])['name'].count()
# The CSV is still written so the exported file keeps being produced, but the
# analysis no longer reads it back: reset_index() flattens the grouped Series
# into the same three columns without depending on disk.
df_loc.to_csv('location_online.csv')
df_loc = df_loc.reset_index()
# One row per location, one column per online_order value; missing pairs become 0.
# aggfunc is the string 'sum' because passing np.sum is deprecated in pandas 2.x.
df_loc = pd.pivot_table(df_loc,values=None,index=['location'],columns=['online_order'],fill_value = 0,aggfunc = 'sum')
df_loc

In [None]:
# Grouped bars: one group per location, one bar per online_order value.
df_loc.plot.bar(figsize=(15, 8))

In [None]:
#visualizing location vs book_table
# Rows per (location, book_table) pair.
df_book = df.groupby(['location','book_table'])['name'].count()
# The CSV is still written so the exported file keeps being produced, but the
# analysis no longer reads it back: reset_index() flattens the grouped Series
# into the same three columns without depending on disk.
df_book.to_csv('location_book.csv')
df_book = df_book.reset_index()
# One row per location, one column per book_table value; missing pairs become 0.
# aggfunc is the string 'sum' because passing np.sum is deprecated in pandas 2.x.
df_book = pd.pivot_table(df_book,values=None,index=['location'],columns=['book_table'],fill_value = 0,aggfunc = 'sum')
df_book

In [None]:
# Grouped bars: one group per location, one bar per book_table value.
df_book.plot.bar(figsize=(15, 8))

In [None]:
# Rating distribution for each listing type.
type_axes = plt.figure(figsize=(15, 8)).gca()
sns.boxplot(data=df, x='type', y='rate', palette='inferno', ax=type_axes)

In [None]:
#visualizing location vs type
# Rows per (location, type) pair.
df_type = df.groupby(['location','type'])['name'].count()
# The CSV is still written so the exported file keeps being produced, but the
# analysis no longer reads it back: reset_index() flattens the grouped Series
# into the same three columns without depending on disk.
df_type.to_csv('location_type.csv')
df_type = df_type.reset_index()
# One row per location, one column per listing type; missing pairs become 0.
# aggfunc is the string 'sum' because passing np.sum is deprecated in pandas 2.x.
df_type = pd.pivot_table(df_type,values=None,index=['location'],columns=['type'],fill_value = 0,aggfunc = 'sum')
df_type

In [None]:
# Grouped bars: one group per location, one bar per listing type (extra wide to fit them).
df_type.plot.bar(figsize=(36, 8))

In [None]:
#visualizing location vs votes
# De-duplicate before summing. The original called drop_duplicates() without
# keeping its result, so nothing was removed and repeated rows for the same
# restaurant were summed several times.
# 'name' is part of the duplicate key so that two different restaurants with
# equal vote counts in one location are both kept; it is dropped again so
# df_vote keeps its two original columns.
# NOTE(review): this changes the plotted totals compared with the old no-op — confirm this is the intended de-duplication.
df_vote = df[['name','location','votes']].drop_duplicates()[['location','votes']]
# Total votes per location, largest first.
df_votes = df_vote.groupby(['location'])['votes'].sum()
df_votes = df_votes.to_frame()
df_votes = df_votes.sort_values('votes',ascending=False)
df_votes.head()

In [None]:
# Total votes per location.
plt.figure(figsize=(15,8))
# x and y must be keywords: seaborn >= 0.12 accepts only `data` positionally,
# so barplot(index, values) raises TypeError there.
sns.barplot(x=df_votes.index, y=df_votes['votes'])
plt.xticks(rotation = 90)

In [None]:
#visualizing cuisine vs votes
# De-duplicate before summing. The original called drop_duplicates() without
# keeping its result, so nothing was removed and repeated rows for the same
# restaurant were summed several times.
# 'name' is part of the duplicate key so that two different restaurants with
# equal vote counts and the same cuisines are both kept; it is dropped again
# so df_cuisine keeps its two original columns.
# NOTE(review): this changes the plotted totals compared with the old no-op — confirm this is the intended de-duplication.
df_cuisine = df[['name','cuisines','votes']].drop_duplicates()[['cuisines','votes']]
# Total votes per cuisines value, largest first.
df_cuisines = df_cuisine.groupby(['cuisines'])['votes'].sum()
df_cuisines = df_cuisines.to_frame()
df_cuisines = df_cuisines.sort_values('votes',ascending=False)
df_cuisines.head()

In [None]:
# Skip the first row of the vote-sorted table and keep the rest.
# NOTE(review): presumably the skipped row is the pooled 'others' bucket — confirm;
# dropping by label would not depend on the sort order.
df_cuisin = df_cuisines.iloc[1:]
df_cuisin.head()

In [None]:
# Total votes per cuisines value (first row of the sorted table excluded).
plt.figure(figsize=(15,8))
# x and y must be keywords: seaborn >= 0.12 accepts only `data` positionally,
# so barplot(index, values) raises TypeError there.
sns.barplot(x=df_cuisin.index, y=df_cuisin['votes'])
plt.xticks(rotation = 90)