## Importing Data

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Path of the raw Zomato CSV; kept in one variable so it is easy to repoint.
zomato_csv_path = r"/kaggle/input/zomato-dataset/zomato.csv"
df = pd.read_csv(zomato_csv_path)

## Initial Investigations

In [3]:
# Preview the first five rows to see the columns and raw value formats.
df.head()

Unnamed: 0,url,address,name,online_order,book_table,rate,votes,phone,location,rest_type,dish_liked,cuisines,approx_cost(for two people),reviews_list,menu_item,listed_in(type),listed_in(city)
0,https://www.zomato.com/bangalore/jalsa-banasha...,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,080 42297555\r\n+91 9743772233,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,"[('Rated 4.0', 'RATED\n A beautiful place to ...",[],Buffet,Banashankari
1,https://www.zomato.com/bangalore/spice-elephan...,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,080 41714161,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,"[('Rated 4.0', 'RATED\n Had been here for din...",[],Buffet,Banashankari
2,https://www.zomato.com/SanchurroBangalore?cont...,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,+91 9663487993,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,"[('Rated 3.0', ""RATED\n Ambience is not that ...",[],Buffet,Banashankari
3,https://www.zomato.com/bangalore/addhuri-udupi...,"1st Floor, Annakuteera, 3rd Stage, Banashankar...",Addhuri Udupi Bhojana,No,No,3.7/5,88,+91 9620009302,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,"[('Rated 4.0', ""RATED\n Great food and proper...",[],Buffet,Banashankari
4,https://www.zomato.com/bangalore/grand-village...,"10, 3rd Floor, Lakshmi Associates, Gandhi Baza...",Grand Village,No,No,3.8/5,166,+91 8026612447\r\n+91 9901210005,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,"[('Rated 4.0', 'RATED\n Very good restaurant ...",[],Buffet,Banashankari


In [4]:
# Number of (rows, columns) in the raw dataset.
df.shape

(51717, 17)

## Handling Columns

In [5]:
# List the column names to decide which ones to keep.
df.columns

Index(['url', 'address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'phone', 'location', 'rest_type', 'dish_liked', 'cuisines',
       'approx_cost(for two people)', 'reviews_list', 'menu_item',
       'listed_in(type)', 'listed_in(city)'],
      dtype='object')

In [6]:
# Remove the URL, phone, review text, menu, liked-dish and listed-city columns.
# The result is assigned back rather than using inplace=True, so the step reads
# as an explicit transformation of `df`.
df = df.drop(
    columns=['url', 'phone', 'reviews_list', 'menu_item', 'dish_liked', 'listed_in(city)']
)

In [7]:
# Confirm which columns remain after the drop.
df.columns

Index(['address', 'name', 'online_order', 'book_table', 'rate', 'votes',
       'location', 'rest_type', 'cuisines', 'approx_cost(for two people)',
       'listed_in(type)'],
      dtype='object')

In [8]:
# Give the awkward raw headers short, identifier-friendly names.
# Assigned back instead of inplace=True so the step is an explicit transformation.
df = df.rename(columns={
    'rate': 'rating',
    'approx_cost(for two people)': 'approx_cost_for_2_ppl',
    'listed_in(type)': 'restaurant_type',
})

In [9]:
# Check the renamed columns on a small sample.
df.head(3)

Unnamed: 0,address,name,online_order,book_table,rating,votes,location,rest_type,cuisines,approx_cost_for_2_ppl,restaurant_type
0,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1/5,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800,Buffet
1,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1/5,787,Banashankari,Casual Dining,"Chinese, North Indian, Thai",800,Buffet
2,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8/5,918,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian",800,Buffet


## Handling Null Values in Each Column

In [10]:
# Count missing values per column.
df.isnull().sum()

address                     0
name                        0
online_order                0
book_table                  0
rating                   7775
votes                       0
location                   21
rest_type                 227
cuisines                   45
approx_cost_for_2_ppl     346
restaurant_type             0
dtype: int64

In [11]:
# Columns with missing values to handle: rating, location, rest_type, cuisines, approx_cost_for_2_ppl.
# Start with rating: list its distinct raw values to see which formats need parsing.
df['rating'].unique()

array(['4.1/5', '3.8/5', '3.7/5', '3.6/5', '4.6/5', '4.0/5', '4.2/5',
       '3.9/5', '3.1/5', '3.0/5', '3.2/5', '3.3/5', '2.8/5', '4.4/5',
       '4.3/5', 'NEW', '2.9/5', '3.5/5', nan, '2.6/5', '3.8 /5', '3.4/5',
       '4.5/5', '2.5/5', '2.7/5', '4.7/5', '2.4/5', '2.2/5', '2.3/5',
       '3.4 /5', '-', '3.6 /5', '4.8/5', '3.9 /5', '4.2 /5', '4.0 /5',
       '4.1 /5', '3.7 /5', '3.1 /5', '2.9 /5', '3.3 /5', '2.8 /5',
       '3.5 /5', '2.7 /5', '2.5 /5', '3.2 /5', '2.6 /5', '4.5 /5',
       '4.3 /5', '4.4 /5', '4.9/5', '2.1/5', '2.0/5', '1.8/5', '4.6 /5',
       '4.9 /5', '3.0 /5', '4.8 /5', '2.3 /5', '4.7 /5', '2.4 /5',
       '2.1 /5', '2.2 /5', '2.0 /5', '1.8 /5'], dtype=object)

In [12]:
def handlerate(x):
    """Convert one raw rating value into a float.

    Parameters
    ----------
    x : a raw rating such as '4.1/5' or '3.8 /5', one of the placeholders
        'NEW' / '-', or a missing value (NaN).

    Returns
    -------
    float
        The number in front of the '/', or NaN for the placeholders. A NaN
        input also comes back as NaN, because str(NaN) is 'nan' and
        float('nan') is NaN.
    """
    if x != 'NEW' and x != '-':
        # Keep only the text before the first '/'; float() ignores the
        # surrounding whitespace found in values like '3.8 /5'.
        score_text = str(x).partition("/")[0]
        return float(score_text)
    return np.nan

In [13]:
# Parse every raw rating into a float (NaN for 'NEW', '-' and missing values).
df['rating'] = df['rating'].map(handlerate)

In [14]:
# Confirm that every rating is now a float (or NaN).
df['rating'].unique()

array([4.1, 3.8, 3.7, 3.6, 4.6, 4. , 4.2, 3.9, 3.1, 3. , 3.2, 3.3, 2.8,
       4.4, 4.3, nan, 2.9, 3.5, 2.6, 3.4, 4.5, 2.5, 2.7, 4.7, 2.4, 2.2,
       2.3, 4.8, 4.9, 2.1, 2. , 1.8])

In [15]:
# Re-count nulls: 'NEW' and '-' ratings are now NaN, so the rating count goes up.
df.isnull().sum()

address                      0
name                         0
online_order                 0
book_table                   0
rating                   10052
votes                        0
location                    21
rest_type                  227
cuisines                    45
approx_cost_for_2_ppl      346
restaurant_type              0
dtype: int64

In [16]:
# Fill each missing location with the next non-null value below it (backward fill).
# The result is assigned back to the column: `fillna(method=...)` is deprecated, and
# an in-place fill on a column selection does not update `df` under Copy-on-Write.
df['location'] = df['location'].bfill()

In [17]:
# Re-check null counts: location should now show no missing values.
df.isnull().sum()

address                      0
name                         0
online_order                 0
book_table                   0
rating                   10052
votes                        0
location                     0
rest_type                  227
cuisines                    45
approx_cost_for_2_ppl      346
restaurant_type              0
dtype: int64

In [18]:
# Count the distinct rest_type labels before deciding how to fill the missing ones.
df['rest_type'].nunique()

93

In [19]:
# Label missing rest_type values as 'others'.
# Assigned back to the column: an in-place fill on a column selection does not
# update `df` under pandas Copy-on-Write.
df['rest_type'] = df['rest_type'].fillna('others')

In [20]:
# Confirm the null-filling steps so far have not changed the row or column count.
df.shape

(51717, 11)

In [21]:
# Fill each missing cost with the last non-null value above it (forward fill).
# The result is assigned back to the column: `fillna(method=...)` is deprecated, and
# an in-place fill on a column selection does not update `df` under Copy-on-Write.
df['approx_cost_for_2_ppl'] = df['approx_cost_for_2_ppl'].ffill()

In [22]:
# Label missing cuisines values as 'others'.
# Assigned back to the column: an in-place fill on a column selection does not
# update `df` under pandas Copy-on-Write.
df['cuisines'] = df['cuisines'].fillna('others')

In [23]:
# Drop every row that still has a missing value in any column. location, rest_type,
# approx_cost_for_2_ppl and cuisines were filled in the cells above, so the rows
# removed here are mainly those without a numeric rating (missing, 'NEW' or '-').
df.dropna(how='any', inplace=True)

In [24]:
# Verify that no missing values remain in any column.
df.isnull().sum()

address                  0
name                     0
online_order             0
book_table               0
rating                   0
votes                    0
location                 0
rest_type                0
cuisines                 0
approx_cost_for_2_ppl    0
restaurant_type          0
dtype: int64

## Handling Outliers (Grouping Rare Categories into 'others')

In [25]:
# Look at a few cleaned rows before grouping infrequent categories.
df.head(3)

Unnamed: 0,address,name,online_order,book_table,rating,votes,location,rest_type,cuisines,approx_cost_for_2_ppl,restaurant_type
0,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1,775,Banashankari,Casual Dining,"North Indian, Mughlai, Chinese",800,Buffet
1,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1,787,Banashankari,Casual Dining,"Chinese, North Indian, Thai",800,Buffet
2,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8,918,Banashankari,"Cafe, Casual Dining","Cafe, Mexican, Italian",800,Buffet


In [26]:
# Rows per location, most frequent first (descending order is the default).
location = df['location'].value_counts()

In [27]:
# Keep only the locations that occur fewer than 500 times; the index holds their names.
location_less_than_500 = location.loc[location.lt(500)]

In [28]:
def handle_rest(x):
    """Collapse an infrequent location into the catch-all label 'others'.

    Despite its name, this helper is applied to the `location` column. It reads
    the module-level Series `location_less_than_500`, whose index holds the
    locations occurring fewer than 500 times; `in` on a Series tests its index
    labels, not its values.

    Parameters
    ----------
    x : a single location value.

    Returns
    -------
    'others' when `x` is one of the infrequent locations, otherwise `x` unchanged.
    """
    return 'others' if x in location_less_than_500 else x

In [29]:
# Replace every infrequent location with 'others'.
df['location'] = df['location'].map(handle_rest)

In [30]:
# Rows per rest_type, most frequent first (descending order is the default).
rest_type = df['rest_type'].value_counts()

In [31]:
# Keep only the rest_type labels that occur fewer than 1000 times; the index holds their names.
rest_type_less_than_1000 = rest_type.loc[rest_type.lt(1000)]

In [32]:
def handlerest_type(x):
    """Collapse an infrequent rest_type label into 'others'.

    Reads the module-level Series `rest_type_less_than_1000`, whose index holds
    the rest_type labels occurring fewer than 1000 times; `in` on a Series tests
    its index labels, not its values.

    Parameters
    ----------
    x : a single rest_type value.

    Returns
    -------
    'others' when `x` is one of the infrequent labels, otherwise `x` unchanged.
    """
    is_infrequent = x in rest_type_less_than_1000
    if not is_infrequent:
        return x
    return 'others'

In [33]:
# Replace every infrequent rest_type with 'others'.
df['rest_type'] = df['rest_type'].map(handlerest_type)

In [34]:
# Rows per cuisines string, most frequent first (descending order is the default).
cuisines = df['cuisines'].value_counts()

In [35]:
# Keep only the cuisines strings that occur fewer than 300 times; the index holds their names.
cuisines_less_than_300 = cuisines.loc[cuisines.lt(300)]

In [36]:
def handle_cuisines(x):
    """Collapse an infrequent cuisines string into 'others'.

    Reads the module-level Series `cuisines_less_than_300`, whose index holds
    the cuisines strings occurring fewer than 300 times; `in` on a Series tests
    its index labels, not its values. The whole comma-separated string is
    looked up as one label.

    Parameters
    ----------
    x : a single cuisines value.

    Returns
    -------
    'others' when `x` is one of the infrequent strings, otherwise `x` unchanged.
    """
    if x in cuisines_less_than_300:
        return 'others'
    return x

In [37]:
# Replace every infrequent cuisines string with 'others'.
df['cuisines'] = df['cuisines'].map(handle_cuisines)

In [38]:
# Inspect the result: infrequent rest_type and cuisines values now read 'others'.
df.head(3)

Unnamed: 0,address,name,online_order,book_table,rating,votes,location,rest_type,cuisines,approx_cost_for_2_ppl,restaurant_type
0,"942, 21st Main Road, 2nd Stage, Banashankari, ...",Jalsa,Yes,Yes,4.1,775,Banashankari,Casual Dining,others,800,Buffet
1,"2nd Floor, 80 Feet Road, Near Big Bazaar, 6th ...",Spice Elephant,Yes,No,4.1,787,Banashankari,Casual Dining,others,800,Buffet
2,"1112, Next to KIMS Medical College, 17th Cross...",San Churro Cafe,Yes,No,3.8,918,Banashankari,others,others,800,Buffet
