In [None]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

def read_data_from_csv():
    """Load the raw Zomato dataset.

    Returns:
        pandas.DataFrame: the contents of ``zomato.csv``. The path is
        relative, so it is resolved against the current working directory.
    """
    return pd.read_csv('zomato.csv')


def remove_unwanted_columns():
    """Load the raw dataset and return it without ``address`` and ``phone``.

    Returns:
        pandas.DataFrame: the frame from read_data_from_csv() minus the two
        dropped columns.

    Raises:
        KeyError: if either column is missing from the loaded frame.
    """
    #DO NOT REMOVE FOLLOWING LINE
    #call read_data_from_csv() function to get dataframe
    hotels=read_data_from_csv()
    return hotels.drop(columns=['address', 'phone'])


def rename_columns():
    """Return the trimmed dataset with three headers renamed.

    ``rate`` becomes ``rating``, ``approx_cost(for two people)`` becomes
    ``approx_cost`` and ``listed_in(type)`` becomes ``type``; every other
    column keeps its name.

    Returns:
        pandas.DataFrame: renamed version of the frame produced by
        remove_unwanted_columns().
    """
    #DO NOT REMOVE FOLLOWING LINE
    #call remove_unwanted_columns() function to get dataframe
    hotels = remove_unwanted_columns()

    #task2: rename columns. Only these columns are allowed in the dataset:
    #   Id, Name, online_order, book_table, rating, votes, location,
    #   rest_type, dish_liked, cuisines, approx_cost, type
    old_headers = ("rate", "approx_cost(for two people)", "listed_in(type)")
    new_headers = ("rating", "approx_cost", "type")
    return hotels.rename(columns=dict(zip(old_headers, new_headers)))


#task3: handle null values of each column
def null_value_check():
    """Drop rows without a name and fill the remaining nulls column by column.

    Rows whose ``name`` is null are removed. In the other columns nulls are
    replaced with a placeholder: the string ``'NA'`` for ``online_order``,
    ``book_table``, ``location``, ``rest_type``, ``dish_liked``, ``cuisines``
    and ``type``, and the integer ``0`` for ``rating``, ``votes`` and
    ``approx_cost``.

    Returns:
        pandas.DataFrame: the null-free frame.

    Raises:
        KeyError: if one of the listed columns is missing.
    """
    #DO NOT REMOVE FOLLOWING LINE
    #call rename_columns() function to get dataframe
    hotels=rename_columns()

    # deleting rows whose name is null. The explicit copy gives an independent
    # frame, so the column assignments below cannot raise
    # SettingWithCopyWarning (pandas may otherwise treat the dropna() result
    # as derived from its parent).
    hotels = hotels.dropna(subset=['name']).copy()

    # column -> value used in place of nulls (same order as the task list)
    placeholders = {
        'online_order': 'NA',
        'book_table': 'NA',
        'rating': 0,
        'votes': 0,
        'location': 'NA',
        'rest_type': 'NA',
        'dish_liked': 'NA',
        'cuisines': 'NA',
        'approx_cost': 0,
        'type': 'NA',
    }
    for column, placeholder in placeholders.items():
        hotels[column] = hotels[column].fillna(placeholder)
    return hotels


#task4: find duplicates in the dataset and drop them
def find_duplicates():
    """Return the null-handled dataset with duplicate rows removed.

    Rows that repeat an earlier row in every column are dropped; the first
    occurrence of each row is kept.

    Returns:
        pandas.DataFrame: the de-duplicated frame.
    """
    #DO NOT REMOVE FOLLOWING LINE
    #call null_value_check() function to get dataframe
    hotels=null_value_check()

    # dropping the duplicate rows, keeping the first occurrence
    return hotels.drop_duplicates(keep='first')


#task5: remove irrelevant text from all the columns
def removing_irrelevant_text():
    """Return the de-duplicated dataset produced by find_duplicates().

    No text is removed inside this function: it passes the frame through
    unchanged. The text clean-up is done by the notebook cells that follow,
    which operate on the returned frame.

    Returns:
        pandas.DataFrame: the frame returned by find_duplicates().
    """
    #DO NOT REMOVE FOLLOWING LINE
    #call find_duplicates() function to get dataframe
    hotels= find_duplicates()

    return hotels

In [None]:
# Run the whole chain (read csv -> drop columns -> rename -> fill nulls ->
# drop duplicates) and preview the first rows of the result.
df = removing_irrelevant_text() 
df.head()

Unnamed: 0,name,online_order,book_table,rating,votes,location,rest_type,dish_liked,cuisines,approx_cost,type
0,Jalsa,Yes,Yes,4.1/5,775,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,Buffet
1,Spice Elephant,Yes,No,4.1/5,787,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,Buffet
2,San Churro Cafe,Yes,No,3.8/5,918,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,Buffet
3,Addhuri Udupi Bhojana,No,No,3.7/5,88,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,Buffet
4,Grand Village,No,No,3.8/5,166,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,Buffet


In [None]:
# How often each value occurs in the name column. In the recorded output the
# most frequent "names" are review fragments such as "('Rated 4.0'", not
# restaurant names.
df['name'].value_counts()

 ('Rated 4.0'                                                                                                                                                                                       288
 ('Rated 5.0'                                                                                                                                                                                       221
 ('Rated 3.0'                                                                                                                                                                                       111
Cafe Coffee Day                                                                                                                                                                                      88
Empire Restaurant                                                                                                                                                                                    64


In [None]:
# Display the frame; the recorded output shows the first and last rows with
# the middle elided.
df

Unnamed: 0,name,online_order,book_table,rating,votes,location,rest_type,dish_liked,cuisines,approx_cost,type
0,Jalsa,Yes,Yes,4.1/5,775,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,Buffet
1,Spice Elephant,Yes,No,4.1/5,787,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,Buffet
2,San Churro Cafe,Yes,No,3.8/5,918,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,Buffet
3,Addhuri Udupi Bhojana,No,No,3.7/5,88,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,Buffet
4,Grand Village,No,No,3.8/5,166,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,Buffet
...,...,...,...,...,...,...,...,...,...,...,...
56247,Best Brews - Four Points by Sheraton Bengaluru...,No,No,3.6 /5,27,Whitefield,Bar,,Continental,1500,Pubs and bars
56248,Vinod Bar And Restaurant,No,No,0,0,Whitefield,Bar,,Finger Food,600,Pubs and bars
56249,Plunge - Sheraton Grand Bengaluru Whitefield H...,No,No,0,0,Whitefield,Bar,,Finger Food,2000,Pubs and bars
56250,Chime - Sheraton Grand Bengaluru Whitefield Ho...,No,Yes,4.3 /5,236,"ITPL Main Road, Whitefield",Bar,"Cocktails, Pizza, Buttermilk",Finger Food,2500,Pubs and bars


In [None]:
# Work on a copy so that `df` itself is left untouched.
hotels = df.copy()

# Drop every row in which one of these columns mentions "RATED" or "Rated"
# (review text that ended up in the wrong column). The columns are checked
# one after another, in this order.
review_text_columns = [
    'name', 'online_order', 'book_table', 'rating', 'votes', 'location',
    'rest_type', 'dish_liked', 'cuisines', 'approx_cost', 'type',
]
for column in review_text_columns:
    mentions_rated = hotels[column].str.contains('RATED|Rated')
    # NOTE(review): `== False` keeps only rows whose match result is exactly
    # False. A value that is not a string (for example the integer 0 that
    # null_value_check() fills into rating / votes / approx_cost) produces NaN
    # instead, so such rows are dropped too. The recorded outputs agree:
    # rows 56248 and 56249 (rating 0) are shown before this cell and missing
    # after it. Confirm that this is intended.
    hotels = hotels[mentions_rated == False]
hotels

Unnamed: 0,name,online_order,book_table,rating,votes,location,rest_type,dish_liked,cuisines,approx_cost,type
0,Jalsa,Yes,Yes,4.1/5,775,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,Buffet
1,Spice Elephant,Yes,No,4.1/5,787,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,Buffet
2,San Churro Cafe,Yes,No,3.8/5,918,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,Buffet
3,Addhuri Udupi Bhojana,No,No,3.7/5,88,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,Buffet
4,Grand Village,No,No,3.8/5,166,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,Buffet
...,...,...,...,...,...,...,...,...,...,...,...
56244,The Farm House Bar n Grill,No,No,3.7 /5,34,Whitefield,"Casual Dining, Bar",,"North Indian, Continental",800,Pubs and bars
56246,Bhagini,No,No,2.5 /5,81,Whitefield,"Casual Dining, Bar","Biryani, Andhra Meal","Andhra, South Indian, Chinese, North Indian",800,Pubs and bars
56247,Best Brews - Four Points by Sheraton Bengaluru...,No,No,3.6 /5,27,Whitefield,Bar,,Continental,1500,Pubs and bars
56250,Chime - Sheraton Grand Bengaluru Whitefield Ho...,No,Yes,4.3 /5,236,"ITPL Main Road, Whitefield",Bar,"Cocktails, Pizza, Buttermilk",Finger Food,2500,Pubs and bars


In [None]:
# Every online_order value other than an exact 'Yes' or 'No' becomes 'No'.
is_yes_or_no = hotels['online_order'].isin(['Yes', 'No'])
hotels['online_order'] = hotels['online_order'].where(is_yes_or_no, 'No')
hotels

Unnamed: 0,name,online_order,book_table,rating,votes,location,rest_type,dish_liked,cuisines,approx_cost,type
0,Jalsa,Yes,Yes,4.1/5,775,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,Buffet
1,Spice Elephant,Yes,No,4.1/5,787,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,Buffet
2,San Churro Cafe,Yes,No,3.8/5,918,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,Buffet
3,Addhuri Udupi Bhojana,No,No,3.7/5,88,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,Buffet
4,Grand Village,No,No,3.8/5,166,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,Buffet
...,...,...,...,...,...,...,...,...,...,...,...
56244,The Farm House Bar n Grill,No,No,3.7 /5,34,Whitefield,"Casual Dining, Bar",,"North Indian, Continental",800,Pubs and bars
56246,Bhagini,No,No,2.5 /5,81,Whitefield,"Casual Dining, Bar","Biryani, Andhra Meal","Andhra, South Indian, Chinese, North Indian",800,Pubs and bars
56247,Best Brews - Four Points by Sheraton Bengaluru...,No,No,3.6 /5,27,Whitefield,Bar,,Continental,1500,Pubs and bars
56250,Chime - Sheraton Grand Bengaluru Whitefield Ho...,No,Yes,4.3 /5,236,"ITPL Main Road, Whitefield",Bar,"Cocktails, Pizza, Buttermilk",Finger Food,2500,Pubs and bars


In [None]:
# Sanity check: list the distinct values present in online_order.
hotels['online_order'].unique().tolist()

['Yes', 'No']

In [None]:
# Drop rows whose rating contains a lowercase letter or a capital "S".
# Upper-case-only values such as 'NEW' do not match and are kept.
rating_has_letters = hotels['rating'].str.contains('[a-zS]')
hotels = hotels.loc[~rating_has_letters]
hotels

Unnamed: 0,name,online_order,book_table,rating,votes,location,rest_type,dish_liked,cuisines,approx_cost,type
0,Jalsa,Yes,Yes,4.1/5,775,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,Buffet
1,Spice Elephant,Yes,No,4.1/5,787,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,Buffet
2,San Churro Cafe,Yes,No,3.8/5,918,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,Buffet
3,Addhuri Udupi Bhojana,No,No,3.7/5,88,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,Buffet
4,Grand Village,No,No,3.8/5,166,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,Buffet
...,...,...,...,...,...,...,...,...,...,...,...
56244,The Farm House Bar n Grill,No,No,3.7 /5,34,Whitefield,"Casual Dining, Bar",,"North Indian, Continental",800,Pubs and bars
56246,Bhagini,No,No,2.5 /5,81,Whitefield,"Casual Dining, Bar","Biryani, Andhra Meal","Andhra, South Indian, Chinese, North Indian",800,Pubs and bars
56247,Best Brews - Four Points by Sheraton Bengaluru...,No,No,3.6 /5,27,Whitefield,Bar,,Continental,1500,Pubs and bars
56250,Chime - Sheraton Grand Bengaluru Whitefield Ho...,No,Yes,4.3 /5,236,"ITPL Main Road, Whitefield",Bar,"Cocktails, Pizza, Buttermilk",Finger Food,2500,Pubs and bars


In [None]:
# Distinct rating values before the placeholders are replaced.
hotels['rating'].unique().tolist()

['4.1/5',
 '3.8/5',
 '3.7/5',
 '3.6/5',
 '4.6/5',
 '4.0/5',
 '4.2/5',
 '3.9/5',
 '3.1/5',
 '3.0/5',
 '3.2/5',
 '3.3/5',
 '2.8/5',
 '4.4/5',
 '4.3/5',
 'NEW',
 '2.9/5',
 '3.5/5',
 '2.6/5',
 '3.8 /5',
 '3.4/5',
 '4.5/5',
 '2.5/5',
 '2.7/5',
 '4.7/5',
 '2.4/5',
 '2.2/5',
 '2.3/5',
 '3.4 /5',
 '-',
 '3.6 /5',
 '4.8/5',
 '3.9 /5',
 '4.2 /5',
 '4.0 /5',
 '4.1 /5',
 '3.7 /5',
 '3.1 /5',
 '2.9 /5',
 '3.3 /5',
 '2.8 /5',
 '3.5 /5',
 '2.7 /5',
 '2.5 /5',
 '3.2 /5',
 '2.6 /5',
 '4.5 /5',
 '4.3 /5',
 '4.4 /5',
 '4.9/5',
 '2.1/5',
 '2.0/5',
 '1.8/5',
 '4.6 /5',
 '4.9 /5',
 '3.0 /5',
 '4.8 /5',
 '2.3 /5',
 '4.7 /5',
 '2.4 /5',
 '2.1 /5',
 '2.2 /5',
 '2.0 /5',
 '1.8 /5']

In [None]:
# The placeholders 'NEW' and '-' carry no score; store them as the string '0'.
hotels['rating'] = hotels['rating'].replace({'NEW': '0', '-': '0'})

In [None]:
# Distinct rating values after the placeholders were replaced.
hotels['rating'].unique().tolist()

['4.1/5',
 '3.8/5',
 '3.7/5',
 '3.6/5',
 '4.6/5',
 '4.0/5',
 '4.2/5',
 '3.9/5',
 '3.1/5',
 '3.0/5',
 '3.2/5',
 '3.3/5',
 '2.8/5',
 '4.4/5',
 '4.3/5',
 '0',
 '2.9/5',
 '3.5/5',
 '2.6/5',
 '3.8 /5',
 '3.4/5',
 '4.5/5',
 '2.5/5',
 '2.7/5',
 '4.7/5',
 '2.4/5',
 '2.2/5',
 '2.3/5',
 '3.4 /5',
 '3.6 /5',
 '4.8/5',
 '3.9 /5',
 '4.2 /5',
 '4.0 /5',
 '4.1 /5',
 '3.7 /5',
 '3.1 /5',
 '2.9 /5',
 '3.3 /5',
 '2.8 /5',
 '3.5 /5',
 '2.7 /5',
 '2.5 /5',
 '3.2 /5',
 '2.6 /5',
 '4.5 /5',
 '4.3 /5',
 '4.4 /5',
 '4.9/5',
 '2.1/5',
 '2.0/5',
 '1.8/5',
 '4.6 /5',
 '4.9 /5',
 '3.0 /5',
 '4.8 /5',
 '2.3 /5',
 '4.7 /5',
 '2.4 /5',
 '2.1 /5',
 '2.2 /5',
 '2.0 /5',
 '1.8 /5']

In [None]:
# Keep only the text in front of the first "/", trimmed: '3.8 /5' -> '3.8'.
hotels['rating'] = hotels['rating'].apply(
    lambda score_text: score_text.partition('/')[0].strip()
)
def convert_to_int(x):
    """Map the string ``'0'`` to the integer ``0``.

    Args:
        x: a rating value.

    Returns:
        ``0`` (int) when ``x`` equals the string ``'0'``; otherwise ``x``
        itself, unchanged. Other rating strings are not converted to numbers.
    """
    return 0 if x == '0' else x
# Run convert_to_int over every rating, then display the frame. Only the
# string '0' is turned into a number, so the column can hold a mix of the
# integer 0 and rating strings.
hotels['rating'] = hotels['rating'].apply(convert_to_int)
hotels

Unnamed: 0,name,online_order,book_table,rating,votes,location,rest_type,dish_liked,cuisines,approx_cost,type
0,Jalsa,Yes,Yes,4.1,775,Banashankari,Casual Dining,"Pasta, Lunch Buffet, Masala Papad, Paneer Laja...","North Indian, Mughlai, Chinese",800,Buffet
1,Spice Elephant,Yes,No,4.1,787,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,Buffet
2,San Churro Cafe,Yes,No,3.8,918,Banashankari,"Cafe, Casual Dining","Churros, Cannelloni, Minestrone Soup, Hot Choc...","Cafe, Mexican, Italian",800,Buffet
3,Addhuri Udupi Bhojana,No,No,3.7,88,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,Buffet
4,Grand Village,No,No,3.8,166,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,Buffet
...,...,...,...,...,...,...,...,...,...,...,...
56244,The Farm House Bar n Grill,No,No,3.7,34,Whitefield,"Casual Dining, Bar",,"North Indian, Continental",800,Pubs and bars
56246,Bhagini,No,No,2.5,81,Whitefield,"Casual Dining, Bar","Biryani, Andhra Meal","Andhra, South Indian, Chinese, North Indian",800,Pubs and bars
56247,Best Brews - Four Points by Sheraton Bengaluru...,No,No,3.6,27,Whitefield,Bar,,Continental,1500,Pubs and bars
56250,Chime - Sheraton Grand Bengaluru Whitefield Ho...,No,Yes,4.3,236,"ITPL Main Road, Whitefield",Bar,"Cocktails, Pizza, Buttermilk",Finger Food,2500,Pubs and bars


In [None]:
# Delete every 'Ã' or 'x' together with the run of characters other than
# A-Z / a-z that directly follows it (presumably aimed at mis-encoded
# characters in the names).
# NOTE(review): the 'x' branch also matches ordinary names in which an 'x' is
# followed by a space, digit or punctuation -- confirm this is acceptable.
name_junk_pattern = '[Ãx][^A-Za-z]+'
hotels['name'] = hotels['name'].str.replace(name_junk_pattern, '', regex=True)

#export cleaned Dataset to a new csv file named "zomatocleaned.csv"
# index=True also writes the row labels as the first column of the file.
hotels.to_csv('zomatocleaned.csv', index=True)