<table align="left" width=100%>
    <tr>
        <td>
            <div align="center">
                <font color="#00466C" size=8px>
                  <b> Food Delivery App Data Analysis </b>
                </font>
            </div>
        </td>
    </tr>
</table>

### This project is based on a real-world dataset from Zomato, one of the most widely used food-ordering platforms. It aims to clean the dataset, analyze it, and mine informative, high-quality insights.

In [1]:
import pandas as pd
import numpy as np
import warnings
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Widen pandas' display limits so large frames are shown without truncation.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000

In [2]:
# Import data set
def read_data_from_csv(path='zomato.csv'):
    """Load the raw restaurant dataset from a CSV file.

    Parameters
    ----------
    path : str or path-like, default 'zomato.csv'
        Location of the CSV file. The default keeps the original behaviour
        of reading 'zomato.csv' from the current working directory.

    Returns
    -------
    pandas.DataFrame
        The file contents exactly as parsed by ``pandas.read_csv``.
    """
    hotels = pd.read_csv(path)
    return hotels

In [3]:
#task1: drop unwanted columns
def remove_unwanted_columns():
    """Load the raw dataset and return it without the 'address' and 'phone' columns."""
    raw = read_data_from_csv()
    unwanted = ['address', 'phone']
    # Build a trimmed frame instead of mutating the freshly loaded one.
    trimmed = raw.drop(columns=unwanted)
    return trimmed

In [4]:
#task2: rename columns, only these columns are allowed in the dataset
def rename_columns():
    """Return the trimmed dataset with its columns relabelled to the standard names.

    The labels are assigned by position, so the frame returned by
    ``remove_unwanted_columns()`` must hold exactly eleven columns in the
    order listed below; pandas raises ``ValueError`` on a length mismatch.
    """
    #DO NOT REMOVE FOLLOWING LINE
    #call remove_unwanted_columns() function to get dataframe
    hotels = remove_unwanted_columns()

    # Only these column names are allowed in the dataset.
    # NOTE(review): the task list also mentions an "Id" field; presumably that
    # is the DataFrame index rather than a column - confirm.
    standard_names = [
        'name',
        'online_order',
        'book_table',
        'rating',
        'votes',
        'location',
        'rest_type',
        'dish_liked',
        'cuisines',
        'approx_cost',
        'type',
    ]
    hotels.columns = standard_names
    return hotels

In [5]:
#task3: handle  null values of each column
def null_value_check():
    """Return the renamed dataset with missing values handled in every column.

    Rows without a restaurant name are dropped. In the remaining columns,
    missing values are replaced by a placeholder: the string "NA" for the
    descriptive columns and 0 for 'rating', 'votes' and 'approx_cost'.

    Returns
    -------
    pandas.DataFrame
        The frame from ``rename_columns()`` with no missing values left in
        the columns listed below.
    """
    #DO NOT REMOVE FOLLOWING LINE

    #call rename_columns() function to get dataframe
    hotels = rename_columns()

    # Rows with a missing name are removed.
    hotels = hotels.dropna(subset=['name'])

    # Placeholder used for the missing values of each remaining column.
    placeholders = {
        'online_order': 'NA',
        'book_table': 'NA',
        'rating': 0,
        'votes': 0,
        'location': 'NA',
        'rest_type': 'NA',
        'dish_liked': 'NA',
        'cuisines': 'NA',
        'approx_cost': 0,
        'type': 'NA',
    }

    # Fill each column and assign the result back to the frame. Calling
    # `hotels[col].fillna(..., inplace=True)` acts on a temporary column
    # selection and does not update the frame under pandas Copy-on-Write.
    filled = {
        column: hotels[column].fillna(placeholder)
        for column, placeholder in placeholders.items()
    }
    hotels = hotels.assign(**filled)

    return hotels

In [6]:
#task4 #find duplicates in the dataset
def find_duplicates():
    """Return the null-handled dataset with fully duplicated rows removed.

    Two rows count as duplicates when they agree in every column; the first
    occurrence of each is kept.
    """
    #DO NOT REMOVE FOLLOWING LINE
    #call null_value_check() function to get dataframe
    with_nulls_handled = null_value_check()

    # Keep only the first occurrence of each fully identical row.
    unique_rows = with_nulls_handled.drop_duplicates(keep='first')
    return unique_rows

In [7]:
#task5 removing irrelevant text from all the columns
def removing_irrelevant_text():
    """Return the de-duplicated dataset without rows containing 'RATED' or 'Rated'.

    Every column is scanned in turn with the ``.str`` accessor, and a row is
    kept only while the check yields exactly ``False`` for it.
    """
    #DO NOT REMOVE FOLLOWING LINE
    #call find_duplicates() function to get dataframe
    hotels = find_duplicates()

    marker = 'RATED|Rated'
    for column in hotels.columns:
        has_marker = hotels[column].str.contains(marker)
        # NOTE(review): for non-string cells `.str.contains` yields NaN, and
        # NaN == False is False, so such rows are dropped here as well -
        # confirm this is intended.
        hotels = hotels[has_marker == False]

    return hotels

In [8]:
#task6: check for unique values in each column and handle the irrelevant values
def check_for_unique_values():
    """Return the dataset with invalid 'online_order' rows removed and 'rating' normalised.

    * Only rows whose 'online_order' value contains "Yes" or "No" are kept.
    * The "/5" suffix (with any whitespace directly before it) is stripped
      from 'rating', so both "4.1/5" and "4.1 /5" become "4.1".
    * The placeholder ratings "NEW" and "-" are replaced by 0.

    Returns
    -------
    pandas.DataFrame
        The filtered frame with the cleaned 'rating' column.
    """
    #DO NOT REMOVE FOLLOWING LINE
    #call removing_irrelevant_text() function to get dataframe
    hotels = removing_irrelevant_text()

    # Keep rows whose online_order holds a Yes/No answer. Comparing with True
    # also discards rows for which the check yields NaN (non-string cells).
    has_valid_order = hotels['online_order'].str.contains('Yes|No') == True
    hotels = hotels[has_valid_order]

    # One pattern covers both "x/5" and "x /5". With the list ["/5", " /5"]
    # the patterns are applied in order, so "x /5" became "x " (trailing
    # space) and the second pattern never matched.
    rating = hotels['rating'].replace(r"\s*/5", "", regex=True)
    # Exact-match replacement of the non-numeric placeholders.
    rating = rating.replace(["NEW", "-"], 0)

    # Assign the cleaned column back. An in-place replace on `hotels.rating`
    # acts on a temporary selection and does not update the frame under
    # pandas Copy-on-Write.
    hotels = hotels.assign(rating=rating)

    return hotels

In [9]:
#task7: remove the unknown characters from the restaurant names (the export to "zomatocleaned.csv" is done by export())
def remove_the_unknown_character():
    """Return the dataset with mis-encoded character runs removed from 'name'.

    Each match of the pattern - an 'Ã' or 'x' followed by one or more
    characters that are not ASCII letters - is deleted from the name.

    Returns
    -------
    pandas.DataFrame
        The frame from ``check_for_unique_values()`` with the cleaned
        'name' column.
    """
    #DO NOT REMOVE FOLLOWING LINE
    #call check_for_unique_values() function to get dataframe
    hotels = check_for_unique_values()

    #remove unknown character from dataset
    # regex=True must be explicit: since pandas 2.0 `str.replace` defaults to
    # regex=False, which treats the pattern as a literal string and would
    # leave every name unchanged.
    # NOTE(review): the character class also matches a plain 'x', so a name
    # containing e.g. "x 8" loses that part as well - confirm this is intended.
    cleaned_names = hotels['name'].str.replace('[Ãx][^A-Za-z]+', '', regex=True)
    hotels = hotels.assign(name=cleaned_names)

    return hotels

In [10]:
# Eliminate duplicate rows (same name and location) so the cleaned data is ready for analysis.
def final_df():
    """Return the cleaned dataset with one row per (name, location) pair.

    The rows are ordered by 'votes' (descending) and then 'name' (ascending),
    so that the first row of every (name, location) pair is the one ranked
    highest by votes. Only that row is kept, and the result is put back in
    index order.
    """
    #DO NOT REMOVE FOLLOWING LINE
    #call remove_the_unknown_character function to get dataframe
    hotels = remove_the_unknown_character()

    deduplicated = (
        hotels
        # highest vote first, ties ordered by name
        .sort_values(by=["votes", "name"], ascending=[False, True])
        # keep the top-ranked row for every name/location pair
        .drop_duplicates(subset=["name", "location"], keep='first')
        # restore the original index (id) order
        .sort_index(ascending=True)
    )
    return deduplicated

In [11]:
def export():
    """Run the full cleaning pipeline and write the result to 'zomatocleaned.csv'."""
    #DO NOT REMOVE FOLLOWING LINE
    #call final_df function to get dataframe
    cleaned = final_df()

    # The file is written to the current working directory, index included.
    output_file = 'zomatocleaned.csv'
    cleaned.to_csv(output_file)


#check that the MySQL table is created from "zomatocleaned.csv"
#Use this final dataset and upload it to the provided database for performing analysis in MySQL

In [12]:
# Run the whole cleaning pipeline and preview the first rows of the result.
df = final_df()
df.head()

Unnamed: 0,name,online_order,book_table,rating,votes,location,rest_type,dish_liked,cuisines,approx_cost,type
1,Spice Elephant,Yes,No,4.1,787,Banashankari,Casual Dining,"Momos, Lunch Buffet, Chocolate Nirvana, Thai G...","Chinese, North Indian, Thai",800,Buffet
3,Addhuri Udupi Bhojana,No,No,3.7,88,Banashankari,Quick Bites,Masala Dosa,"South Indian, North Indian",300,Buffet
4,Grand Village,No,No,3.8,166,Basavanagudi,Casual Dining,"Panipuri, Gol Gappe","North Indian, Rajasthani",600,Buffet
6,Rosewood International Hotel - Bar & Restaurant,No,No,3.6,8,Mysore Road,Casual Dining,,"North Indian, South Indian, Andhra, Chinese",800,Buffet
11,CafDown The Alley,Yes,No,4.1,402,Banashankari,Cafe,"Waffles, Pasta, Crispy Chicken, Honey Chilli C...",Cafe,500,Cafes


In [13]:
# Write the cleaned dataset to "zomatocleaned.csv"; export() calls final_df() itself, so the pipeline runs again here.
export()