# In [None]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

def read_data_from_csv():
    """Load ``zomato.csv`` (relative path) and return it as a DataFrame."""
    return pd.read_csv('zomato.csv')


def remove_unwanted_columns():
    """Load the raw dataset and return it without the address/phone columns.

    Returns:
        pandas.DataFrame: the frame from ``read_data_from_csv()`` minus the
        ``address`` and ``phone`` columns.
    """
    raw = read_data_from_csv()
    return raw.drop(columns=['address', 'phone'])


def rename_columns():
    """Task 2: give the remaining columns their short names.

    The task notes list these as the only columns allowed in the dataset:
    Id, Name, online_order, book_table, rating, votes, location, rest_type,
    dish_liked, cuisines, approx_cost, type.

    This function only renames; it does not drop any column itself.

    Returns:
        pandas.DataFrame: the frame from ``remove_unwanted_columns()`` with
        the renames below applied.
    """
    new_names = {
        'name': 'name',
        'rate': 'rating',
        'approx_cost(for two people)': 'approx_cost',
        'listed_in(type)': 'type',
    }
    trimmed = remove_unwanted_columns()
    return trimmed.rename(columns=new_names)


#task3: handle  null values of each column
def null_value_check():
    """Task 3: drop rows with no name and fill nulls in the other columns.

    Rows whose ``name`` is null are removed. Nulls in the remaining columns
    are replaced with a per-column default:

    * ``online_order``, ``book_table``     -> ``False``
    * ``rating``, ``votes``, ``approx_cost`` -> ``0``
    * ``location``, ``rest_type``, ``dish_liked``, ``cuisines``, ``type``
      -> ``'NA'``

    Returns:
        pandas.DataFrame: the cleaned frame.
    """
    # call rename_columns() function to get dataframe
    hotels = rename_columns()

    # deleting null values of name column
    hotels = hotels.dropna(subset=['name'])

    fill_defaults = {
        'online_order': False,
        'book_table': False,
        'rating': 0,
        'votes': 0,
        'location': 'NA',
        'rest_type': 'NA',
        'dish_liked': 'NA',
        'cuisines': 'NA',
        'approx_cost': 0,
        'type': 'NA',
    }

    # Fill through the frame instead of `hotels[col].fillna(..., inplace=True)`.
    # The chained form mutates a temporary Series; with pandas copy-on-write
    # that never reaches `hotels`, and because this module silences all
    # warnings the no-op would go unnoticed.
    hotels = hotels.fillna(value=fill_defaults)

    # return the cleaned DataFrame
    return hotels

#task4 #find duplicates in the dataset
def find_duplicates():
    """Task 4: return the null-handled dataset with duplicate rows removed.

    Rows are compared across all columns; the first occurrence of each
    duplicated row is the one that is kept.
    """
    filled = null_value_check()
    return filled.drop_duplicates(keep='first')



#task5 removing irrelevant text from all the columns
def removing_irrelevant_text():
    """Task 5: drop every row where a column contains 'RATED' or 'Rated'.

    Each checked column is compared as text, so numeric or boolean cells
    (for example the ``0`` / ``False`` defaults written by
    ``null_value_check``) are simply treated as "no match" and kept.

    Returns:
        pandas.DataFrame: the rows of ``find_duplicates()`` in which none of
        the checked columns matches the pattern.
    """
    # call find_duplicates() function to get dataframe
    hotels = find_duplicates()

    checked_columns = [
        'name', 'online_order', 'book_table', 'rating', 'votes', 'location',
        'rest_type', 'dish_liked', 'cuisines', 'approx_cost', 'type',
    ]

    keep = pd.Series(True, index=hotels.index)
    for column in checked_columns:
        # astype(str) + na=False: `.str.contains` yields NaN for non-string
        # cells, and `NaN == False` is False, so the previous
        # `contains(...) == False` filter silently discarded those rows.
        # On a numeric-dtype column `.str` is not available at all.
        as_text = hotels[column].astype(str)
        keep &= ~as_text.str.contains('RATED|Rated', na=False)

    return hotels[keep]

#task6: check for unique values in each column and handle the irrelevant values
import re
def _letters_only_tokens(value):
    """Return the lowercase, letters-only, comma-separated tokens of *value*.

    Everything except ASCII letters and commas is deleted, the rest is
    lowercased and split at every comma. A non-string value yields ``['']``.
    """
    text = re.sub('[^a-zA-Z,]', '', value) if isinstance(value, str) else ''
    # Whitespace is already removed by the regex, so no extra strip is needed.
    return text.lower().split(',')


def check_for_unique_values():
    """Task 6: normalise the free-text columns into lists of tokens.

    ``dish_liked``, ``online_order`` and ``book_table`` are each replaced by
    a list of lowercase letters-only tokens (see ``_letters_only_tokens``).

    ``votes``, ``rating`` and ``approx_cost`` are returned unchanged. The
    letters-only filter used to be applied to them as well, which deleted
    every digit and left ``['']`` in every row of those columns.

    Returns:
        pandas.DataFrame: the frame from ``removing_irrelevant_text()`` with
        the three text columns tokenised.
    """
    # call removing_irrelevant_text() function to get dataframe
    hotels = removing_irrelevant_text()

    for column in ('dish_liked', 'online_order', 'book_table'):
        hotels[column] = hotels[column].apply(_letters_only_tokens)

    # TODO(review): votes / rating / approx_cost still need a numeric-aware
    # clean-up; the expected raw formats are not visible from this file.
    return hotels


#task7: remove the unknown character from the dataset and export it to "zomatocleaned.csv"
def remove_the_unknown_character():
    """Task 7: run the cleaning pipeline and export it to ``zomatocleaned.csv``.

    NOTE(review): no "unknown character" removal is implemented here; the
    frame returned by ``check_for_unique_values()`` is exported as-is.

    Returns:
        pandas.DataFrame: the frame that was written to disk.
    """
    #DO NOT REMOVE FOLLOWING LINE
    #call check_for_unique_values() function to get dataframe
    dataframe=check_for_unique_values()

    # Relative path; the DataFrame index is written as the first CSV column
    # (``to_csv`` default).
    export_path = 'zomatocleaned.csv'
    dataframe.to_csv(export_path)
    return dataframe


# Check that the MySQL table is created from "zomatocleaned.csv".
# Use this final dataset and upload it to the provided database for analysis in MySQL.
# To run this task, first run the application from the terminal to create the table named 'Zomato', then run the tests.
def start():
    """Run the whole cleaning pipeline; the resulting DataFrame is discarded.

    Calls ``remove_the_unknown_character()``, which also writes
    ``zomatocleaned.csv``.
    """
    remove_the_unknown_character()

def task_runner():
    """Entry point wrapper: delegates to ``start()`` and returns nothing."""
    start()