# Job Requirements

# Imports, Global Variables & Utility Functions

In [4]:
# built-in packages
import os
import pickle
import time
from pprint import pprint
from typing import List, Dict, Tuple, Union, Optional, Any 
import posixpath

# note (source: phind.ai): we are not using urllib's urljoin because its results are easy to get wrong.
#       For example, if the base url doesn't have a trailing slash,
#       urljoin replaces the last path segment of the base url instead of appending to it.
#       Moreover, if the second url has a leading slash, urljoin discards the base url's path.
# from urllib.parse import urljoin, 

# pip-installed packages
import requests
from bs4 import BeautifulSoup
import pandas as pd

In [5]:
# global variables
# Country slugs; the main loop creates one data directory and one URL per entry.
COUNTRIES = ['india', 'uae']
# Output locations used by save_df: Excel files go to BASE_OUTPUT_URL/EXCEL_DIR and
# CSV files to BASE_OUTPUT_URL/CSV_DIR. Despite its name, BASE_OUTPUT_URL is combined
# with os.path.join in save_df, i.e. it is a local directory path, not a web URL.
BASE_OUTPUT_URL = 'outputs'
EXCEL_DIR = 'excel_files'
CSV_DIR = 'csv_files'

# global constants
# Site root that each country slug is appended to when building country URLs.
BASE_URL = "https://www.zomato.com/"

In [None]:
def save_df(df:pd.DataFrame, file_name_without_ext:str, to_excel:bool=True, to_csv:bool=True) -> None:
    """
    Save a dataframe to excel and/or csv files.

    The Excel file is written to BASE_OUTPUT_URL/EXCEL_DIR/<name>.xlsx and the
    CSV file to BASE_OUTPUT_URL/CSV_DIR/<name>.csv. Missing output directories
    are created first, so the first call on a fresh checkout does not fail.

    Args:
        df (pd.DataFrame): The dataframe to save.
        file_name_without_ext (str): Target file name without its extension.
        to_excel (bool, optional): Whether to write the .xlsx file (default True).
        to_csv (bool, optional): Whether to write the .csv file (default True).

    Returns:
        None
    """
    if to_excel:
        excel_path = os.path.join(BASE_OUTPUT_URL, EXCEL_DIR, file_name_without_ext + '.xlsx')
        excel_parent = os.path.dirname(excel_path)
        if excel_parent:
            # pandas does not create missing parent directories on its own
            os.makedirs(excel_parent, exist_ok=True)
        df.to_excel(excel_path, index=False)
    if to_csv:
        csv_path = os.path.join(BASE_OUTPUT_URL, CSV_DIR, file_name_without_ext + '.csv')
        csv_parent = os.path.dirname(csv_path)
        if csv_parent:
            # pandas does not create missing parent directories on its own
            os.makedirs(csv_parent, exist_ok=True)
        df.to_csv(csv_path, index=False)

In [6]:
def save_pkl(content_to_be_saved, path) -> None:
    """
    Serialize an object with pickle and write it to a file.

    Args:
        content_to_be_saved: Any object that pickle can serialize.
        path (str): Relative or absolute path of the destination file.
            An existing file at that path is overwritten.

    Example:
        >>> save_pkl({'name': 'John', 'age': 30}, 'data.pkl')

    Returns:
        None
    """
    # binary write mode: pickle produces bytes, not text
    with open(path, mode='wb') as pkl_file:
        pickle.dump(content_to_be_saved, pkl_file)

def load_pkl(path:str) -> Union[bool, Any]:
    """
    Load data from a file using pickle deserialization.

    Args:
        path (str): The full path to the file to be loaded.

    Example:
        >>> loaded_data = load_pkl('data.pkl')

    Returns:
        The loaded content if the file exists, or False if the file does not exist.
        Note that a file containing a pickled False is indistinguishable from a
        missing file through this return value.

    Raises:
        Whatever pickle.load raises for a file that exists but cannot be unpickled
        (for example pickle.UnpicklingError or EOFError).

    Notes:
        "path" could either mean relative path or absolute path.
        SECURITY: unpickling can execute arbitrary code. Only load files that
        were produced by this project, never files from an untrusted source.
    """
    # Open first and handle "missing" afterwards: a separate exists() check could
    # pass and the file still disappear before open(), raising instead of
    # returning the documented False.
    try:
        pkl_file = open(path, 'rb')
    except (FileNotFoundError, NotADirectoryError):
        return False
    with pkl_file:
        return pickle.load(pkl_file)

def force_load_pkl(path:str, dtype:str='dict') -> Any:
    """
    Load data from a file using pickle deserialization. If the file does not exist
    (or cannot be unpickled), write an empty dictionary or list to that path and
    return it.

    Args:
        path (str): The full path to the file to be loaded or created.
        dtype (str, optional): The data type to create if the file does not exist.
            'list' yields an empty list; any other value yields an empty dictionary.

    Example:
        >>> loaded_data = force_load_pkl('data.pkl', dtype='dict')

    Returns:
        The loaded content if the file exists and can be unpickled, otherwise the
        freshly created empty dictionary or list.

    Notes:
        "path" could either mean relative path or absolute path.
        An existing file that fails to load is overwritten with the empty container.
    """
    # load_pkl signals a missing file by returning False rather than raising, so
    # the existence check has to happen here; relying on an exception alone would
    # hand False back to the caller and never create the file.
    if os.path.exists(path):
        try:
            return load_pkl(path)
        except Exception:
            # Deliberate best-effort: an unreadable or corrupt pickle falls
            # through and is replaced by a fresh empty container below.
            pass

    empty_content = [] if dtype == 'list' else {}
    save_pkl(empty_content, path)
    return empty_content

def join_paths(base_path:str, *relative_paths:str) -> str:
    """
    Join a base path with any number of relative paths and normalize the result.

    Args:
        base_path (str): The base directory path.
        *relative_paths (str): Zero or more path components, appended to
            base_path in the given order.

    Example:
        >>> result = join_paths('/user/documents', 'files/myfile.txt')

    Returns:
        The joined path after normalization with os.path.normpath (redundant
        separators and '.'/'..' components are collapsed).

    Notes:
        More about naming standards for path components here: https://stackoverflow.com/a/2235762
    """
    # use the actual parameter names and forward every relative component
    return os.path.normpath(os.path.join(base_path, *relative_paths))

def change_sep(path:str, new_sep:str, old_sep:str=os.path.sep) -> str:
    """
    Normalize a path and then swap one separator string for another.

    Args:
        path (str): The path to convert.
        new_sep (str): The separator that should appear in the result.
        old_sep (str, optional): The separator to be replaced (default is os.path.sep).

    Example:
        >>> result = change_sep('path/to/change', '/', '\\')

    Returns:
        The normalized path with every occurrence of old_sep replaced by new_sep.

    Notes:
        Normalization (os.path.normpath) runs first; the replacement is applied to its output.
        Source 1: https://stackoverflow.com/questions/18707338/print-raw-string-from-variable-not-getting-the-answers#:~:text=To%20turn%20a%20variable%20to%20raw%20str%2C%20just%20use
        Source 2: https://mail.python.org/pipermail/tutor/2011-July/084788.html
    """
    normalized_path = os.path.normpath(f"{path}")
    return normalized_path.replace(old_sep, new_sep)


# Main Functions

# Main Code

In [None]:
for country in COUNTRIES:
    # Make sure the per-country data directory exists. exist_ok=True turns the
    # call into a no-op when the directory is already there, so no separate
    # exists() check (and no gap between check and creation) is needed.
    country_dir = join_paths('data', country)
    os.makedirs(country_dir, exist_ok=True)

    # Build the country url; posixpath.join always joins with '/', whatever the OS.
    country_url = posixpath.join(BASE_URL, country)
    print(f"Current Country URL: {country_url}")

# Visualizing Functions' Return Values