In [1]:
import pandas as pd

import numpy as np

import requests
from bs4 import BeautifulSoup

import import_ipynb

import random
import re

## Utils

In [2]:
def random_dates(start, end, n=10):
    """
    Draw `n` random timestamps, at one-second resolution, between two dates.

    Args:
        -start: lower bound as a pandas Timestamp (e.g. from pandas.to_datetime).
        -end: upper bound as a pandas Timestamp; never drawn itself.
        -n: how many dates to draw.
    Returns:
        -pandas DatetimeIndex holding the n sampled dates.
    """
    ns_per_second = 10**9
    # Timestamp.value counts nanoseconds; floor both bounds to whole seconds.
    lower_s, upper_s = (stamp.value // ns_per_second for stamp in (start, end))

    epoch_seconds = np.random.randint(lower_s, upper_s, n)
    return pd.to_datetime(epoch_seconds, unit='s')

In [3]:
def split_menu_items(df=pd.DataFrame, food_upper_index=36, desserts_upper_index=38, soda_upper_index=47):
    """
    Split a menu table into foods, desserts, drinks and beers by row position.

    The rows are cut at three positions, in order. Every slice (foods
    included) is re-indexed from 0, so the nested dicts are always keyed
    0..len-1 no matter what index `df` arrived with.

    Args:
        -df: Menu items in pandas DataFrame. (The default is the DataFrame
             class itself, not an instance, so a call without `df` is rejected.)
        -food_upper_index: row position where foods end (exclusive).
        -desserts_upper_index: row position where desserts end (exclusive).
        -soda_upper_index: row position where drinks end (exclusive); beers
             are every row from this position on.
    Returns:
        -tuple (foods_dict, desserts_dict, drinks_dict, beers_dict); each is
         DataFrame.to_dict() output, i.e. {column: {row_number: value}}.
    Raises:
        -TypeError: if df is not a pandas DataFrame.
    """
    if not isinstance(df, pd.DataFrame):
        # TypeError is still an Exception, so `except Exception` callers keep working.
        raise TypeError("df must be pandas DataFrame object")

    # Consecutive cut points; the trailing None lets the last slice run to the end.
    bounds = [0, food_upper_index, desserts_upper_index, soda_upper_index, None]

    foods_dict, desserts_dict, drinks_dict, beers_dict = (
        # iloc is purely positional; reset_index returns a new frame, so no
        # in-place mutation of a slice of the caller's DataFrame.
        df.iloc[lower:upper].reset_index(drop=True).to_dict()
        for lower, upper in zip(bounds, bounds[1:])
    )

    return foods_dict, desserts_dict, drinks_dict, beers_dict

In [4]:
def experiment(n_iter, foods_dict, desserts_dict, drinks_dict, beers_dict, key1, key2) -> list:
    """
    Simulate `n_iter` random restaurant orders.

    Each iteration draws one random item index per menu category and then a
    random client type. What ends up in the order depends on the client type:
        - 'man':   food, dessert, drink and beer
        - 'woman': food, dessert and drink
        - other:   food and drink only
    Anything not ordered is recorded as the placeholder [0.0].

    Args:
        -n_iter: number of iterations for experiment.
        -foods_dict: food items in dict format.
        -desserts_dict: dessert items in dict format.
        -drinks_dict: drink items in dict format.
        -beers_dict: beer items in dict format.
        -key1: Name of the dictionary key or "column" holding the item names.
        -key2: Name of the dictionary key or "column" holding the item prices.
         The dicts are indexed as d[key][row] with row in 0..len(d[key1])-1.
    Returns:
        -clients list object with all simulated orders; each order is
         [food, dessert, drink, beer, client_type], where an ordered item is
         [name, price].
    """
    # Project-local model, imported at call time as in the original notebook.
    from my_models import person
    clients = []

    for _ in range(n_iter):
        # One random row per category; the range is the number of rows available.
        food_idx = random.choice(range(len(foods_dict[key1])))
        dessert_idx = random.choice(range(len(desserts_dict[key1])))
        drink_idx = random.choice(range(len(drinks_dict[key1])))
        beer_idx = random.choice(range(len(beers_dict[key1])))

        # NOTE(review): the attribute is spelled `women` while the branch below
        # compares against 'woman' — confirm against my_models.person.
        client_type = random.choice([person().man, person().women, person().child])

        client_food = [foods_dict[key1][food_idx], foods_dict[key2][food_idx]]
        client_drink = [drinks_dict[key1][drink_idx], drinks_dict[key2][drink_idx]]

        if client_type == 'man':
            client_dessert = [desserts_dict[key1][dessert_idx], desserts_dict[key2][dessert_idx]]
            client_beer = [beers_dict[key1][beer_idx], beers_dict[key2][beer_idx]]
        elif client_type == 'woman':
            # Previously read the undefined name `postres_dict` and raised NameError.
            client_dessert = [desserts_dict[key1][dessert_idx], desserts_dict[key2][dessert_idx]]
            client_beer = [0.0]
        else:
            client_dessert = [0.0]
            client_beer = [0.0]

        clients.append([client_food, client_dessert, client_drink, client_beer, client_type])

    return clients

Reference for `random_dates`: https://stackoverflow.com/questions/50559078/generating-random-dates-within-a-given-range-in-pandas

## Web scrape

In [5]:
# Create function for web-scraping and return pandas DataFrame as output
def get_cevi_menu(url='https://www.lacevicheriatabasco.com/la-venta', timeout=30):
    """
    Python function to scrape menu data from "La Cevichería de Tabasco" Restaurant.

    Every <div class="menu-item"> on the page that has both a title and a
    bottom-price element becomes one row; items missing either are skipped.

    Args:
        -param url: link to the web page where the restaurant menu is shown.
        -param timeout: seconds to wait for the server before requests gives
         up, so a stalled connection cannot hang the notebook indefinitely.
    Return:
        -pandas DataFrame (restaurant menu) with columns 'Platillo' (item
         name, str) and 'Precio' (price, float).
    """
    page_response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page_response.content, 'html.parser')

    target_data = []
    for item in soup.find_all('div', class_='menu-item'):
        title_tag = item.find('div', class_='menu-item-title')
        price_tag = item.find('div', class_='menu-item-price-bottom')
        # find() returns None when the element is absent; skip incomplete items
        # explicitly instead of hiding every possible error behind a bare except.
        if title_tag is None or price_tag is None:
            continue
        target_data.append([
            title_tag.text.replace('\n', ''),
            price_tag.text.replace('\n', ''),
        ])

    df = pd.DataFrame(target_data, columns=['Platillo', 'Precio'])
    df['Precio'] = (
        df['Precio']
        # regex=False: '$' must be a literal character, not the end-of-string
        # anchor, whatever the installed pandas uses as its default.
        .str.replace('$', '', regex=False)
        # Thousands separators would otherwise make the float conversion fail.
        .str.replace(',', '', regex=False)
        .astype(float)
    )

    return df

In [6]:
def get_cevi_gn() -> pd.DataFrame:
    """
    Scrape the "menu-temporal-gaviotas" page and assemble its menu as one DataFrame.

    Takes no arguments: the page URL is hard-coded in the body.

    Item names are taken from the page's <strong> tags. Prices are recovered
    by running a digit regex over the printed form of that names column and
    discarding the matches at hard-coded positions. The rows returned by
    get_cevi_menu() for the same URL are then appended underneath.

    NOTE(review): the constants used below (36, 15, the long drop list and
    the 24-28 -> 95 replacement) are presumably tied to the page layout at the
    time of writing — re-verify them against the live page before relying on
    the result.

    Return:
        -pandas DataFrame (restaurant menu) with "Platillo" and "Precio" columns
    """
    import re
    
    url="https://www.lacevicheriatabasco.com/menu-temporal-gaviotas"
    response_cevi = requests.get(url)
    soup = BeautifulSoup(response_cevi.content, 'html.parser')

    # The stripped text of every <strong> tag is a candidate menu line.
    items = soup.find_all('strong')
    items_text = [item.get_text(strip=True) for item in items]
    df_gn = pd.DataFrame(items_text, columns=["Platillo"])
    # Keep the first 36 entries, then remove the one labelled 15 and renumber.
    # NOTE(review): presumably a non-menu line on the page — confirm.
    df_gn = df_gn[:36]
    df_gn = df_gn.drop(labels=[15], axis=0).reset_index(drop=True)
    
    # The regex runs over str(Series), i.e. the printed representation, so the
    # matches include the index labels printed beside each value, not only
    # digits that occur inside the menu text.
    prices = re.findall(r'\d+', str(df_gn["Platillo"]))
    prices_df = pd.DataFrame(prices, columns=['Precio'])
    
    # Drop unwanted regex matches by row label (axis=0 removes rows, not columns).
    # NOTE(review): presumably these are the index labels and other non-price
    # numbers picked up from the printed Series — confirm.
    prices_df.drop([0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 41, 43, 45, 47, 54, 56, 57, 59, 61, 62, 64, 66],axis=0, inplace=True)
    prices_df.reset_index(drop=True, inplace=True)
    
    # Side-by-side join: names and remaining prices are paired by row number.
    menu_gn = pd.concat([df_gn, prices_df], axis=1)
    menu_gn.reset_index(drop=True, inplace=True)
    
    # Second request to the same URL, this time parsed by get_cevi_menu().
    drinks_gn = get_cevi_menu("https://www.lacevicheriatabasco.com/menu-temporal-gaviotas")
    
    menu_gn = pd.concat([menu_gn, drinks_gn], axis=0).reset_index(drop=True)
    menu_gn["Precio"] = menu_gn["Precio"].astype(float)
    # replace() matches on cell values: any price equal to 24, 25, 26, 27 or 28
    # becomes 95, whichever row it is in.
    # NOTE(review): confirm a value-based (not row-based) replacement is intended.
    menu_gn["Precio"] = menu_gn[["Precio"]].replace(to_replace=[24, 25, 26, 27, 28], value=95)
    
    return menu_gn

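References:

- Removing `$` signs and commas from a pandas column: https://stackoverflow.com/questions/38516481/trying-to-remove-commas-and-dollars-signs-with-pandas-in-python
- Saving a scraped result set to a CSV file: https://stackoverflow.com/questions/33964785/saving-scraped-result-set-into-a-csv-file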