In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

from fpdf import FPDF
from contextlib import redirect_stdout
# from reportlab.pdfgen.canvas import Canvas
import os

# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)

#remove the measurements
import unicodedata
# NOTE(review): the two calls below discard their return values, so they have
# no visible effect when this cell runs - presumably leftovers from exploring
# how the '⅕' character is classified; confirm before removing.
unicodedata.numeric(u'⅕')
unicodedata.name(u'⅕')
import spacy
import re
# pip install spacy
# !python -m spacy download en_core_web_sm

In [48]:
# Starting coding

# 1st step - The user inputs the ingredients they have (or want to use)
def get_existing_ingredients():
    """Prompt for the ingredient list and start the recipe search.

    The user types the ingredients (comma separated) and is then asked to
    confirm the list.  Any answer other than "yes"/"y" (case-insensitive,
    surrounding whitespace ignored) asks for the list again.  Once confirmed,
    the raw ingredient string is passed to ``get_recipes``.

    Returns:
        None.
    """
    # A loop instead of self-recursion: every rejected list used to add a
    # stack frame, so a long editing session could end in RecursionError.
    while True:
        ingredients = input("Input the ingredients you choosed, separated by ',': ")
        list_confirmation = input('Is your the list ready? please confirm (Yes/No): ')
        print('\n')
        # strip() so that an accidental trailing space does not reject "yes ".
        if list_confirmation.strip().lower() in ("yes", "y"):
            break
    get_recipes(ingredients)


# 2nd step - Query the recipe API for recipes containing the entered ingredients
# Unit / size / preparation words that describe a quantity rather than the
# ingredient itself.  They are compared against a WHOLE token (optionally
# followed by a plural "s"), case-insensitively - see _is_measurement.
_MEASUREMENT_WORDS = re.compile(
    r'(?:sliced|halves|quart|pure|raw|unsweetened|flaky|extra|fresh|juice|clear'
    r'|weight|small|tub|tablespoon|lb|homemade|bowl|g|c|medium|slices|kg|qts'
    r'|lqts|lbs|tsp|tbs|tbsp|bulb|cube|clove|cup|drop|ounce|oz|pinch|pound'
    r'|teaspoon|large|grams|whole)s?',
    re.IGNORECASE,
)


def _is_measurement(text):
    """Return True when a token is a quantity/unit word rather than an ingredient."""
    # Anything starting with a digit or a vulgar fraction ("2", "100ml", "½")
    # is a quantity.  Words must match completely: the previous prefix match
    # treated "garlic" (g), "Cheese" (C) or "puree" (pure) as measurements.
    return text[:1].isnumeric() or _MEASUREMENT_WORDS.fullmatch(text) is not None


def _extract_ingredient(base_model, line):
    """Return the ingredient noun phrase found in one recipe line ('' if none)."""
    if not isinstance(line, str):
        # explode() yields NaN for a recipe whose ingredient list is empty.
        return ''
    words = []
    for token in base_model(line):
        is_head_noun = token.dep_ in ('nsubj', 'ROOT') and token.pos_ in ('NOUN', 'PROPN')
        if is_head_noun and not _is_measurement(token.text):
            # Keep the adjectives / compound nouns attached to the head noun.
            words.extend(
                child.text
                for child in token.children
                if child.dep_ in ('amod', 'compound') and not _is_measurement(child.text)
            )
            words.append(token.text)
    return ' '.join(words)


def get_recipes(ingredients):
    """Search recipes for ``ingredients`` and hand the three shortest ones on.

    The Edamam recipe-search endpoint (via RapidAPI) is queried with the raw
    ingredient string.  Every ingredient line of every hit is reduced to its
    ingredient name with spaCy, recipes are ranked by how many ingredients
    they need, and the (up to) three shortest recipes are passed to
    ``get_ingredient_detail`` together with the cleaned table.

    Args:
        ingredients: Free-text ingredient list as typed by the user.

    Returns:
        None.  When the API returns no usable recipe a message is printed and
        ``get_ingredient_detail`` is not called.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    url = "https://edamam-recipe-search.p.rapidapi.com/search"
    querystring = {"q": ingredients}
    headers = {
        # FIXME(security): this key is committed in source.  Set RAPIDAPI_KEY
        # in the environment, rotate the key and then drop the fallback.
        "X-RapidAPI-Key": os.environ.get(
            "RAPIDAPI_KEY", "636e6bafbemsh4d60f9770bbe76cp1e897ejsn1e8e8c2c9a8c"
        ),
        "X-RapidAPI-Host": "edamam-recipe-search.p.rapidapi.com",
    }

    # timeout: without it a stalled connection blocks the notebook forever.
    response = requests.request("GET", url, headers=headers, params=querystring, timeout=30)
    response.raise_for_status()

    hits = response.json().get('hits', [])
    if not hits:
        print('Sorry, no recipes were found for: ', ingredients)
        return

    recipe_data = pd.json_normalize(hits).rename(
        columns={'recipe.ingredientLines': 'ingredient_line'}
    )
    # One row per ingredient line; the index keeps identifying the recipe.
    exploded = recipe_data.explode('ingredient_line')

    # Tokenizes each line and tags every word with its grammatical role.
    base_model = spacy.load('en_core_web_sm')

    # copy(): adding a column to a column-slice triggers SettingWithCopyWarning.
    clean_recipe = exploded[['recipe.label', 'recipe.url', 'recipe.healthLabels',
                             'recipe.calories', 'recipe.totalTime', 'recipe.mealType']].copy()
    clean_recipe['ingredient'] = [
        _extract_ingredient(base_model, line) for line in exploded['ingredient_line']
    ]
    # Drop the lines in which no ingredient could be recognised.
    clean_recipe = clean_recipe[clean_recipe['ingredient'] != ""]
    if clean_recipe.empty:
        print('Sorry, no recipes were found for: ', ingredients)
        return

    # Rank every returned recipe by its number of ingredients (fewest first).
    # Counting rows per index label works for any number of hits and for
    # recipes left with a single row; the old fixed range(0, 9) did not.
    # mergesort is stable, so ties keep the API's original order.
    ingredient_counts = clean_recipe.groupby(level=0).size().sort_values(kind='mergesort')

    # Only the three shortest recipes are shown to the user.
    shorter_list = ingredient_counts.index[:3].tolist()
    get_ingredient_detail(shorter_list, clean_recipe, ingredients)

    
# 3rd step - At this point we have the initial ingredients and the recipes that include them.
# This step calls the function that web-scrapes the missing ingredients
def get_ingredient_detail(shorter_list, clean_recipe, ingredients):
    """Print a summary of each selected recipe and look up its ingredients.

    For every recipe in ``shorter_list`` (at most three) the recipe header is
    printed, the grocery search is run through ``missing_ingredients`` and the
    user is offered a PDF copy through ``you_want_copy``.

    Args:
        shorter_list: Index labels of ``clean_recipe`` identifying the recipes
            to show, already ordered from fewest to most ingredients.
        clean_recipe: DataFrame with one row per (recipe, ingredient); the
            index label identifies the recipe.  Reads the columns
            'ingredient', 'recipe.label', 'recipe.totalTime',
            'recipe.calories' and 'recipe.url'.
        ingredients: The ingredient string originally typed by the user.

    Returns:
        None.
    """
    # Slicing instead of a fixed range(0, 3): fewer than three recipes no
    # longer raises IndexError.
    for x, recipe_id in enumerate(shorter_list[:3]):
        # .loc[[label]] always yields a DataFrame, even for a one-row recipe
        # (a bare .loc[label] would return that row as a Series and
        # list(...) would then split the ingredient into characters).
        recipe_rows = clean_recipe.loc[[recipe_id]]
        list_recipe_x = list(recipe_rows['ingredient'])
        print('\n')
        # The header is read from the rows of the selected recipe; it used to
        # be looked up with the loop position x, i.e. it always described
        # recipes 0, 1 and 2 regardless of which ones were selected.
        print("RECIPE No",x+1,'-', recipe_rows['recipe.label'].unique())
        print('-------------')
        print('\n','- Existing Ingredients       : ', ingredients)
        print(' - Missing Ingredients (',len(list_recipe_x),')  : ', (', '.join(list_recipe_x)))
        print(' - Cooking Time (minutes)     : ', recipe_rows['recipe.totalTime'].mean())
        print(' - Recipe Calories (1 portion): ', recipe_rows['recipe.calories'].unique())
        print(' - Cooking Instructions       : ', recipe_rows['recipe.url'].unique())
        missing_ingredients(list_recipe_x)
        # The export helpers receive x (used for the file name) and look the
        # recipe rows up under that same label, so hand them only this
        # recipe's rows, re-labelled with x.
        export_rows = recipe_rows.copy()
        export_rows.index = [x] * len(export_rows)
        you_want_copy(list_recipe_x, x, export_rows, ingredients)
    

In [49]:

# 4th step - This function web-scrapes the missing ingredients and shows the results
def missing_ingredients(list_of_ingredients):
    """Print a grocery list with shop offers for every ingredient.

    Each ingredient (lower-cased) is searched on abelandcole.co.uk and the
    first two "name price" matches are printed.  When that shop yields no
    usable match, ``single_ingredient`` is called to try a second shop.

    Args:
        list_of_ingredients: Iterable of ingredient names (strings).

    Returns:
        None; all results are printed.
    """
    list_of_ingredients = [item.lower() for item in list_of_ingredients]
    print('\n', 'GROCERY LIST')
    for order, ingredient in enumerate(list_of_ingredients, start=1):
        # params= lets requests URL-encode the term ("olive oil", "salt & pepper");
        # timeout keeps a stalled shop from hanging the notebook.
        response = requests.get(
            "https://www.abelandcole.co.uk/shop/search",
            params={"term": ingredient, "searchCat": "products"},
            timeout=30,
        )
        # Explicit parser: avoids bs4's "no parser specified" warning and
        # results that depend on which parsers happen to be installed.
        soup = BeautifulSoup(response.content, "html.parser")

        #         Searching for ingredient name at the web
        names = [
            option.text.replace('\n', '')
            for option in soup.find_all("div", attrs={'class': 'product-title'})
        ]

        #         Searching for ingredient price at the web
        prices = [
            option.text.replace('\n', '').replace('\r', '').replace(' ', '').replace('£', '    £')
            for option in soup.find_all("div", attrs={'class': 'product-price'})
        ]

        # zip stops at the shorter list, so a product without a price block
        # no longer raises IndexError.
        final_list = [name + ' ' + price for name, price in zip(names, prices)]

        print('\n', str(order)+"."+ingredient.upper())
        if final_list:
            print('\n'.join(final_list[:2]))
        else:
            # Nothing usable at the first shop: try the second one.
            single_ingredient(ingredient)

    print('\n')
    print('================================================================================')
    print('Usual Supplier (unless different is indicated) is: https://www.abelandcole.co.uk')
    print('Considerations for usual Supplier:','\n','     - Price without taxes','\n','     - if there is a 2nd price detailed consider as "offert"')


# 5th step - after each recipe (once its missing ingredients have been searched) the code asks whether the user wants a .pdf copy
def you_want_copy(list_recipe_x, x, clean_recipe, ingredients):
    """Ask whether a PDF copy of the recipe is wanted and create it if so.

    On "yes"/"y" (case-insensitive, surrounding whitespace ignored)
    ``create_results_files`` is called with the same arguments and the
    intermediate '<x+1>.txt' file is removed afterwards.

    Args:
        list_recipe_x: Ingredient names of the recipe.
        x: Zero-based position of the recipe; the files are named after x+1.
        clean_recipe: Recipe table, forwarded unchanged.
        ingredients: The ingredient string typed by the user, forwarded unchanged.

    Returns:
        None.
    """
    #     Checking if user wants a copy file with info
    print('================================================================================')
    create_file = input("Would you like to create a '.pdf' file with this info? (Yes/No):")
    if create_file.strip().lower() in ("yes", "y"):
        print("Preparing '.pdf' file..............")
        txt_file_name = str(x+1)+'.txt'
        try:
            create_results_files(list_recipe_x, x, clean_recipe, ingredients)
        finally:
            # Remove the temporary text file even when the conversion fails,
            # and do not fail ourselves if it was never written.
            if os.path.exists(txt_file_name):
                os.remove(txt_file_name)
        print('YOUR FILE IS READY, HAVE A NICE DAY!')
    else:
        print('NO FILE WAS CREATED, HAVE A NICE DAY!')
    print('================================================================================')

        

# Helper for step 4: scrapes a second web page
# in case the first one does not list the ingredient
def single_ingredient(ingredient):
    """Search one ingredient on dutchexpatshop.com and print the first two hits.

    Args:
        ingredient: Ingredient name; it is lower-cased before searching.

    Returns:
        None; either the matches or a "not found" message is printed.
    """
    ingredient = ingredient.lower()
    # params= URL-encodes the term; timeout keeps a stalled shop from hanging us.
    response = requests.get(
        "https://www.dutchexpatshop.com/en/catalogsearch/result/",
        params={"q": ingredient},
        timeout=30,
    )
    # Explicit parser: avoids bs4's "no parser specified" warning.
    soup = BeautifulSoup(response.content, "html.parser")

    # Product text without the button caption (no longer shadows builtin `list`).
    products = [
        option.text.replace('        Add to Cart ', '')
        for option in soup.find_all("div", attrs={'class': 'product-item-info'})
    ]

    if not products:
        print(" Sorry, couldn't find this ingredient, please check and try again",'\n', "We tried in this 2 webpages: www.abelandcole.co.uk & www.dutchexpatshop.com")
    else:
        print('\n'.join(products[:2]))
        print("** This specific product was found at: www.dutchexpatshop.com - Price with and without tax")

        
# Helper for step 5, used when the user wants a .pdf copy of the recipe.
# It writes a .txt file first and then converts it to .pdf (the caller erases the .txt afterwards)
def create_results_files(list_recipe_x, x, clean_recipe, ingredients):
    """Write the recipe summary and grocery list to '<x+1>.txt' and '<x+1>.pdf'.

    The report is produced by redirecting ``print`` output (including the
    output of ``missing_ingredients``) into a text file, which is then copied
    line by line into a PDF.  The text file is left in place; removing it is
    up to the caller.

    Args:
        list_recipe_x: Ingredient names of the recipe.
        x: Used both for the file names (x+1) and as the index label under
            which the recipe's rows are looked up in ``clean_recipe``.
            NOTE(review): callers must therefore pass a table in which label
            ``x`` is the recipe being exported.
        clean_recipe: DataFrame with the columns 'recipe.label',
            'recipe.totalTime', 'recipe.calories' and 'recipe.url'.
        ingredients: The ingredient string typed by the user.

    Returns:
        None.

    Raises:
        KeyError: if ``clean_recipe`` has no rows labelled ``x``.
    """
    txt_file_name = str(x+1)+'.txt'
    pdf_file_name = str(x+1)+'.pdf'

    # Looked up before any file is opened, so a missing label leaves no empty
    # file behind.  .loc[[x]] always yields a DataFrame; the former Series[x]
    # lookup returned a scalar for a one-row recipe and .unique() then failed.
    recipe_rows = clean_recipe.loc[[x]]

    # 1st create the txt result file.
    # The PDF core fonts only cover latin-1, so the text is written AND read
    # in that encoding.  It used to be written in the locale encoding and
    # decoded as latin-1, which turned "£" into "Â£" on UTF-8 systems.
    with open(txt_file_name, 'w', encoding='latin-1', errors='replace') as file:
        with redirect_stdout(file):
            print("RECIPE No",x+1,'-', recipe_rows['recipe.label'].unique())
            print('-------------')
            print('\n','- Existing Ingredients       : ', ingredients)
            print(' - Missing Ingredients (',len(list_recipe_x),')  : ', (', '.join(list_recipe_x)))
            print(' - Cooking Time (minutes)     : ', recipe_rows['recipe.totalTime'].mean())
            print(' - Recipe Calories (1 portion): ', recipe_rows['recipe.calories'].unique())
            print(' - Cooking Instructions       : ', recipe_rows['recipe.url'].unique())
            missing_ingredients(list_recipe_x)

    # 2nd convert txt result file into pdf
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font('arial', size=10)
    with open(txt_file_name, 'r', encoding='latin-1') as research:
        for line in research:
            # Each cell is one output line; the line break itself is not text.
            pdf.cell(20, 4, txt=line.rstrip('\r\n'), ln=1, align='L')
    pdf.output(pdf_file_name)


In [51]:
# list_of_ingredients = ['lettuCE', 'mayonnaise', 'Carrot', 'AlfajOR', 'WGETHTyy']

# Entry point: prompts for the ingredients, then runs the recipe search,
# the grocery look-up and the optional PDF export defined above.
get_existing_ingredients()