# 1. Imports

In [10]:
import io
import os
import json
import re
from text_to_num import alpha2digit
from PIL import Image
import base64
from io import BytesIO
from google.cloud import vision
from google.cloud.vision import types

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "Key.json"

# 2. Define helper functions

In [74]:
fractions = ['½','⅓','⅔','¼','¾','⅕','⅖','⅗','⅘','⅙','⅚',
            '⅐','⅛','⅜','⅝','⅞','⅑','⅒']

fractions_better = ['1/2','1/3','2/3','1/4','3/4','1/5','2/5','3/5','4/5',
                    '1/6','5/6','1/7','1/8','3/8','5/8','7/8','1/9','1/10']            


def improve_fractions(recipe):

    """
    function to transform from - for example - '½' to '1/2'. Makes the recipe entries 
    easier to edit for the user and enables parsing of fractions. 
    """
    
    for i in range(len(fractions)):
        recipe = recipe.replace(fractions[i], fractions_better[i])
        return recipe



def is_number(n):

    """
    function to check if  the first character of a string is a number. This 
    function only checks the first character to cover cases like "3-4" or "3/4". 
    """
    
    if len(n) == 0:
        return False
    
    try:
        float(n[0])  
        
    except ValueError:
        return False
    return True


def text_to_number(recipe):

    """
    function to transform written numbers in a string ("one hundred horses") to 
    their integer form ("100 horses"). Works for English, Spanish and French. 
    It takes advantage of the fact that the Google Vision API recognises 
    the language of the text in the picture that was taken.
    """    
    if texts[0].locale == "en":
        
        recipe = alpha2digit(recipe, "en")
        return recipe
    
    elif texts[0].locale == "es":
        
        recipe = alpha2digit(recipe, "es")
        return recipe
        
    elif texts[0].locale == "fr":
        
        recipe = alpha2digit(recipe, "fr")
        return recipe
    
    else:
        return recipe



def decode_string_and_api_call(image_string):

    """
    function to decode the base64 decoded string and make a google vision api 
    call with the picture.
    """
    # decode image_string
    decoded_string = base64.b64decode(image_string)

    # Instantiates a client
    client = vision.ImageAnnotatorClient()

    # Perform text detection
    image = types.Image(content=decoded_string)
    response = client.document_text_detection(image=image)
    texts = response.text_annotations
    blocks = response.full_text_annotation

    return texts, blocks

def clean_recipe(recipe):
    
    
    recipe = improve_fractions(recipe)
    
    
    # remove all special characters
    recipe = re.sub('[^A-Za-z0-9 ,;.:-?!""\n]+', '', recipe)
    
    
    # apply the text2num library if the recipe is in French, Spanish or English. 
    # This will transform written numbers (like "one hundred") to 
    # integers in string format (like "100"). If the time permits a german 
    # version of this library will be implemented.
    recipe = text_to_number(recipe)
    
    return recipe

def find_order(recipe):
    
    nums = []
    
    steps = []
    
    for i in range(len(recipe)):
        
        if recipe[i].isnumeric():
            
            nums.append([i, recipe[i]])
            
    print(nums)
            
    # check if first number in string is 1
    if nums[0][1] == "1":
        
            
        index_order = [0]
        
        counter = 2
        
        for i in range(1,len(nums)):
        
            
            # check if there are at least 10 characters between assumed instances of structure in the string.
            if nums[i][1] == str(counter): 
                
                if (nums[i][0] - nums[index_order[-1]][0]) > 10:
                    
                    index_order.append(i)
                    
                    counter = counter + 1
            
        # check if there is at least a succesion of 1, 2, 3 in the string. If that is the case the assumption is that the string is structured. 
        if len(index_order) > 2:
            
            # check if string started with "1"
            if nums[0][0] > 1:
            
                steps.append({"steps": recipe[:nums[0][0]]})
                
                # iterate over index_order except the last element since it needs to be treated differently
                for i in range(len(index_order) -1):
                    
                    steps.append({"steps": recipe[nums[index_order[i]][0]:nums[index_order[i+1]][0]]})
                    
                # append the last step 
                
                steps.append({"steps": recipe[nums[index_order[-1]][0]:]})
            
                return {"instructions":steps}
            
        else:
            
            return {"instructions": recipe}
                    
    else:
        
        return {"instructions": recipe}

# 3. Define ingredient parser function

In [83]:
def parse_ingredients(recipe):
        
    
    #list to save modified base dictionaries 
    ingredients = []
    
    
    recipe = clean_recipe(recipe)

    
    # transform the string into an iterable format, line for line, word for word.
    recipe = [line.split() for line in recipe.splitlines()]
    
    # parse each line of the recipe for unit, quantity and ingredient
    for line in recipe:
        
        length = len(line)
        
        if length == 1:
            
            # either this line is part of the previous line or it is an ingredient that needs no unit or qantity. This code will treat it as an ingredient. 
            # Web will hopefully implement functionality that enables the user to add this line to the previous ingredient if necessary.
            
            ingredients.append({"quantity":None, "unit":None, "ingredient":line[0]})
            continue
            
        elif length == 2:
            
            # this line is most probably an ingredient and a quantity 
            
            
            if (is_number(line[0][0]) and not is_number(line[1][0])):
            
                ingredients.append({"quantity":line[0], "unit":None, "ingredient":line[1]})
                continue
            
            elif (not is_number(line[0][0]) and is_number(line[1][0])):
                
                ingredients.append({"quantity":line[1], "unit":None, "ingredient":line[0]})
                continue
            
            else:
                
                # the last case covers both the possibility that both strings contain a number or that both dont. For both cases the same treatment makes sense.
                # 1. If they both contain numbers something went wrong and saving this line only under ingredient will make it easier for the user to modify it.
                # 2. If they both dont contain numbers they are probably part of the previous line and probably part of the ingredient part of that line. Saving
                # this as one string under ingredient will make it easier for the user to modify this part.  
                ingredients.append({"quantity":None, "unit":None, "ingredient":" ".join(line)})
                continue
                
        elif length > 2:
            
            number_map = [1 if is_number(word) else 0 for word in line]
            
            instances_number = sum(number_map)
                          
            if instances_number ==  0:
                
                ingredients.append({"quantity":None, "unit":None, "ingredient":" ".join(line)})
                continue
                
                              
            elif instances_number == 1:
                
                
                index = number_map.index(1)
                
                if index == 0:
                    
                    ingredients.append({"quantity":line[0], "unit":line[1], "ingredient":" ".join(line[2:])})
                    continue
                    
                elif index == 1:
                    
                    ingredients.append({"quantity":line[1], "unit":line[0], "ingredient":" ".join(line[2:])})
                    continue
                    
                    
                    
                elif index == (length - 2):
                    
                    ingredients.append({"quantity":line[-2], "unit":line[-1], "ingredient":" ".join(line[:-2])})
                    continue
                    
                    
                    
                elif index == (length -1):
                    
                    ingredients.append({"quantity":line[-1], "unit":line[-2], "ingredient":" ".join(line[:-2])})
                    continue
                    
                else:
                    
                    ingredients.append({"quantity":None, "unit":None, "ingredient":" ".join(line)})
                    continue
                    
                    
            else:
                
                # here a check for a comma would be good
                
                ingredients.append({"quantity":None, "unit":None, "ingredient":" ".join(line)})
                continue
                    
    ingredients_dict = {"ingredients": ingredients}
    return ingredients_dict     
                                  
   

# 4. Define instructions parser function

In [84]:
def parse_instructions(recipe):
    
    if len(blocks.pages[0].blocks) < 2:
        
        
        recipe = clean_recipe(recipe)
    

        instructions_dict = find_order(recipe)
        
        return instructions_dict
        
        

    else:
        
        all_blocks = ""

        for a in range(len(blocks.pages[0].blocks)):
    
            all_blocks = all_blocks + "new block"
    
            for b in range(len(blocks.pages[0].blocks[a].paragraphs)):
        
                for c in range(len(blocks.pages[0].blocks[a].paragraphs[b].words)):
            
                    all_blocks = all_blocks + " "
                    for d in range(len(blocks.pages[0].blocks[a].paragraphs[b].words[c].symbols)):
                
                        all_blocks = all_blocks + blocks.pages[0].blocks[a].paragraphs[b].words[c].symbols[d].text
                    
        blocks_splitted = all_blocks.split("new block")[1:]
        
        instructions_dict =  {"instructions": blocks_splitted}
        
        return instructions_dict

#  5. Load picture and encode it with base64

In [85]:
im = Image.open("test.jpg")
buffered = BytesIO()
im.save(buffered, format="JPEG")
img_str = base64.b64encode(buffered.getvalue())

# 6. Vision api call after decoding string 

In [86]:
texts, blocks = decode_string_and_api_call(img_str)

# 7. Apply ingredients parser

In [87]:
instructions_dict = parse_ingredients(texts[0].description)

In [88]:
instructions_dict

{'ingredients': [{'quantity': None,
   'unit': None,
   'ingredient': 'INICI HUU UT'},
  {'quantity': '4', 'unit': 'Fr', 'ingredient': 'Kenner'},
  {'quantity': '12', 'unit': 'Bund', 'ingredient': 'Basilikum'},
  {'quantity': '34', 'unit': 'Tomaten', 'ingredient': 'Hauptsache:'},
  {'quantity': None, 'unit': None, 'ingredient': 'reif und fruchtig'},
  {'quantity': '4', 'unit': 'EL', 'ingredient': 'bestes Olivenl'},
  {'quantity': None,
   'unit': None,
   'ingredient': 'Salz, schwarzer Pfeffer aus der Mhle'},
  {'quantity': '4',
   'unit': 'groe',
   'ingredient': 'Scheiben italienisches Weibrot'},
  {'quantity': '4', 'unit': None, 'ingredient': 'Knoblauchzehen'}]}

# 8. Apply instructions parser

In [89]:
instructions_dict = parse_instructions(texts[0].description)

In [90]:
instructions_dict

{'instructions': [' INICI HUU UT',
  ' Für 4 Kenner 1 / 2 Bund Basilikum 3 - 4 Tomaten ( Hauptsache : reif und fruchtig ) 4 EL bestes Olivenöl Salz , schwarzer Pfeffer aus der Mühle 4 große Scheiben italienisches Weißbrot 4 Knoblauchzehen']}

In [131]:
test1 = texts[0].description

test1 = re.sub("\n", " ", test1)

In [147]:
improve_fractions(recipe)    
recipe = re.sub('[^A-Za-z0-9 ,;.:-?!""\n]+', '', recipe)
recipe = re.sub("\n", " ", recipe)
recipe = text_to_number(recipe)
    

In [148]:
recipe

'INICI HUU UT Fr 4 Kenner 12 Bund Basilikum 34 Tomaten Hauptsache: reif und fruchtig 4 EL bestes Olivenl Salz, schwarzer Pfeffer aus der Mhle 4 groe Scheiben italienisches Weibrot 4 Knoblauchzehen '

In [149]:
def find_order(steps):
    nums = []
    for i in range(len(steps)):
        
        if steps[i].isnumeric():
            nums.append([i, steps[i]])
    # print(nums)
    steps_list = []
    for i in range(len(nums)):
        if i == len(nums) - 1:
            steps_list.append(steps[int(nums[i][0]):])
        for j in range(1, len(nums)):
            if int(nums[j][1]) - int(nums[i][1]) == 1:
                if int(nums[j][0]) - int(nums[i][0]) > 10:
                    steps_list.append(steps[int(nums[i][0]):int(nums[j][0])])
    # print(steps_list)
    indices = []
    for i in range(len(steps_list)):
        for j in range(len(steps_list)):
            if i == j:
                continue
            if steps_list[i] in steps_list[j]:
                indices.append(i) 
    print(indices)
    return steps_list

In [150]:
steps_list = find_order(recipe)

[1, 1, 2]


In [151]:
print(steps_list)

['2 Bund Basilikum ', '34 Tomaten Hauptsache: reif und fruchtig ', '34 Tomaten Hauptsache: reif und fruchtig 4 EL bestes Olivenl Salz, schwarzer Pfeffer aus der Mhle ', '34 Tomaten Hauptsache: reif und fruchtig 4 EL bestes Olivenl Salz, schwarzer Pfeffer aus der Mhle 4 groe Scheiben italienisches Weibrot ', '4 Knoblauchzehen ']


In [157]:
nums = []
for i in range(len(recipe)):
        
        if recipe[i].isnumeric():
            nums.append([i, recipe[i]])
            
print(nums)


[[16, '4'], [25, '1'], [26, '2'], [43, '3'], [44, '4'], [84, '4'], [141, '4'], [179, '4']]


In [9]:
def find_order(recipe):
    
    nums = []
    
    steps = []
    
    for i in range(len(recipe)):
        
        if recipe[i].isnumeric():
            
            nums.append([i, recipe[i]])
            
    # check if first number in string is 1
    if nums[0][1] == "1":
            
        index_order = [0]
            
        for i in range(1,len(nums)):
            
            counter = 2
            
            # check if there are at least 10 characters between assumed instances of structure in the string.
            if nums[i][1] == str(counter)  and (nums[i][0] - nums[index_order[-1]][0]) > 10:
                    
                index_oder.append(i)
                    
                counter  += 1
                continue
        
        # check if there is at least a succesion of 1, 2, 3 in the string. If that is the case the assumption is that the string is structured. 
        if len(index_order) > 2:
            
            
            # check if string started with "1"
            if nums[0][0] > 1:
            
                steps.append({"steps": recipe[:nums[0][0]]})
                
                # iterate over index_order except the last element since it needs to be treated differently
                for i in range(len(index_order) -1):
                    
                    
                    steps.append({"steps": recipe[nums[index_order[i]][0]:nums[index_order[i+1]][0]]})
                    
                # append the last step 
                
                steps.append({"steps": recipe[nums[index_order[-1]][0]:]})
                
                return {"instructions":steps}
                
                
        else:
            
            return {"instructions": recipe}
                    
    else:
        
        return {"instructions": recipe}
                
            

In [12]:
mytest = "hello ½"

improve_fractions(mytest)

'hello 1/2'

In [13]:
mytest

'hello ½'

In [91]:
tester = "these are my instructions: 1. chop onions 2. fry onions 3. eat onions 4. digest onion"


instructions_dict = find_order(tester)

[[27, '1'], [42, '2'], [56, '3'], [70, '4']]
2
3
4
worked
[{'steps': 'these are my instructions: '}, {'steps': '1. chop onions '}, {'steps': '2. fry onions '}, {'steps': '3. eat onions '}, {'steps': '4. digest onion'}]


In [92]:
instructions_dict

{'instructions': [{'steps': 'these are my instructions: '},
  {'steps': '1. chop onions '},
  {'steps': '2. fry onions '},
  {'steps': '3. eat onions '},
  {'steps': '4. digest onion'}]}

In [93]:
img_str

b'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAvQD8ADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDtff1pOTxS8UZ4/GuY0FAxmlH1opOKYDv1pM9cd6T6Umc0AO5HFKf5U0849aXoMGgA5oAoxSUAGMmjPPNKBijFAAevQAUd/agj3pOtABQOvSgngCjGKAF6UUUlMBetAHPWj8aD2oAXjOKDSDFB6UCFxg+1J0JNHGfeloAaAd