In [1]:
import nltk
import spacy

Before running the next cell, download the spaCy English model once from a shell: `python3 -m spacy download en_core_web_sm`

In [2]:
# Load the `en_core_web_sm` pipeline once; later cells reuse `nlp`.
nlp = spacy.load("en_core_web_sm")

In [10]:
# Sample input for the parsing cells: cooking instructions, one imperative
# sentence per list entry, in the order they are to be carried out.
# NOTE(review): the variable is called `receipt` but the content is a recipe;
# the name is kept because other cells refer to it.
receipt = [
    'Place chicken strips into a large, resealable plastic bag.',
    'In a small bowl, mix the egg, buttermilk and garlic powder.',
    'Pour mixture into bag with chicken.',
    'Seal, and refrigerate 2 to 4 hours.',
    'In another large, resealable plastic bag, mix together the flour, bread crumbs, salt and baking powder.',
    'Remove chicken from refrigerator, and drain, discarding buttermilk mixture.',
    'Place chicken in flour mixture bag.',
    'Seal, and shake to coat.',
    'Heat oil in a large, heavy skillet to 375 degrees F (190 degrees C).',
    'Carefully place coated chicken in hot oil.',
    'Fry until golden brown and juices run clear.',
    'Drain on paper towels.'
]

In [11]:
def to_nltk_tree(node):
    """Recursively convert a dependency-parse token into an ``nltk.Tree``.

    The tree label is the token's ``orth_`` text and its children are the
    converted ``node.children``. A token without children becomes a ``Tree``
    with an empty child list (not a bare string leaf).
    """
    subtrees = []
    for child in node.children:
        subtrees.append(to_nltk_tree(child))
    return nltk.Tree(node.orth_, subtrees)

def no_punct(tokens):
    """Return the tokens whose coarse POS tag is not punctuation.

    Args:
        tokens: any iterable of objects exposing a ``pos_`` string attribute
            (e.g. a spaCy ``Doc``, a list of tokens, or a ``subtree``
            generator).

    Returns:
        A new list with the tokens whose ``pos_`` is not ``"PUNCT"``, in
        their original order.
    """
    # Compare for equality: `pos_ not in "PUNCT"` is a *substring* test, so a
    # token with an empty tag ("" is a substring of every string) was dropped.
    return [token for token in tokens if token.pos_ != "PUNCT"]

In [14]:
# Parse the first step and pick the token the parser labelled as ROOT.
doc = nlp(receipt[0])
root = [tok for tok in doc if tok.dep_ == 'ROOT'][0]

# Draw the dependency tree, then print the root's direct children minus punctuation.
to_nltk_tree(root).pretty_print()
print(no_punct([child for child in root.children]))

# Cell result: every non-punctuation token paired with its dependency label.
[(tok, tok.dep_) for tok in no_punct(doc)]

                  strips                       
    ________________|________________________   
   |               into                      | 
   |                |                        |  
chicken            bag                       | 
   |      __________|__________________      |  
 Place   a  large   ,    resealable plastic  . 
   |     |    |     |        |         |     |  
  ...   ...  ...   ...      ...       ...   ...

[chicken, into]


[(Place, 'compound'),
 (chicken, 'compound'),
 (strips, 'ROOT'),
 (into, 'prep'),
 (a, 'det'),
 (large, 'amod'),
 (resealable, 'amod'),
 (plastic, 'amod'),
 (bag, 'pobj')]

In [6]:
def make_printable(nodes):
    """Render each token group as a single space-separated string.

    Args:
        nodes: iterable of token groups; each group is an iterable of objects
            with a ``text`` attribute.

    Returns:
        A list with one string per group, built by joining the group's
        ``text`` values with a single space.
    """
    return [' '.join(tok.text for tok in group) for group in nodes]
        
def parse_receipt(receipt):
    """Print the actions, objects and conditions found in each step.

    Each step is lower-cased, prefixed with ``'I '`` and parsed with the
    module-level ``nlp`` pipeline (presumably the pronoun gives the
    imperative sentence an explicit subject so the verb is tagged as a verb
    -- TODO confirm). For every token tagged ``VERB`` one line is printed
    with:

    * Objects: subtrees of the verb's ``dobj`` children;
    * Conditions: subtrees of the verb's ``prep`` children, plus, for the
      ROOT verb, whatever the parser attached to the sentence subject
      besides its first token.

    Args:
        receipt: iterable of instruction sentences (strings).

    Returns:
        None. The result is written to stdout.
    """
    for i, step in enumerate(receipt, start=1):
        tokens = nlp('I ' + step.lower())
        verbs = [token for token in tokens if token.pos_ == 'VERB']
        # The subject does not depend on the verb, so look it up once per step.
        # `next(..., None)` avoids the IndexError the old `[...][0]` raised
        # when the parser produced no `nsubj` at all.
        subject = next((token for token in tokens if token.dep_ == 'nsubj'), None)
        print(f'#Step {i}.')
        for verb in verbs:
            children = no_punct(verb.children)
            # extract direct objects from sentence
            objects = [no_punct(token.subtree) for token in children if token.dep_ == 'dobj']
            # extract conditional objects (where, how, etc.)
            cond_objects = [no_punct(token.subtree) for token in children if token.dep_ == 'prep']
            # extract additional cond. objects which can accidentally be merged with added "I"
            if verb.dep_ == 'ROOT' and subject is not None:
                # Drop the first token of the subject subtree and keep the rest.
                accidental_subj_conds = no_punct(subject.subtree)[1:]
                if accidental_subj_conds:
                    cond_objects.append(accidental_subj_conds)

            print(f'Action: {verb}, Objects: {make_printable(objects)}, Conditions: {make_printable(cond_objects)}')
        print()

In [7]:
parse_receipt(receipt)

#Step 1.
Action: place, Objects: ['chicken strips'], Conditions: ['into a large resealable plastic bag']
Action: strips, Objects: [], Conditions: []

#Step 2.
Action: mix, Objects: ['the egg buttermilk and garlic powder'], Conditions: ['in a small bowl']

#Step 3.
Action: pour, Objects: ['mixture'], Conditions: ['into bag with chicken']

#Step 4.
Action: seal, Objects: [], Conditions: []
Action: refrigerate, Objects: ['2 to 4 hours'], Conditions: []

#Step 5.
Action: mix, Objects: ['the flour'], Conditions: ['in another large resealable plastic bag']

#Step 6.
Action: remove, Objects: ['chicken from refrigerator'], Conditions: []
Action: drain, Objects: [], Conditions: []
Action: discarding, Objects: ['buttermilk mixture'], Conditions: []

#Step 7.
Action: place, Objects: ['chicken in flour mixture bag'], Conditions: []

#Step 8.
Action: seal, Objects: [], Conditions: []
Action: shake, Objects: [], Conditions: ['to coat']

#Step 9.
Action: heat, Objects: ['oil'], Conditions: ['in a lar