In [30]:
!pip install word2number

Defaulting to user installation because normal site-packages is not writeable
Collecting word2number
  Downloading word2number-1.1.zip (9.7 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Installing backend dependencies: started
  Installing backend dependencies: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: word2number
  Building wheel for word2number (pyproject.toml): started
  Building wheel for word2number (pyproject.toml): finished with status 'done'
  Created wheel for word2number: filename=word2number-1.1-py3-none-any.whl size=5589 sha256=15dc5ee13f621d219e05c60bf76ab2e8b387f4cd4604534d3f556628ebed271b
  Stored in directory: c:\users\adnan\appdata\local\pip\cache\wheels\5b\7

In [39]:
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_md")


In [219]:
number_dict = {"zero": 0, "one": 1, "two": 2, "three": 3, "four": 4, "five": 5, "six": 6, "seven": 7, "eight": 8, "nine": 9, "ten": 10, "eleven": 11, "twelve": 12, "thirteen": 13, "fourteen": 14, "fifteen": 15, "sixteen": 16, "seventeen": 17, "eighteen": 18, "nineteen": 19, "twenty": 20, "twenty-one": 21, "twenty-two": 22, "twenty-three": 23, "twenty-four": 24}
time_frames = ['week', 'month', 'year']

def visualize_dependency_parse(text):
    doc = nlp(text)
    displacy.render(doc, style="dep", jupyter=True, options={"distance": 100, "compact": True})

In [234]:
def extract_info(text):
    doc = nlp(text)
    extracted_info = {"time": None, "date": None, "frequency": None, "task": None}

    for token in doc:
        if token.ent_type_ == "TIME":
            if token.dep_ == 'pobj':
                continue

            children = [c for c in token.children if c.dep_ == "nummod"]
            if token.head.dep_ == "pobj":
                if token.text.isdigit():
                    extracted_info["time"] = f"{token.lower_} {token.head.lower_}"
                else:
                    extracted_info["time"] = f"{number_dict[token.lower_]} {token.head.lower_}"

            else:
                extracted_info["time"] = token.lower_

        if token.lower_ == 'night':
            print([c.lower_ for c in token.children])
        
        if token.ent_type_ == "DATE":

            print(f"""
                Current Token: {token.lower_}
                token head: {token.head.lower_}
                current dep: {token.dep_}
                head dep: {token.head.dep_}

                current children: {[c.lower_ for c in token.children]}
                
            """)
            
            children = [c for c in token.children]
            if len(children) > 0:
                if children[0].dep_ == "det":
                    extracted_info["frequency"] = f"{children[0].lower_} {token.lower_}" 
                else:
                    extracted_info["date"] = f"{children[0].lower_} {token.lower_}"
            else:
                extracted_info["date"] = token.text

        if token.pos_ == "VERB":
            children = [c for c in token.children]
            for child in children:
                if child.dep_ == "dobj":
                    extracted_info["task"] = f"{token.text} {child.text}"

        if token.dep_ == "compound":
            children = [c for c in token.children]
            if len(children) == 0:
                continue
            if children[0].pos_ == "ADJ" and len(children) == 2:
                extracted_info["time"] = f"{children[0].lower_} {children[1].lower_}"

    return extracted_info

In [235]:
# Test the function with examples
sentences = [
    "Remind me to play football next monday at midday",
    "Remind me to submit the report next friday night",
    "Remind me to buy groceries every monday at two pm",
    "Set an alarm every monday at 3 am",
]

for sentence in sentences:
    visualize_dependency_parse(sentence)
    print(extract_info(sentence))
    
    


                Current Token: next
                token head: monday
                current dep: amod
                head dep: npadvmod

                current children: []
                
            

                Current Token: monday
                token head: play
                current dep: npadvmod
                head dep: xcomp

                current children: ['next']
                
            
{'time': None, 'date': 'next monday', 'frequency': None, 'task': 'play football'}



                Current Token: next
                token head: night
                current dep: amod
                head dep: npadvmod

                current children: []
                
            

                Current Token: friday
                token head: night
                current dep: compound
                head dep: npadvmod

                current children: []
                
            
['next', 'friday']
{'time': 'night', 'date': 'friday', 'frequency': None, 'task': 'submit report'}



                Current Token: monday
                token head: buy
                current dep: npadvmod
                head dep: xcomp

                current children: ['every']
                
            
{'time': '2 pm', 'date': None, 'frequency': 'every monday', 'task': 'buy groceries'}



                Current Token: monday
                token head: set
                current dep: npadvmod
                head dep: ROOT

                current children: ['every']
                
            
{'time': '3 am', 'date': None, 'frequency': 'every monday', 'task': 'Set alarm'}
