In [None]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk import pos_tag
import numpy as np

In [None]:

# Fetch the NLTK data packages this notebook relies on. Run this cell before
# calling any of the functions defined below.
# NOTE(review): both the legacy names ('punkt', 'averaged_perceptron_tagger')
# and the newer ones ('punkt_tab', 'averaged_perceptron_tagger_eng') are
# requested, presumably so the notebook works across NLTK versions — confirm.
nltk.download('punkt')  # tokenizer data (presumably for sent_tokenize / word_tokenize)
nltk.download('stopwords')  # corpus read through stopwords.words('english')
nltk.download('averaged_perceptron_tagger')  # tagger model (presumably for pos_tag)
nltk.download('averaged_perceptron_tagger_eng')
nltk.download('punkt_tab')

In [None]:
def preprocess_text(text):
    """Split text into sentences and reduce each one to its content words.

    Args:
        text: Raw input string.

    Returns:
        A list of ``(sentence, words)`` tuples, one per sentence returned by
        ``sent_tokenize``. ``sentence`` is the unmodified sentence; ``words``
        is the list of lowercased tokens left after stripping punctuation and
        dropping English stopwords.
    """
    # Build the stopword set once per call: stopwords.words() produces a fresh
    # list on every invocation, so evaluating it per token is needlessly slow,
    # and a set gives constant-time membership checks.
    stop_words = set(stopwords.words('english'))

    preprocessed = []
    for sent in sent_tokenize(text):
        # Remove every character that is neither a word character nor whitespace.
        sent_clean = re.sub(r'[^\w\s]', '', sent)
        lowered = (token.lower() for token in word_tokenize(sent_clean))
        words = [token for token in lowered if token not in stop_words]
        # Keep the original sentence alongside its tokens for later matching.
        preprocessed.append((sent, words))
    return preprocessed

In [None]:
def is_task_sentence(sent):
    """
    Report whether a sentence looks like a task.

    A sentence qualifies when it contains an obligation phrase or a
    deadline expression; matching is case-insensitive.
    Returns True on the first pattern that matches, otherwise False.
    """
    task_patterns = (
        # Obligation words: "should", "must"
        r'\b(should|must)\b',
        # Obligation phrases: "has to", "have to"
        r'\b(has|have)\s+to\b',
        # Deadline expressions, e.g. "by 9pm", "by March 3", "today"
        r'\b(by\s+\d+[\d\s:]*[ap]m|by\s+\w+\s+\d+|today|tonight|tomorrow)\b',
    )
    return any(
        re.search(pattern, sent, re.IGNORECASE) is not None
        for pattern in task_patterns
    )

In [None]:
def identify_tasks(preprocessed_sents):
    """Return the original sentences that is_task_sentence accepts.

    Args:
        preprocessed_sents: Iterable of ``(original_sentence, words)`` pairs.
            Only the original sentence is inspected; the word list is ignored.

    Returns:
        A list of the matching original sentences, in input order.
    """
    return [
        sentence
        for sentence, _tokens in preprocessed_sents
        if is_task_sentence(sentence)
    ]

In [None]:
def extract_info(task_sentences):
    """Build one record per task sentence with its entity and deadline.

    Args:
        task_sentences: Iterable of sentence strings.

    Returns:
        A list of dicts with keys ``'task'`` (the sentence), ``'entity'``
        (the first token tagged NNP or PRP, or None) and ``'deadline'``
        (the first deadline expression matched, or None).
    """
    deadline_pattern = r'\b(by\s+\d+[\d\s:]*[ap]m|by\s+\w+\s+\d+|today|tonight|tomorrow)\b'
    entity_tags = ['NNP', 'PRP']

    def describe(sentence):
        # The first proper noun or pronoun in the sentence is taken as the entity.
        entity = None
        for token, tag in pos_tag(word_tokenize(sentence)):
            if tag in entity_tags:
                entity = token
                break

        # Only the first deadline expression found is reported.
        found = re.search(deadline_pattern, sentence, re.IGNORECASE)
        if found:
            deadline = found.group(0)
        else:
            deadline = None

        return {'task': sentence, 'entity': entity, 'deadline': deadline}

    return [describe(sentence) for sentence in task_sentences]

In [None]:
def main(text):
    """Run the task-extraction pipeline on text and print a report.

    The text is preprocessed, filtered down to task sentences, and each task
    is printed with its description, entity and deadline. When no sentence
    qualifies as a task, "No tasks found." is printed instead.

    Args:
        text: Raw input string to analyse.

    Returns:
        None. All results are written to standard output.
    """
    preprocessed = preprocess_text(text)
    task_sentences = identify_tasks(preprocessed)
    if not task_sentences:
        print("No tasks found.")
        return

    extracted_info = extract_info(task_sentences)

    # Tasks are numbered from 1 in the printed report.
    for number, info in enumerate(extracted_info, start=1):
        print(f"Task {number}:")
        print(f"  Description: {info['task']}")
        print(f"  Entity: {info['entity']}")
        print(f"  Deadline: {info['deadline']}")
        print("---")

In [None]:
if __name__ == "__main__":
    # Demo input: two sentences carrying obligation/deadline cues and one
    # plain statement that should not be reported as a task.
    sample_text = (
        "John has to deliver a package by 9pm. "
        "He likes to deliver packages. "
        "Rahul should clean the room by 5 pm today."
    )
    main(sample_text)
