In [8]:
import re
import json
import nltk
import torch
from dateutil import parser
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.tag import pos_tag
from sentence_transformers import SentenceTransformer, util

In [9]:
bert_model = SentenceTransformer("all-MiniLM-L6-v2")
task_categories = {
    "Education": ["study", "learn", "read", "write"],
    "Chores": ["clean", "wash", "cook", "sweep"],
    "Work": ["write", "email", "call", "submit"],
    "Shopping": ["buy", "purchase", "order"],
    "Travel": ["go", "visit", "travel", "commute"],
    "Health": ["exercise", "run", "walk", "meditate"]
}

In [10]:
date_patterns = [
    r'\bby (Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)\b',
    r'\bby (next week|end of the day|tomorrow)\b',
    r'\bby (\d{1,2}[-/\s]\d{1,2}[-/\s]\d{2,4})\b',
    r'\bby (\d{1,2}\s?(AM|PM|am|pm))\b'
]

def extract_deadline(text):
    for pattern in date_patterns:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            try:
                return parser.parse(match.group(), fuzzy=True).strftime('%Y-%m-%d %I:%M %p')
            except:
                return match.group()
    return None


In [11]:
def resolve_pronouns(sentences):
    entity_map = {}
    previous_name = None
    
    for sentence in sentences:
        words = word_tokenize(sentence)
        pos_tags = pos_tag(words)
        
        for word, tag in pos_tags:
            if tag == "NNP":  
                previous_name = word
            elif word.lower() in ["he", "she", "they"] and previous_name:
                entity_map[word.lower()] = previous_name
    
    return entity_map

def categorize_task(task_verb):
    task_embedding = bert_model.encode(task_verb, convert_to_tensor=True)
    
    best_category = None
    max_similarity = -1

    for category, words in task_categories.items():
        category_embeddings = bert_model.encode(words, convert_to_tensor=True)
        similarity = util.pytorch_cos_sim(task_embedding, category_embeddings).max().item()
        
        if similarity > max_similarity:
            max_similarity = similarity
            best_category = category

    return best_category if best_category else "Uncategorized"

In [12]:
def extract_tasks(text):
    sentences = sent_tokenize(text)
    entity_map = resolve_pronouns(sentences)
    tasks = []
    nothing_list = []
    
    for sentence in sentences:
        words = word_tokenize(sentence)
        pos_tags = pos_tag(words)
        
        # Identify action verbs only
        action_verbs = [word for word, tag in pos_tags if tag.startswith("VB") and word.lower() not in ["is", "are", "was", "were", "am", "be", "been", "being", "has", "have", "had", "do", "does", "did"]]
        
        if action_verbs:
            task_action = action_verbs[-1]
            responsible_person = None
            deadline = extract_deadline(sentence)
            category = categorize_task(task_action)
            
            for word, tag in pos_tags:
                if tag == "NNP":
                    responsible_person = word
                elif tag == "PRP" and word.lower() in entity_map:
                    responsible_person = entity_map[word.lower()]
            
            tasks.append({
                "sentence": sentence,
                "task": task_action,
                "assigned_to": responsible_person,
                "deadline": deadline,
                "category": category,
                "status": "pending"
            })
        else:
            nothing_list.append({"sentence": sentence})
    
    return tasks, nothing_list

In [13]:
text = "Rahul wakes up early every day. He goes to college in the morning and comes back by 3 PM. At present, Rahul is outside. He has to buy the snacks for all of us. Alex has to wash the cup. She will study for the test. I have to clean the kitchen."
task_list, nothing_list = extract_tasks(text)
print("Tasks to be done:")
for task in task_list:
    print(f"Sentence: {task['sentence']}")
    print(f"Task: {task['task']}")
    print(f"Assigned To: {task['assigned_to']}")
    print(f"Deadline: {task['deadline']}")
    print(f"Category: {task['category']}")
    print("-" * 50)

print("Nothing list:")
for item in nothing_list:
    print(f"Sentence: {item['sentence']}")
    print("-" * 50)

Tasks to be done:
Sentence: Rahul wakes up early every day.
Task: wakes
Assigned To: Rahul
Deadline: None
Category: Work
--------------------------------------------------
Sentence: He goes to college in the morning and comes back by 3 PM.
Task: comes
Assigned To: PM
Deadline: 2025-02-18 03:00 PM
Category: Chores
--------------------------------------------------
Sentence: He has to buy the snacks for all of us.
Task: buy
Assigned To: Rahul
Deadline: None
Category: Shopping
--------------------------------------------------
Sentence: Alex has to wash the cup.
Task: wash
Assigned To: Alex
Deadline: None
Category: Chores
--------------------------------------------------
Sentence: She will study for the test.
Task: study
Assigned To: Alex
Deadline: None
Category: Education
--------------------------------------------------
Sentence: I have to clean the kitchen.
Task: clean
Assigned To: None
Deadline: None
Category: Chores
--------------------------------------------------
Nothing list:
S