In [8]:
!pip3 install spacy



In [None]:
# Download the en_core_web_sm pipeline that spacy.load("en_core_web_sm") loads below.
# NOTE(review): `python` is resolved through PATH and may not be the kernel's
# interpreter — confirm the model is installed into the same environment.
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 12.8/12.8 MB 1.9 MB/s eta 0:00:00
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.8.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [3]:
import spacy
import re
import nltk
from nltk.tokenize import sent_tokenize
from spacy.matcher import Matcher

# Load the small English spaCy pipeline once. The extraction helpers in this
# notebook call this shared `nlp` object to get part-of-speech tags and named entities.
nlp = spacy.load("en_core_web_sm")

In [18]:
# Sample paragraph used as the input for the rest of the notebook: routine
# statements mixed with obligations ("has to", "should") about the same person.
text = """Rahul wakes up early every day. He goes to college in the morning and comes back at 3 pm. 
At present, Rahul is outside. He has to buy the snacks for all of us. Rahul should clean the room by 5 pm today."""

In [19]:
def preprocess_text(text):
    """Split raw text into sentences.

    Args:
        text: The text to segment.

    Returns:
        Whatever ``sent_tokenize`` returns for ``text`` (a list of sentence strings).
    """
    return sent_tokenize(text)

# Segment the sample text once; `sentences` is reused by later cells both as the
# input to task extraction and as the context list for pronoun resolution.
sentences = preprocess_text(text)
print(sentences)

['Rahul wakes up early every day.', 'He goes to college in the morning and comes back at 3 pm.', 'At present, Rahul is outside.', 'He has to buy the snacks for all of us.', 'Rahul should clean the room by 5 pm today.']


In [20]:
# Fetch NLTK's 'punkt' tokenizer data.
# NOTE(review): sent_tokenize is already called in an earlier cell; on a fresh
# kernel without cached data this download presumably has to run first —
# confirm the intended cell order.
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\shash\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [21]:
# Obligation cues, matched as whole words and case-insensitively, so that
# "mustard" or "shoulder" do not count while a capitalised "Must"/"Should" does.
_OBLIGATION_CUE = re.compile(r"\b(?:has\s+to|should|must)\b", re.IGNORECASE)


def extract_tasks(sentences):
    """Return the sentences that express an obligation.

    A sentence is kept when it contains one of the cues "has to", "should" or
    "must" as a standalone word/phrase AND spaCy tags at least one of its
    tokens as a VERB.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        A list of the qualifying sentences, in their original order.
    """
    tasks = []
    for sentence in sentences:
        # Cheap textual check first: sentences without a cue can never
        # qualify, so they are skipped without running the spaCy pipeline.
        if not _OBLIGATION_CUE.search(sentence):
            continue
        if any(token.pos_ == "VERB" for token in nlp(sentence)):
            tasks.append(sentence)
    return tasks

# Keep only the sentences that read as obligations; later cells iterate over `tasks`.
tasks = extract_tasks(sentences)
print(tasks)

['He has to buy the snacks for all of us.', 'Rahul should clean the room by 5 pm today.']


In [22]:
def extract_person(sentence):
    """Pick the person a sentence is about, using only that sentence.

    Candidates are tried in order: the first entity labelled PERSON, then the
    first token tagged PROPN, then the first of the pronouns he/she/they
    (returned capitalised).

    Args:
        sentence: The sentence to analyse.

    Returns:
        The chosen name or pronoun, or "Unknown" when nothing matches.
    """
    doc = nlp(sentence)

    named = next((ent.text for ent in doc.ents if ent.label_ == "PERSON"), None)
    if named is not None:
        return named

    # No PERSON entity: fall back to the first proper noun.
    proper = next((tok.text for tok in doc if tok.pos_ == "PROPN"), None)
    if proper is not None:
        return proper

    # Last resort: report the pronoun itself.
    pronoun = next((tok.text for tok in doc if tok.text.lower() in ("he", "she", "they")), None)
    return "Unknown" if pronoun is None else pronoun.capitalize()

# Show the person picked for each task. With this single-sentence version a
# pronoun subject is reported as the pronoun itself.
for task in tasks:
    print(f"Task: {task}, Person: {extract_person(task)}")

Task: He has to buy the snacks for all of us., Person: He
Task: Rahul should clean the room by 5 pm today., Person: Rahul


In [23]:
def find_last_person(text_list, current_sentence):
    """Find the most recently mentioned person at or before a sentence.

    Starting at ``current_sentence`` and walking backwards through
    ``text_list``, return the first PERSON entity found; if a sentence has no
    PERSON entity, its first PROPN token is used instead.

    Args:
        text_list: Ordered sentences forming the context.
        current_sentence: The sentence to start from. Its position is the
            first occurrence in ``text_list``.

    Returns:
        The matched name, or "Unknown" when no candidate is found or when
        ``current_sentence`` is not part of ``text_list``.
    """
    try:
        current_index = text_list.index(current_sentence)
    except ValueError:
        # The sentence is not in the context, so there is nothing to look
        # back through; use the same fallback as an unsuccessful search.
        return "Unknown"

    # The slice includes the current sentence itself, then earlier ones.
    for sentence in reversed(text_list[:current_index + 1]):
        doc = nlp(sentence)

        # Named entities take priority over bare proper nouns.
        person = next((ent.text for ent in doc.ents if ent.label_ == "PERSON"), None)
        if person is not None:
            return person

        proper_noun = next((token.text for token in doc if token.pos_ == "PROPN"), None)
        if proper_noun is not None:
            return proper_noun

    return "Unknown"

def extract_person(sentence, all_sentences):
    """Pick the person a sentence is about, resolving pronouns from context.

    The first PERSON entity in the sentence wins, then the first PROPN token.
    If neither exists but the sentence contains he/she/they, the lookup is
    delegated to ``find_last_person`` over ``all_sentences``.

    Args:
        sentence: The sentence to analyse.
        all_sentences: Ordered context sentences passed to ``find_last_person``.

    Returns:
        The resolved name, or "Unknown" when the sentence has neither a name
        nor one of the handled pronouns.
    """
    doc = nlp(sentence)

    # A name inside the sentence itself always takes priority.
    named = next((ent.text for ent in doc.ents if ent.label_ == "PERSON"), None)
    if named is not None:
        return named
    proper = next((tok.text for tok in doc if tok.pos_ == "PROPN"), None)
    if proper is not None:
        return proper

    # Only a pronoun: hand over to the backwards search through the context.
    if any(tok.text.lower() in ("he", "she", "they") for tok in doc):
        return find_last_person(all_sentences, sentence)

    return "Unknown"

# Re-run person extraction, this time passing the full sentence list as context
# so that a pronoun subject is resolved by looking back through the sentences.
for task in tasks:
    person = extract_person(task, sentences)
    print(f"Task: {task}, Person: {person}")

Task: He has to buy the snacks for all of us., Person: Rahul
Task: Rahul should clean the room by 5 pm today., Person: Rahul


In [24]:
# Deadline phrases, most specific alternative first:
#   "by 5 pm" / "by 5:30pm"  -> clock time with am/pm, optional minutes
#   "by 17:00"               -> clock time with minutes, no am/pm
#   "by <word>", "before <word>", "tomorrow"
_DEADLINE_PATTERN = re.compile(
    r"\b(by\s+\d{1,2}(?::\d{2})?\s?[ap]m"
    r"|by\s+\d{1,2}:\d{2}"
    r"|by\s+\w+"
    r"|before\s+\w+"
    r"|tomorrow)\b",
    re.IGNORECASE,
)


def extract_deadline(sentence):
    """Return the first deadline phrase found in a sentence.

    Matching is case-insensitive and the phrase is returned exactly as it
    appears in ``sentence``. Clock times keep their minutes ("by 5:30 pm",
    "by 17:00") instead of being cut off at the hour.

    Args:
        sentence: The sentence to search.

    Returns:
        The matched phrase, or "No deadline specified" when none is found.
    """
    match = _DEADLINE_PATTERN.search(sentence)
    return match.group(1) if match else "No deadline specified"

# Print the deadline phrase extracted from each task, or the fallback text when none is found.
for task in tasks:
    print(f"Task: {task}, Deadline: {extract_deadline(task)}")

Task: He has to buy the snacks for all of us., Deadline: No deadline specified
Task: Rahul should clean the room by 5 pm today., Deadline: by 5 pm


In [None]:
# Task Categorization
# Category -> trigger verbs. Dict order matters: the first category with a hit wins.
_CATEGORY_KEYWORDS = {
    "Shopping": ["buy", "purchase", "get"],
    "Cleaning": ["clean", "wash", "dust"],
    "Work/Study": ["submit", "complete", "write", "study"],
}


def categorize_task(task):
    """Assign a coarse category to a task sentence.

    A keyword counts only when it starts a word, so inflected forms such as
    "cleaning" or "buys" still match while accidental mid-word hits ("get" in
    "vegetables", "dust" in "industry") do not. Matching is case-insensitive.

    Args:
        task: The task sentence.

    Returns:
        "Shopping", "Cleaning" or "Work/Study" for the first category with a
        matching keyword, otherwise "Other".
    """
    lowered = task.lower()
    for category, keywords in _CATEGORY_KEYWORDS.items():
        # \b anchors the keyword to the start of a word; no trailing anchor,
        # so suffixes like -s / -ing / -ed are still accepted.
        if any(re.search(r"\b" + re.escape(word), lowered) for word in keywords):
            return category
    return "Other"

# Assemble one record per task: who it is for, by when, and what kind of task it is
structured_output = []

for task in tasks:
    person, deadline, category = (
        extract_person(task, sentences),
        extract_deadline(task),
        categorize_task(task),
    )
    structured_output += [
        {"Task": task, "Person": person, "Deadline": deadline, "Category": category}
    ]

# Tabulate the records and print the resulting table
import pandas as pd
df = pd.DataFrame(data=structured_output)
print(df)

                                         Task Person               Deadline  \
0     He has to buy the snacks for all of us.  Rahul  No deadline specified   
1  Rahul should clean the room by 5 pm today.  Rahul                by 5 pm   

   Category  
0  Shopping  
1  Cleaning  


In [26]:
# Write the task table to extracted_tasks.csv (relative path, so it lands in the
# current working directory); index=False leaves the DataFrame index out of the file.
df.to_csv("extracted_tasks.csv", index=False)

print("Output saved to extracted_tasks.csv")

Output saved to extracted_tasks.csv
