In [12]:
import re
import nltk
import spacy
import pandas as pd
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from IPython.display import display
# Fetch the NLTK 'punkt' and 'stopwords' data packages (reported as already
# up to date when they are present locally).
nltk.download('punkt')
nltk.download('stopwords')
# Load the spaCy "en_core_web_sm" pipeline once; the helper functions in the
# cells below all call this shared `nlp` object.
nlp = spacy.load("en_core_web_sm")

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/parthabhang/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/parthabhang/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [13]:
# Function to read the input file
def read_text_file(file_path):
    """Return the full contents of a UTF-8 text file.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file contents as a string, or "" when the file does not exist
        (an error message is printed in that case).
    """
    try:
        with open(file_path, mode="r", encoding="utf-8") as source:
            return source.read()
    except FileNotFoundError:
        # Best-effort: report the problem and fall through to the empty
        # sentinel so callers can test the result for truthiness.
        print("Error: The file was not found.")
    return ""

In [14]:
# Function to Clean Text
def preprocess_text(text):
    """Normalise whitespace and split the text into sentences.

    Args:
        text: Raw input text.

    Returns:
        The list of sentences produced by ``sent_tokenize`` after every run
        of whitespace (including newlines and tabs) has been collapsed to a
        single space.
    """
    single_spaced = re.sub(r'\s+', ' ', text)
    return sent_tokenize(single_spaced)

In [15]:
# Function to extract verbs using POS tagging
def pos_tagging(sentence):
    """Collect the verbs of a sentence.

    Args:
        sentence: Sentence text to run through the spaCy pipeline.

    Returns:
        A list with the surface text of every token whose coarse
        part-of-speech tag is "VERB", in sentence order.
    """
    found_verbs = []
    for tok in nlp(sentence):
        if tok.pos_ == "VERB":
            found_verbs.append(tok.text)
    return found_verbs


In [16]:
# Function to Lemmatize Verb
def lemmatize_verb(verb):
    """Return the lemma (base form) of a single word.

    Args:
        verb: The word to lemmatize. It is parsed on its own, so the lemma
            is computed without any sentence context.

    Returns:
        The lemma of the first token of ``verb``. If ``verb`` yields no
        tokens at all (e.g. the empty string), it is returned unchanged
        instead of raising ``IndexError``.
    """
    doc = nlp(verb)
    # An empty input produces an empty Doc; indexing it would raise.
    if len(doc) == 0:
        return verb
    return doc[0].lemma_

In [17]:
# Function to Get only main verb
def extract_main_verb(sentence):
    """Return the lemma of the main verb of a sentence.

    The syntactic root is preferred when it is verbal (VERB or AUX), so a
    subordinate verb that merely appears earlier in the sentence does not
    win. If the root is not verbal (e.g. a noun-headed fragment), the first
    VERB token is used instead.

    Args:
        sentence: Sentence text to parse with the spaCy pipeline.

    Returns:
        The in-context lemma of the selected verb, or the sentinel string
        "No verb found" when the sentence contains no verbal root and no
        VERB token.
    """
    doc = nlp(sentence)

    # First choice: the dependency root, provided it is actually a verb.
    for token in doc:
        if token.dep_ == "ROOT" and token.pos_ in ("VERB", "AUX"):
            # token.lemma_ is computed in context; re-parsing the bare word
            # would discard the part-of-speech information.
            return token.lemma_

    # Fallback: the first full verb anywhere in the sentence.
    for token in doc:
        if token.pos_ == "VERB":
            return token.lemma_

    return "No verb found"

In [18]:
# Function to Identify Task
def identify_tasks(sentences):
    """Extract the main verb of every sentence that has one.

    Args:
        sentences: Iterable of sentence strings.

    Returns:
        A list of main-verb lemmas in input order; sentences for which
        ``extract_main_verb`` reports "No verb found" are skipped.
    """
    candidates = (extract_main_verb(item) for item in sentences)
    return [lemma for lemma in candidates if lemma != "No verb found"]

In [19]:
# Function to Extract name
def extract_who_task(task_sentence):
    """Identify who a task sentence refers to.

    Args:
        task_sentence: Sentence text to parse with the spaCy pipeline.

    Returns:
        The text of the first token tagged as a proper noun ("PROPN") or
        pronoun ("PRON"), or "Unknown" when the sentence has neither.
    """
    actor_tags = ("PROPN", "PRON")
    mentions = (tok.text for tok in nlp(task_sentence) if tok.pos_ in actor_tags)
    return next(mentions, "Unknown")

In [20]:
# Function to Extract Deadline
def extract_deadline(task_sentence):
    """Find the deadline mentioned in a task sentence.

    Args:
        task_sentence: Sentence text to parse with the spaCy pipeline.

    Returns:
        The text of the first named entity labelled "TIME" or "DATE", or
        "No deadline specified" when the sentence contains no such entity.
    """
    temporal_labels = {"TIME", "DATE"}
    for entity in nlp(task_sentence).ents:
        if entity.label_ not in temporal_labels:
            continue
        return entity.text
    return "No deadline specified"

In [21]:
# Function for Categorization of task
def categorize_task(task_sentence):
    """Assign a task description to the first category whose keyword it mentions.

    Keywords are matched case-insensitively as whole words, not as raw
    substrings, so "go" no longer matches "forgot" and "text" no longer
    matches "context". Regular inflections of a keyword are still accepted:
    a trailing "s"/"es"/"ed"/"ing", optionally with the final letter doubled
    before "ed"/"ing" (e.g. "goes", "cleaning", "running", "cancelled").

    Args:
        task_sentence: Text to classify; may be a full sentence or a single
            verb lemma.

    Returns:
        The name of the first matching category, or "Other" when no keyword
        matches.
    """
    # Categories are checked in declaration order, so a keyword listed under
    # several categories (e.g. "schedule", "book", "call") always resolves to
    # the category that appears first.
    categories = {
        "Personal": ["wake", "sleep", "play", "workout", "exercise", "relax", "rest", "meditate", "travel", "go", "text", "call"],
        "Household": ["clean", "wash", "cook", "sweep", "mop", "laundry", "repair", "arrange", "fix", "organize"],
        "Shopping": ["buy", "purchase", "order", "shop", "pay", "pick", "collect", "return", "exchange"],
        "Work": ["review", "submit", "study", "meeting", "code", "complete", "write", "read", "analyze", "plan", "discuss", "present", "schedule", "design", "report"],
        "Appointments & Meetings": ["schedule", "attend", "reschedule", "organize", "cancel", "confirm", "set up", "book", "meet", "invite"],
        "Communication": ["call", "email", "text", "message", "reply", "respond", "chat", "discuss", "notify", "update", "report"],
        "Travel & Booking": ["book", "reserve", "cancel", "travel", "pack", "plan", "schedule", "check-in", "board", "arrange", "visit", "tour"],
        "Health & Fitness": ["exercise", "run", "walk", "jog", "gym", "yoga", "meditate", "train", "stretch", "workout", "hydrate"],
        "Finance & Payments": ["pay", "transfer", "deposit", "withdraw", "invest", "save", "budget", "buy", "sell", "donate", "fund"]
    }
    lowered = task_sentence.lower()
    for category, keywords in categories.items():
        for keyword in keywords:
            stem = re.escape(keyword)
            last_char = re.escape(keyword[-1])
            # \b ... \b anchors the keyword to word boundaries; the optional
            # group admits regular inflections of the keyword.
            pattern = r"\b" + stem + r"(?:s|es|" + last_char + r"?(?:ed|ing))?\b"
            if re.search(pattern, lowered):
                return category
    return "Other"

In [None]:
# Load the input file; the relative path is resolved against the current
# working directory.
file_path = "input.txt"
text = read_text_file(file_path)
# read_text_file returns "" when the file is missing, so a missing (or empty)
# file skips the whole pipeline below.
if text:
    sentences = preprocess_text(text)
    # One record (dict) per sentence; converted to a DataFrame in a single step.
    task_data = []
    for sentence in sentences:
        # The lemma of the sentence's main verb serves as the task label.
        task = extract_main_verb(sentence)
        # NOTE: the category is derived from the verb lemma alone, not from
        # the full sentence.
        category = categorize_task(task)
        who = extract_who_task(sentence)
        deadline = extract_deadline(sentence)
        task_data.append({"Sentence": sentence, "Task": task, "Category": category, "Who": who, "Deadline": deadline})
    df = pd.DataFrame(task_data)
    # Render the result as a rich table in the notebook.
    display(df)

    # Save the table to a CSV file (index=False leaves out the row index).
    df.to_csv("task_output.csv", index=False)
    print("Output saved as task_output.csv")

Unnamed: 0,Sentence,Task,Category,Who,Deadline
0,Sarah must pick up her prescription from the p...,pick,Shopping,Sarah,3 PM
1,Michael must submit his tax documents before t...,submit,Work,Michael,next Tuesday
2,Emma must prepare the presentation for tomorro...,prepare,Other,Emma,tomorrow
3,William goes swimming every Thursday morning a...,go,Personal,William,morning
4,Charlotte should call her grandmother this eve...,call,Personal,Charlotte,this evening
5,James will start his new job at the tech compa...,start,Other,James,next month
6,Oliver must fix the leaking faucet in the kitc...,fix,Household,Oliver,this weekend
7,Isabella must renew her driver's license befor...,renew,Other,Isabella,No deadline specified
8,Thomas must finish painting the living room wa...,finish,Other,Thomas,Sunday
9,Elizabeth should schedule a haircut appointmen...,schedule,Work,Elizabeth,next week


Output saved as task_output.csv
                                                               Sentence     Task                Category         Who                  Deadline
         Sarah must pick up her prescription from the pharmacy by 3 PM.     pick                Shopping       Sarah                      3 PM
Michael must submit his tax documents before the deadline next Tuesday.   submit                    Work     Michael              next Tuesday
      Emma must prepare the presentation for tomorrow's client meeting.  prepare                   Other        Emma                  tomorrow
        William goes swimming every Thursday morning at the local pool.       go                Personal     William                   morning
                    Charlotte should call her grandmother this evening.     call                Personal   Charlotte              this evening
           James will start his new job at the tech company next month.    start                   Other      