In [19]:
import re
import nltk
import spacy
import pandas as pd
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from IPython.display import display
nltk.download('punkt')
nltk.download('stopwords')
nlp = spacy.load("en_core_web_sm")

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/parthabhang/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/parthabhang/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
# Function to Read I/P file
def read_text_file(file_path):
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            text = file.read()
        return text
    except FileNotFoundError:
        print("Error: The file was not found.")
        return ""

In [None]:
# Function to Clean Text
def preprocess_text(text):
    text = re.sub(r'\s+', ' ', text)
    sentences = sent_tokenize(text)
    return sentences

In [None]:
# Function to POS for Verb Only
def pos_tagging(sentence):
    doc = nlp(sentence)
    verbs = [token.text for token in doc if token.pos_ == "VERB"]
    return verbs


In [None]:
# Function to Lemmatize Verb
def lemmatize_verb(verb):
    doc = nlp(verb)
    return doc[0].lemma_

In [None]:
# Function to Get only main verb
def extract_main_verb(sentence):
    doc = nlp(sentence)
    for token in doc:
        if token.pos_ == "VERB" or token.dep_ == "ROOT":  # Get main verb
            return lemmatize_verb(token.text)
    return "No verb found"

In [None]:
# Function to Identify Task
def identify_tasks(sentences):
    task_verbs = []
    for sentence in sentences:
        verb = extract_main_verb(sentence)
        if verb != "No verb found":
            task_verbs.append(verb)
    return task_verbs

In [None]:
# Function to Extract name
def extract_who_task(task_sentence):
    doc = nlp(task_sentence)
    for token in doc:
        if token.pos_ in ["PROPN", "PRON"]:  # Proper Noun or Pronoun
            return token.text
    return "Unknown"

In [None]:
# Function to Extract Deadline
def extract_deadline(task_sentence):
    doc = nlp(task_sentence)
    for ent in doc.ents:
        if ent.label_ in ["TIME", "DATE"]:
            return ent.text
    return "No deadline specified"

In [None]:
# Function for Categorization of task
def categorize_task(task_sentence):
    categories = {
        "Personal": ["wake", "sleep", "play", "workout", "exercise", "relax", "rest", "meditate", "travel", "go", "text", "call"],
        "Household": ["clean", "wash", "cook", "sweep", "mop", "laundry", "repair", "arrange", "fix", "organize"],
        "Shopping": ["buy", "purchase", "order", "shop", "pay", "pick", "collect", "return", "exchange"],
        "Work": ["review", "submit", "study", "meeting", "code", "complete", "write", "read", "analyze", "plan", "discuss", "present", "schedule", "design", "report"],
        "Appointments & Meetings": ["schedule", "attend", "reschedule", "organize", "cancel", "confirm", "set up", "book", "meet", "invite"],
        "Communication": ["call", "email", "text", "message", "reply", "respond", "chat", "discuss", "notify", "update", "report"],
        "Travel & Booking": ["book", "reserve", "cancel", "travel", "pack", "plan", "schedule", "check-in", "board", "arrange", "visit", "tour"],
        "Health & Fitness": ["exercise", "run", "walk", "jog", "gym", "yoga", "meditate", "train", "stretch", "workout", "hydrate"],
        "Finance & Payments": ["pay", "transfer", "deposit", "withdraw", "invest", "save", "budget", "buy", "sell", "donate", "fund"]
    }
    for category, keywords in categories.items():
        if any(keyword in task_sentence.lower() for keyword in keywords):
            return category
    return "Other"

In [None]:
# Load the file
file_path = "input.txt"
text = read_text_file(file_path)
# perform only if file found.
if text:
    sentences = preprocess_text(text)
    task_data = []
    for sentence in sentences:
        task = extract_main_verb(sentence)
        category = categorize_task(task)
        who = extract_who_task(sentence)
        deadline = extract_deadline(sentence)
        task_data.append({"Sentence": sentence, "Task": task, "Category": category, "Who": who, "Deadline": deadline})
    df = pd.DataFrame(task_data)
    # display the result in table
    display(df)

    # Save output in csv file.
    df.to_csv("task_output.csv", index=False)
    print("Output saved as task_output.csv")

Unnamed: 0,Sentence,Task,Category,Who,Deadline
0,Alan wakes up early every day and goes for a m...,wake,Personal,Alan,early every day
1,He has to submit his project report by 5 PM to...,have,Other,He,5 PM
2,Neha should call the doctor to book an appoint...,call,Personal,Unknown,No deadline specified
3,Vikram needs to clean the garage this weekend.,need,Other,Vikram,this weekend
4,Amit is required to buy groceries for the hous...,require,Other,Unknown,6 PM
5,Priya must finish reading the research paper b...,finish,Other,Priya,tomorrow
6,"John has a dentist appointment at 4 PM, and he...",have,Other,John,4 PM
7,Anjali will travel to Mumbai for a conference ...,travel,Personal,Mumbai,next Monday
8,Suresh has to pay the electricity bill before ...,have,Other,Suresh,Friday
9,Meera needs to schedule a meeting with the mar...,need,Other,Meera,No deadline specified


Output saved as task_output.csv
