# Task-A

In [1]:
import spacy
import dateparser
from datetime import datetime, timedelta
import os

# Load the spaCy language model for English (small model).
# This runs once at import time; the resulting pipeline is reused for every sentence.
nlp = spacy.load("en_core_web_sm")

# Define a set of task-related keywords.
# NOTE: extract_task_details tests these as plain substrings of the lower-cased
# sentence, so a keyword also matches inside longer words (e.g. "must" in "mustard").
TASK_KEYWORDS = {"must", "should", "has to", "have to", "needs to", "is required to"}

# Date Parsing Module

def parse_date(date_text):
    """
    Parses date text and returns a normalized deadline string.

    Resolution order (the first rule that matches wins):
      1. "today" or "tomorrow" anywhere in the text -> that literal word.
      2. An English weekday name anywhere in the text -> the capitalized day
         name (e.g., "next Friday" -> "Friday"). Weekdays are tried in
         Monday..Sunday order, not in order of appearance.
      3. Anything else is handed to dateparser, preferring future dates.

    Matching in rules 1 and 2 is a case-insensitive substring test.

    Parameters:
        date_text (str): The date string extracted from text.

    Returns:
        str or None: "today", "tomorrow", a weekday name, or a
                     'YYYY-MM-DD HH:MM:SS' timestamp; None when the input is
                     None/blank or cannot be parsed.
    """
    # Guard against None or blank input instead of failing on .lower()
    if not date_text or not date_text.strip():
        return None

    # Lower-case once; every keyword test below reuses it
    lowered = date_text.lower()

    # Check for simple relative dates
    for relative_word in ("today", "tomorrow"):
        if relative_word in lowered:
            return relative_word

    # Look for any weekday mention in the date text.
    # Only the day name is reported, so it is derived directly from the matched
    # keyword: this avoids computing a date that is then discarded and keeps the
    # result in English regardless of the process locale (strftime('%A') is
    # locale-dependent).
    week_days = (
        "monday", "tuesday", "wednesday", "thursday",
        "friday", "saturday", "sunday",
    )
    for day in week_days:
        if day in lowered:
            return day.capitalize()

    # For other relative dates, use dateparser with settings favoring future dates
    parsed_date = dateparser.parse(
        date_text,
        settings={'PREFER_DATES_FROM': 'future', 'RELATIVE_BASE': datetime.today()},
    )
    if parsed_date:
        return parsed_date.strftime('%Y-%m-%d %H:%M:%S')
    return None

# Task Extraction Module

def extract_task_details(sentences):
    """
    Extracts task details (responsible entity, deadline, and category) from a list of sentences.

    A sentence counts as a task when its lower-cased text contains any entry of
    TASK_KEYWORDS (substring match). Only such sentences are run through the
    spaCy pipeline, which is then used to extract:
      - 'who' is responsible: the first PERSON/ORG entity, falling back to the
        first token whose dependency label is "nsubj".
      - The deadline: the first DATE/TIME entity, normalized by parse_date.
      - The task category via categorize_task.

    Parameters:
        sentences (list of str): List of sentences from the text.

    Returns:
        list of dict: Each dictionary contains details for a task:
                      {
                          "task": original task sentence (whitespace-stripped),
                          "who": responsible person or organization (or None),
                          "deadline": extracted deadline or "no deadline",
                          "category": assigned category of the task
                      }
    """
    tasks = []

    for sentence in sentences:
        # Skip empty sentences after stripping whitespace
        stripped = sentence.strip()
        if not stripped:
            continue

        # Cheap keyword screen first, so the comparatively expensive NLP pipeline
        # is not run on sentences that will be discarded anyway
        lowered = sentence.lower()
        if not any(keyword in lowered for keyword in TASK_KEYWORDS):
            continue

        # Process sentence with spaCy NLP model
        doc = nlp(sentence)

        # Attempt to extract the responsible entity (e.g., a PERSON or ORG)
        who = next(
            (ent.text for ent in doc.ents if ent.label_ in {"PERSON", "ORG"}),
            None,
        )

        # If no entity is detected, fall back to the grammatical subject
        if not who:
            who = next(
                (token.text for token in doc if token.dep_ == "nsubj"),
                who,
            )

        # Only the first DATE/TIME entity is considered; if it cannot be parsed
        # the task keeps the "no deadline" placeholder
        deadline = "no deadline"
        first_date_ent = next(
            (ent for ent in doc.ents if ent.label_ in {"DATE", "TIME"}),
            None,
        )
        if first_date_ent is not None:
            deadline = parse_date(first_date_ent.text) or deadline

        # Key order is kept stable because callers print the dict directly
        tasks.append({
            "task": stripped,
            "who": who,
            "deadline": deadline,
            "category": categorize_task(stripped),
        })

    return tasks


# Task Categorization Module

def categorize_task(task):
    """
    Picks a category for a task by scanning its text for known action words.

    Categories are tried in a fixed order (Shopping, Cleaning, Work,
    Appointments) and the first one with a matching word wins. Matching is a
    case-insensitive substring test, so a word also matches inside a longer
    word. A task that matches nothing falls into "General".

    Parameters:
        task (str): The task description.

    Returns:
        str: The category name.
    """
    lowered = task.lower()

    # (category, trigger words) pairs, listed in priority order
    keyword_table = (
        ("Shopping", ("buy", "purchase", "snack")),
        ("Cleaning", ("clean", "sweep", "wash", "cleaning")),
        ("Work", ("submit", "review", "write", "send", "report")),
        ("Appointments", ("meet", "call", "schedule", "appointment")),
    )

    for label, trigger_words in keyword_table:
        for trigger in trigger_words:
            if trigger in lowered:
                return label

    # Nothing matched
    return "General"


# File Reading and Processing Module

def read_file_and_process(file_path):
    """
    Loads a UTF-8 text file and returns the tasks found in it.

    The file content is cut into sentences at every '.' character and the
    resulting pieces are passed to extract_task_details.

    Parameters:
        file_path (str): Path to the input text file.

    Returns:
        list of dict: A list of task details dictionaries extracted from the text.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        # Rudimentary sentence splitting: every period ends a sentence
        raw_sentences = handle.read().split('.')

    return extract_task_details(raw_sentences)

# Main Pipeline Runner

def main(file_path="C:/Users/asus/Desktop/Text_Extractor/Extract.txt"):
    """
    Main function to run the task extraction pipeline.

    The function performs the following steps:
      1. Checks for the existence of an input file at the specified path.
      2. If found, processes the file to extract task details.
      3. If not found, uses a manually curated sample text.
      4. Prints each extracted task dictionary on its own line.

    Parameters:
        file_path (str): Path to the input text file. Defaults to the location
                         originally hard-coded in this script, so calling
                         main() with no arguments behaves as before.
    """
    # Check if the file exists; if not, use a manually curated sample
    if os.path.exists(file_path):
        tasks = read_file_and_process(file_path)
        print(f"Extracted {len(tasks)} tasks from file '{file_path}':\n")
    else:
        print(f"File not found: {file_path}. Using a manual sample instead.\n")
        sample_text = (
            "John must submit the report by next Friday. "
            "Mary should schedule an appointment for tomorrow. "
            "Alex has to buy groceries today. "
            "The team needs to review the project proposal next Monday."
        )
        # Split the sample text into sentences (same period-based split used for files)
        sentences = sample_text.split('.')
        tasks = extract_task_details(sentences)

    # Output the extracted tasks
    for task in tasks:
        print(task)
   
    

# Execute the main pipeline only when this file is run as a script;
# importing it as a module does not trigger the run.
if __name__ == '__main__':
    main()


Extracted 3 tasks from file 'C:/Users/asus/Desktop/Text_Extractor/Extract.txt':

{'task': 'john must complete the annual budget review by next Friday', 'who': 'john', 'deadline': 'Friday', 'category': 'Work'}
{'task': 'Mary has to schedule the project kickoff meeting for tomorrow afternoon', 'who': 'Mary', 'deadline': 'tomorrow', 'category': 'Appointments'}
{'task': 'Alex is required to finalize the vendor contract negotiations before the client event next Weekend', 'who': 'Alex', 'deadline': 'no deadline', 'category': 'General'}
