In [7]:
pip install pdfplumber

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [8]:
import csv
import re
from textblob import TextBlob
import pdfplumber  # For extracting text from PDF

# Load the corpus from CSV
def load_corpus(csv_file, encoding='utf-8-sig'):
    """Load a word -> priority lookup table from a two-column CSV file.

    The first row is treated as a header and skipped. Each following row
    supplies a word in its first column and a priority label in its second.

    Words are stripped and lower-cased because ``classify_priority`` looks
    words up after lower-casing the description; a capitalised or padded key
    could never match. Priority labels are stripped of surrounding whitespace.

    Args:
        csv_file: Path to the CSV file.
        encoding: Text encoding used to read the file. The default
            ``'utf-8-sig'`` reads UTF-8 with or without a byte-order mark.

    Returns:
        dict mapping each lower-cased word to its priority label. Empty when
        the file has no data rows. If a word appears more than once, the last
        row wins.
    """
    corpus = {}
    # newline='' lets the csv module handle line endings and quoted newlines itself.
    with open(csv_file, mode='r', newline='', encoding=encoding) as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header; the default avoids StopIteration on an empty file
        for row in reader:
            # Blank lines come back as [] and short rows have no priority column.
            if len(row) < 2:
                continue
            # Only the first two columns are used; extra columns are ignored.
            word = row[0].strip().lower()
            priority = row[1].strip()
            if not word or not priority:
                continue
            corpus[word] = priority
    return corpus

# Priority classification function
def classify_priority(description, corpus, negative_threshold=-0.5):
    """Classify a free-text description as "High", "Medium" or "Low" priority.

    Each word of the lower-cased description is looked up in ``corpus`` and
    the matching priority labels are tallied. The label with the most hits
    wins; on a tie the order High, Medium, Low decides. If nothing matches,
    the result is "Low". Otherwise a sentiment polarity is computed with
    TextBlob, and a polarity below ``negative_threshold`` overrides the
    tally and returns "High".

    Args:
        description: Text to classify. ``None`` or an empty string yields "Low".
        corpus: Mapping of lower-case word -> priority label. Labels are
            matched case-insensitively and ignoring surrounding whitespace;
            labels other than High/Medium/Low are ignored.
        negative_threshold: Polarity below which the result is forced to
            "High". Defaults to -0.5.

    Returns:
        One of the strings "High", "Medium" or "Low".
    """
    # Nothing to analyse: same answer as "no relevant words found".
    if not description:
        return "Low"

    # Convert description to lower case so lookups are case-insensitive
    description = description.lower()

    # Split description into words
    words = re.findall(r'\b\w+\b', description)

    # Initialize priority count (insertion order doubles as the tie-break order)
    priority_count = {"High": 0, "Medium": 0, "Low": 0}
    # Accept labels such as "high" or " High " without raising KeyError.
    canonical = {label.lower(): label for label in priority_count}

    # Tally the category of every word found in the corpus
    for word in words:
        if word not in corpus:
            continue
        category = canonical.get(str(corpus[word]).strip().lower())
        if category is not None:  # unknown labels are skipped, not fatal
            priority_count[category] += 1

    # Determine the highest priority category
    max_priority = max(priority_count, key=priority_count.get)

    # Handle the case where all counts are zero (no relevant words found)
    if priority_count[max_priority] == 0:
        return "Low"

    # Perform sentiment analysis
    sentiment = TextBlob(description).sentiment.polarity

    # Strongly negative text is escalated; otherwise the keyword tally decides.
    if sentiment < negative_threshold:
        return "High"
    return max_priority

# Function to extract text (description) from PDF
def extract_description_from_pdf(pdf_file):
    """Extract the text of every page of a PDF as a single string.

    Pages for which ``extract_text()`` returns nothing (``None`` or an empty
    string) are skipped. Page texts are joined with a newline so the last
    word of one page and the first word of the next stay separate tokens.

    Args:
        pdf_file: Path (or other object accepted by ``pdfplumber.open``) of
            the PDF to read.

    Returns:
        The concatenated page text with leading and trailing whitespace
        removed; an empty string if no page produced text.
    """
    page_texts = []
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
    # Join once at the end rather than growing a string page by page.
    return "\n".join(page_texts).strip()

# Input locations: the keyword corpus and the PDF to be classified
csv_file = 'corpusForBEproject.csv'
pdf_file = 'sample.pdf'

# Build the word -> priority lookup from the CSV
corpus = load_corpus(csv_file)

# Pull the text out of the PDF and show what was read
description = extract_description_from_pdf(pdf_file)
print(f"Extracted description from PDF:\n{description}")

# Run the classifier on the extracted text and report the outcome
priority = classify_priority(description, corpus)
print(f"The priority of the description is: {priority}")


Extracted description from PDF:
Title: "Task Urgency Report"
Description:
"This task involves fixing a critical bug that is causing major disruptions. It requires urgent attention
and should be prioritized over other medium-priority tasks. Failure to resolve this issue promptly will
have a high impact on the project."
The priority of the description is: High
