In [4]:
import pdfplumber
import subprocess
import os
import pytesseract
from PIL import Image
import docx


# -----------------------------
# File Extractors
# -----------------------------
def extract_text_from_pdf(file_path):
    """Return the combined text of all pages in a PDF, read with pdfplumber.

    Pages for which ``extract_text()`` yields nothing (``None`` or an empty
    string) are skipped. The remaining page texts are joined with newlines
    and surrounding whitespace is stripped from the final result.
    """
    with pdfplumber.open(file_path) as document:
        # Extract while the PDF is still open; filtering happens afterwards.
        page_texts = [page.extract_text() for page in document.pages]
    return "\n".join(chunk for chunk in page_texts if chunk).strip()


def extract_text_from_word(file_path):
    """Return the paragraph text of a Word document, read with python-docx.

    Only ``doc.paragraphs`` is consulted. Paragraph texts are joined with
    newlines and surrounding whitespace is stripped from the result.
    """
    document = docx.Document(file_path)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return "\n".join(paragraph_texts).strip()


def extract_text_from_image(file_path):
    """Run OCR on an image file with pytesseract and return the text.

    Args:
        file_path: Path to an image file that PIL can open.

    Returns:
        The recognised text with surrounding whitespace stripped.
    """
    # Image.open() is lazy and keeps the underlying file open; using it as a
    # context manager guarantees the handle is released once OCR is done,
    # even if pytesseract raises.
    with Image.open(file_path) as image:
        text = pytesseract.image_to_string(image)
    return text.strip()


# -----------------------------
# Disease & Risk Analysis
# -----------------------------
def identify_disease_and_risk(text, model="tinyllama"):
    """Ask a local Ollama model to classify a clinical note.

    The note is embedded in a fixed instruction prompt and piped to
    ``ollama run <model>`` on stdin. Anything the process writes to stderr
    is printed as a warning, as is a non-zero exit status.

    Args:
        text: Extracted clinical-note text to analyse.
        model: Name of the Ollama model to run. Defaults to ``"tinyllama"``,
            which is what this function always used previously.

    Returns:
        The model's stdout with surrounding whitespace stripped. This may be
        an empty string if the process produced no output.

    Raises:
        FileNotFoundError: If the ``ollama`` executable cannot be found
            (raised by ``subprocess.run``).
    """
    prompt = f"""
You are a mental health medical NLP assistant.

From the following clinical note, determine if it relates to mental health.

If it does, identify the disease mentioned, determine the risk level, and provide a medical suggestion.

If it does not, return the message: "The document is not related to mental health or is invalid."

Present the output in this exact structured format only if the note is related to mental health:

Disease Name: [Name of the disease]
Risk Level: [Low / Moderate / High]
Suggestion: [Provide a short actionable medical suggestion]

Clinical Note:
\"\"\"{text}\"\"\"
"""

    result = subprocess.run(
        ["ollama", "run", model],
        input=prompt,
        text=True,
        # Without an explicit encoding, text mode uses the locale codec
        # (e.g. cp1252 on Windows), which can raise on non-ASCII characters
        # in the note or in the reply. Pin UTF-8 and never crash on decode.
        encoding="utf-8",
        errors="replace",
        capture_output=True
    )

    if result.stderr:
        print("⚠️ Error from Ollama:", result.stderr)
    if result.returncode != 0:
        # A failure with empty stderr would otherwise look like an empty answer.
        print("⚠️ Ollama exited with status", result.returncode)
    return result.stdout.strip()


# -----------------------------
# Main Function
# -----------------------------
if __name__ == "__main__":
    # Script entry point: extract text from one hard-coded document, print
    # it, then print the model's disease/risk analysis of that text.
    file_path = r"C:\CTSHack\ClinicalNote\Data\synthetic_clinical_note.docx"

    if not os.path.exists(file_path):
        print("❌ File does not exist:", file_path)
        # exit() is a convenience injected by the site module (and overridden
        # by IPython); it is not meant for programs and would report success.
        # Raise SystemExit directly with a non-zero status instead.
        raise SystemExit(1)

    # Choose the extractor from the lower-cased file extension.
    ext = os.path.splitext(file_path)[1].lower()
    extracted_text = ""

    try:
        if ext == ".pdf":
            extracted_text = extract_text_from_pdf(file_path)
        elif ext == ".docx":
            extracted_text = extract_text_from_word(file_path)
        elif ext in (".jpg", ".jpeg", ".png"):
            extracted_text = extract_text_from_image(file_path)
        else:
            raise ValueError("❌ Enter a valid document (PDF, DOCX, or Image)")

        if extracted_text:
            print("\n=== Extracted Clinical Note ===\n")
            print(extracted_text)

            result = identify_disease_and_risk(extracted_text)
            print("\n=== Disease Risk Analysis ===\n")
            print(result)
        else:
            print("❌ No text extracted from the document!")

    except Exception as e:
        # Top-level boundary: report any extraction or analysis failure
        # (including the unsupported-extension ValueError above) and finish.
        print(f"⚠️ Error: {e}")



=== Extracted Clinical Note ===

Patient Name: John D.
Age: 42
Gender: Male Date: 2025-08-31
Physician: Dr. Smith

Chief Complaint:
Patient reports persistent feelings of sadness, lack of motivation, and difficulty sleeping for the past 3 months.
He describes increased irritability at work and frequent arguments with family.

History of Present Illness (HPI):
Reports low energy and loss of interest in previously enjoyable activities.
Appetite reduced, with ~5kg unintentional weight loss.
Occasional headaches, denies chest pain or shortness of breath.
Endorses episodes of anxiety with palpitations and sweating.
Patient admits to thoughts of hopelessness but denies current suicidal plan or intent.

Past Medical History:
Hypertension (controlled with medication)
No history of psychiatric hospitalization

Medications:
Lisinopril 10mg daily
Occasional use of over-the-counter sleep aid

Social History:
Works as an IT consultant, married with 2 children
No tobacco, drinks alcohol socially (2