In [None]:
!pip install PyMuPDF
!pip install python-docx
!pip install pytesseract Pillow PyMuPDF python-docx
!pip install openai


Collecting PyMuPDF
  Downloading PyMuPDF-1.23.21-cp310-none-manylinux2014_x86_64.whl (4.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.4/4.4 MB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting PyMuPDFb==1.23.9 (from PyMuPDF)
  Downloading PyMuPDFb-1.23.9-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (30.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.6/30.6 MB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyMuPDFb, PyMuPDF
Successfully installed PyMuPDF-1.23.21 PyMuPDFb-1.23.9
Collecting python-docx
  Downloading python_docx-1.1.0-py3-none-any.whl (239 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.6/239.6 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: python-docx
Successfully installed python-docx-1.1.0
Collecting pytesseract
  Downloading pytesseract-0.3.10-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract


In [None]:
import os
import json
import fitz  # PyMuPDF
import xml.etree.ElementTree as ET
from docx import Document
import pytesseract
from PIL import Image
import re
import openai
from openai import OpenAI
# Clean text function
def clean_text(text):
    """Normalize raw extracted text into a single tidy line.

    The substitutions run in order:
      1. drop every character that is not a word character, whitespace,
         or one of ``, . ! ? ; : ' -``;
      2. collapse runs of three or more dots into one dot;
      3. collapse repeated ``!`` into one;
      4. collapse repeated ``?`` into one;
      5. collapse any whitespace run into a single space.

    Returns the result with leading/trailing whitespace removed.
    """
    # (pattern, replacement) pairs; order matters because later rules
    # operate on the output of earlier ones.
    substitutions = (
        (r'[^\w\s,.!?;:\'-]', ''),
        (r'(\.{3,})', '.'),
        (r'([!]{2,})', '!'),
        (r'([?]{2,})', '?'),
        (r'\s+', ' '),
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

# Extract text from PDF
def extract_text_from_pdf(file_path):
    """Return the cleaned text of the PDF at ``file_path``.

    The document is opened with PyMuPDF (``fitz``); each page's
    ``get_text()`` result is concatenated in page order and the combined
    string is passed through ``clean_text``.
    """
    with fitz.open(file_path) as pdf:
        page_texts = [page.get_text() for page in pdf]
    return clean_text(''.join(page_texts))

# Extract text from XML
def extract_text_from_xml(file_path):
    """Return the cleaned text content of the XML file at ``file_path``.

    All text yielded by ``itertext()`` on the root element is joined
    without a separator and passed through ``clean_text``.
    """
    root_element = ET.parse(file_path).getroot()
    fragments = list(root_element.itertext())
    return clean_text(''.join(fragments))

# Extract text from JSON
def extract_text_from_json(file_path):
    """Return the cleaned text of the JSON file at ``file_path``.

    The file is parsed, re-serialized to a single string (keys and values
    together) and passed through ``clean_text``, which strips the JSON
    punctuation such as braces, brackets and quotes.

    Raises whatever ``open`` / ``json.load`` raise for a missing or
    malformed file.
    """
    # 'utf-8-sig' reads plain UTF-8 and also tolerates a leading BOM, instead
    # of depending on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        data = json.load(file)
    # ensure_ascii=False keeps non-ASCII characters as-is. With the default,
    # they are written as \uXXXX escapes and clean_text then removes the
    # backslash, leaving "uXXXX" junk in the text.
    # NOTE(review): control characters inside strings (e.g. newlines) are still
    # escaped by json.dumps and lose their backslash in clean_text.
    text = json.dumps(data, ensure_ascii=False)
    return clean_text(text)

# Extract text from DOCX
def extract_text_from_docx(file_path):
    """Return the cleaned text of the DOCX file at ``file_path``.

    Only the text of ``doc.paragraphs`` is read; the paragraphs are joined
    with newlines and the result is passed through ``clean_text``.
    """
    document = Document(file_path)
    paragraph_texts = (para.text for para in document.paragraphs)
    return clean_text('\n'.join(paragraph_texts))

# Extract text from TXT
def extract_text_from_txt(file_path):
    """Return the cleaned contents of the plain-text file at ``file_path``.

    The file is decoded as UTF-8 regardless of the platform's default
    encoding. Bytes that are not valid UTF-8 are replaced rather than
    raising, so one stray byte does not discard the whole document.

    Raises whatever ``open`` raises for a missing or unreadable file.
    """
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        text = file.read()
    return clean_text(text)

# Extract text from images
def extract_text_from_image(file_path):
    """Return the cleaned OCR text of the image at ``file_path``.

    The image is opened with Pillow and passed to
    ``pytesseract.image_to_string``; the recognized text goes through
    ``clean_text``. Any error is reported with ``print`` and an empty
    string is returned instead of raising.
    """
    try:
        # The context manager closes the underlying file handle even when
        # OCR fails; the original left the image open.
        with Image.open(file_path) as image:
            text = pytesseract.image_to_string(image)
        return clean_text(text)
    except Exception as e:
        print(f"Error processing image {file_path}: {e}")
        return ""

# Dispatcher to handle different file types
def extract_text(file_path):
    """Extract cleaned text from ``file_path`` based on its extension.

    Supported extensions (case-insensitive): ``.pdf``, ``.xml``, ``.json``,
    ``.docx``, ``.txt``, ``.jpg``, ``.jpeg``, ``.png``.

    Returns:
        The cleaned text, or an empty string when the file does not exist,
        the extension is unsupported, or the extractor raises. Each of these
        failures is reported with ``print``; nothing is raised to the caller.
    """
    if not os.path.exists(file_path):
        print(f"File {file_path} does not exist.")
        # Return "" (not None) so every failure path yields the same type
        # and callers can always treat the result as a string.
        return ""

    ext = os.path.splitext(file_path)[1].lower()
    try:
        if ext == '.pdf':
            return extract_text_from_pdf(file_path)
        elif ext == '.xml':
            return extract_text_from_xml(file_path)
        elif ext == '.json':
            return extract_text_from_json(file_path)
        elif ext == '.docx':
            return extract_text_from_docx(file_path)
        elif ext == '.txt':
            return extract_text_from_txt(file_path)
        elif ext in ('.jpg', '.jpeg', '.png'):
            return extract_text_from_image(file_path)
        else:
            print(f"Unsupported file format: {ext}")
            return ""
    except Exception as e:
        # Best-effort by design: one unreadable file must not abort a batch.
        print(f"An error occurred while processing the file: {e}")
        return ""
def check_sdnn_and_diseases(sdnn_severity, output):
    """Print risk notes based on SDNN severity and diseases named in ``output``.

    Args:
        sdnn_severity: ``"medium"`` or ``"high"`` (case and surrounding
            whitespace are ignored). Any other value prints no general
            risk notes.
        output: Text to search for disease names (case-insensitive
            substring match). ``None`` or an empty string is treated as
            text that mentions no disease.

    Returns:
        None. All results are written with ``print``.
    """
    # Accept variants such as "High" or " medium " instead of silently
    # treating them as "no severity".
    if isinstance(sdnn_severity, str):
        sdnn_severity = sdnn_severity.strip().lower()

    # Initial advice based on SDNN severity
    if sdnn_severity == "medium":
        print("Note: The patient has a higher risk of Metabolic Syndrome (MetS).")
        print("Note: The patient has a higher risk of Hypertension.")
        print("Note: The patient has a higher risk of Cardiovascular disease.")
        risk_level = "higher"
    elif sdnn_severity == "high":
        print("Note: The patient has a high risk of Metabolic Syndrome (MetS).")
        print("Note: The patient has a high risk of Hypertension.")
        print("Note: The patient has a higher risk of Cardiovascular disease.")
        risk_level = "very likely"
    else:
        risk_level = "none"  # Default, if SDNN severity is none or unspecified

    # Diseases to check, including specific messages for Diabetes
    diseases = {
        "Diabetes": "The patient has a higher chance of experiencing Hypoglycemia and Diabetic Autonomic Neuropathy.",
        "Fibromyalgia": "",
        "Asthma": "",
        "Depression": "Depression severity might be correlated with abnormal HRV metrics.",
        "Chronic Heart Failure": "CHF patients with abnormal HRV have a higher risk of adverse outcomes."
    }

    # Lower-case once, outside the loop. `or ""` guards against None, which
    # previously raised AttributeError on .lower().
    searchable_text = (output or "").lower()

    # Check for each disease in the output string (case-insensitive search)
    for disease, message in diseases.items():
        if disease.lower() not in searchable_text:
            continue
        if disease == "Diabetes":
            print(f"Note: {message}")
            if risk_level == "higher":
                print("Additionally, due to the medium SDNN severity, the patient should be closely monitored for these conditions.")
            elif risk_level == "very likely":
                print("Additionally, due to the high SDNN severity, immediate examination for these conditions is recommended.")
        else:
            if risk_level == "higher":
                print(f"Note: The patient with {disease} has a higher chance of worse symptoms and should be asked.")
            elif risk_level == "very likely":
                print(f"Note: The patient with {disease} has a very likely chance of worse symptoms and should be examined.")
            if message:
                print(f"Additional note: {message}")

if __name__ == "__main__":
    # Example usage
    file_paths = ['/content/5834276350123468476.pdf']

    # Collect the text of every readable file. The original overwrote this
    # accumulated text with only the last file's result (`output = text`).
    extracted_texts = []
    for path in file_paths:
        text = extract_text(path)
        if text:
            #print(f"Extracted Text from {path}:\n{text}\n")
            extracted_texts.append(text)
    # A newline keeps the last word of one file from fusing with the first
    # word of the next.
    output = "\n".join(extracted_texts)

    sdnn_severity = "high"
    check_sdnn_and_diseases(sdnn_severity, output)

    # Running GPT to summarize the extracted report.
    # Report template appended to the system prompt so the model knows which
    # "consistent format" to follow.
    summary_format = (
        "**[Patient Information]**\n"
        "- Name: [Patient's Name]\n"
        "- Age: [Patient's Age]\n"
        "- Gender: [Patient's Gender]\n"
        "- Medical Record Number: [MRN]\n"
        "- Date of Admission: [Date of Admission]\n"
        "- Date of Discharge: [Date of Discharge]\n\n"
        "**[Chief Complaint]**\n"
        "- [Brief description of the patient's main concern or reason for admission]\n\n"
        "**[History of Present Illness]**\n"
        "- [Detailed narrative of the current health issue, including onset, progression, exacerbating/alleviating factors, and any treatments received]\n\n"
        "**[Past Medical History]**\n"
        "- [Summary of relevant past medical conditions, surgeries, hospitalizations, and significant illnesses]\n\n"
        "**[Medications]**\n"
        "- [List of current medications, including dosage, frequency, and route of administration]\n\n"
        "**[Allergies]**\n"
        "- [List of any known allergies, including medication, food, or environmental allergies]\n\n"
        "**[Social History]**\n"
        "- [Brief overview of the patient's lifestyle, including habits such as smoking, alcohol consumption, and recreational drug use]\n\n"
        "**[Family History]**\n"
        "- [Summary of relevant family medical history, including any hereditary conditions or diseases]\n\n"
        "**[Review of Systems]**\n"
        "- [Brief review of various bodily systems, noting any relevant positive or negative findings]\n\n"
        "**[Physical Examination]**\n"
        "- [Summary of findings from the physical examination, including vital signs and pertinent clinical observations]\n\n"
        "**[Laboratory and Diagnostic Results]**\n"
        "- [Summary of relevant laboratory tests, imaging studies, and other diagnostic procedures, including results and interpretation]\n\n"
        "**[Assessment and Plan]**\n"
        "- [Summary of the diagnosis(es) and the plan for further evaluation and management, including medications, procedures, consultations, and follow-up arrangements]\n\n"
        "**[Prognosis]**\n"
        "- [Brief discussion of the expected course of the illness and potential outcomes]\n\n"
        "**[Discharge Instructions]**\n"
        "- [Instructions provided to the patient upon discharge, including medication regimen, activity restrictions, follow-up appointments, and any other relevant information]\n\n"
        "**[Provider Signature]**\n"
        "- [Name and credentials of the provider responsible for the medical report]"
    )

    # Read the key from the environment so it is never committed with the notebook.
    apikey = os.environ.get("OPENAI_API_KEY")

    if not output:
        print("No text was extracted; skipping the GPT summary.")
    elif not apikey:
        print("OPENAI_API_KEY is not set; skipping the GPT summary.")
    else:
        client = OpenAI(api_key=apikey)
        response = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": "You are an assistant who provides eletronic report summaries in a consistent format: " + summary_format,
                },
                {
                    "role": "user",
                    "content": "Summarize this electronic health report: " + output,
                },
            ],
            model="gpt-3.5-turbo",
        )
        print(response.choices[0].message.content)



Note: The patient has a high risk of Metabolic Syndrome (MetS).
Note: The patient has a high risk of Hypertension.
Note: The patient has a higher risk of Cardiovascular disease.
Note: The patient has a higher chance of experiencing Hypoglycemia and Diabetic Autonomic Neuropathy.
Additionally, due to the high SDNN severity, immediate examination for these conditions is recommended.
Note: The patient with Asthma has a very likely chance of worse symptoms and should be examined.
