In [12]:
#Imports
import atexit
import logging
import os
import threading
import time
import tkinter as tk
from tkinter import filedialog, messagebox, scrolledtext

from dotenv import load_dotenv
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.retrievers import MergerRetriever
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_community.document_loaders import JSONLoader, PyPDFLoader
from langchain_core.messages import AIMessage, ChatMessage, HumanMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter, RecursiveJsonSplitter
from sqlalchemy import create_engine, Column, Integer, String, Text, ForeignKey
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import sessionmaker, relationship, declarative_base
load_dotenv()


#Chat History Database setup
# SQLAlchemy connection URL: a SQLite database file named
# Nurse_Chat_History_2.db, given as a relative path.
DATABASE_URL = "sqlite:///Nurse_Chat_History_2.db"
# Declarative base class that the ORM models (Session, Message) inherit from.
Base = declarative_base()

class Session(Base):
    """ORM model for one chat session; maps to the "sessions" table."""
    __tablename__ = "sessions"
    # Integer surrogate primary key; this is what Message.session_id points at.
    id = Column(Integer, primary_key=True)
    # Caller-supplied string identifier for the session (unique, required).
    # Not the same thing as the integer `id` above.
    session_id = Column(String, unique=True, nullable=False)
    # Messages belonging to this session; mirrored by Message.session.
    messages = relationship("Message", back_populates="session")

class Message(Base):
    """ORM model for a single chat message; maps to the "messages" table."""
    __tablename__ = "messages"
    # Integer surrogate primary key.
    id = Column(Integer, primary_key=True)
    # Foreign key to sessions.id (the integer key, not the string
    # Session.session_id despite the shared name).
    session_id = Column(Integer, ForeignKey("sessions.id"), nullable=False)
    # Speaker of the message; callers in this file store "human" or "ai".
    role = Column(String, nullable=False)
    # Message body.
    content = Column(Text, nullable=False)
    # Owning session; mirrored by Session.messages.
    session = relationship("Session", back_populates="messages")

# Create the database and the tables
# Engine bound to DATABASE_URL; shared by every session created below.
engine = create_engine(DATABASE_URL)
# Emit CREATE TABLE for the models registered on Base (runs at import time).
Base.metadata.create_all(engine)
# Factory for database sessions bound to the engine; call SessionLocal() to
# obtain a new session.
SessionLocal = sessionmaker(bind=engine)

def get_db():
    """Yield a fresh database session and close it when the consumer is done.

    This is a generator: the session is created on first iteration and is
    closed when the generator is resumed, closed, or garbage-collected.
    """
    # The session's own context manager closes it on exit, whether the
    # consumer finished normally or raised.
    with SessionLocal() as db:
        yield db



# Function to save a single message
def save_message(session_id: str, role: str, content: str):
    """Persist one chat message, creating its session row on first use.

    The write is best-effort: a database failure is rolled back and logged
    rather than raised, so callers are never interrupted by persistence
    problems.

    Args:
        session_id: String identifier of the chat session
            (matched against Session.session_id).
        role: Speaker of the message, e.g. "human" or "ai".
        content: Message text.
    """
    with SessionLocal() as db:
        try:
            session = db.query(Session).filter(Session.session_id == session_id).first()
            if session is None:
                session = Session(session_id=session_id)
                db.add(session)
                # flush() sends the INSERT so session.id is populated, but
                # keeps the transaction open: the new session row and its
                # first message are committed (or rolled back) together.
                db.flush()

            db.add(Message(session_id=session.id, role=role, content=content))
            db.commit()
        except SQLAlchemyError:
            db.rollback()
            # Keep the best-effort contract, but leave a trace of the loss.
            logging.getLogger(__name__).exception(
                "Could not save %s message for session %r", role, session_id
            )


# Function to load chat history
def load_session_history(session_id: str) -> ChatMessageHistory:
    """Rebuild a session's chat history from the database.

    Args:
        session_id: String identifier of the chat session
            (matched against Session.session_id).

    Returns:
        A ChatMessageHistory holding the stored messages in insertion order.
        It is empty when the session is unknown or the database cannot be
        read (the failure is logged, not raised).
    """
    chat_history = ChatMessageHistory()
    with SessionLocal() as db:
        try:
            session = db.query(Session).filter(Session.session_id == session_id).first()
            if session is None:
                return chat_history

            # Query explicitly ordered by primary key: the Session.messages
            # relationship declares no ordering, and conversation order matters.
            stored_messages = (
                db.query(Message)
                .filter(Message.session_id == session.id)
                .order_by(Message.id)
                .all()
            )
            for stored in stored_messages:
                # Store real message objects (not dicts) so the history has
                # the same element type as messages appended by the chain.
                if stored.role == "human":
                    chat_history.add_message(HumanMessage(content=stored.content))
                elif stored.role == "ai":
                    chat_history.add_message(AIMessage(content=stored.content))
                else:
                    chat_history.add_message(
                        ChatMessage(role=stored.role, content=stored.content)
                    )
        except SQLAlchemyError:
            # Best-effort load: fall back to whatever was read so far.
            logging.getLogger(__name__).exception(
                "Could not load chat history for session %r", session_id
            )

    return chat_history


# Modify the get_session_history function to use the database
def get_session_history(session_id: str) -> ChatMessageHistory:
    """Return the cached history for *session_id*, loading it on first access.

    The module-level `store` dict acts as the cache; a miss is filled from
    the database via load_session_history.
    """
    try:
        return store[session_id]
    except KeyError:
        # Cache miss: load from the database and remember the result.
        history = store[session_id] = load_session_history(session_id)
        return history


# Ensure you save the chat history to the database when needed
def save_all_sessions():
    """Persist any cached chat messages that are not yet in the database.

    For each session in `store`, the number of messages already stored is
    counted and only the cached messages beyond that count are written.
    This assumes the cached history begins with the messages previously
    loaded from or saved to the database, which holds for histories created
    by get_session_history. Without the check, every exit would re-insert
    the whole history and duplicate it.
    """
    log = logging.getLogger(__name__)
    for session_id, chat_history in store.items():
        try:
            with SessionLocal() as db:
                already_saved = (
                    db.query(Message)
                    .join(Session, Message.session_id == Session.id)
                    .filter(Session.session_id == session_id)
                    .count()
                )
        except SQLAlchemyError:
            # Without a reliable count we cannot tell new from old; skipping
            # is safer than writing duplicates.
            log.exception("Could not inspect stored history for session %r", session_id)
            continue

        for message in chat_history.messages[already_saved:]:
            if isinstance(message, dict):
                # Plain {"role": ..., "content": ...} mapping.
                role, content = message["role"], message["content"]
            else:
                # LangChain message object: ChatMessage carries an explicit
                # role, the others identify themselves through `.type`
                # ("human", "ai", ...).
                role = getattr(message, "role", None) or message.type
                content = message.content
            if not isinstance(content, str):
                # The content column is text; non-string content is stored
                # as its string form.
                content = str(content)
            save_message(session_id, role, content)

# Register function to save sessions before exit
# save_all_sessions runs once at normal interpreter shutdown.
atexit.register(save_all_sessions)

# In-memory cache: session_id string -> ChatMessageHistory. Filled lazily by
# get_session_history and read by save_all_sessions. It is defined after the
# functions that use it, which is fine because they look it up at call time.
store = {}


def NurseShiftSummary(filepath: str, session_id: str, vitals_reference_pdf: str = 'NormalVitals.pdf'):
    """Generate an end-of-shift EHR summary (JSON text) for one patient file.

    Builds a retrieval-augmented chain over two sources -- the patient's JSON
    record and a reference PDF of normal vital-sign ranges -- asks the model
    to fill in the EHR template, and records the exchange in the chat-history
    database.

    Args:
        filepath: Path to the patient's JSON data file.
        session_id: Chat-session identifier, used both for the in-memory
            history in `store` and for the persisted history.
        vitals_reference_pdf: Path to the PDF supplying normal vital-sign
            ranges. Defaults to the previously hard-coded 'NormalVitals.pdf'.

    Returns:
        The model's answer text on success. If anything raises, the
        exception is logged and a string of the form "Error: <message>" is
        returned instead.
    """
    try:
        llm = ChatOpenAI(model='gpt-4o')
        # One embeddings client is enough for both vector stores.
        embeddings = OpenAIEmbeddings()

        # Load the whole JSON document (jq_schema='.'); text_content=False
        # because the selected content is a JSON structure, not a plain string.
        loader = JSONLoader(file_path=filepath, jq_schema='.', text_content=False)
        docs = loader.load()

        # Split into chunks of up to 5000 characters with 200 characters of
        # overlap; add_start_index=True records each chunk's offset.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=200, add_start_index=True)
        splits = text_splitter.split_documents(docs)

        # Embed the patient chunks and expose them through a similarity
        # retriever that returns the 4 closest chunks.
        vectorstore = InMemoryVectorStore.from_documents(documents=splits, embedding=embeddings)
        retriever = vectorstore.as_retriever(search_type='similarity', search_kwargs={'k': 4})

        # Same pipeline for the normal-vitals reference PDF.
        pdf_loader = PyPDFLoader(file_path=vitals_reference_pdf)
        pdf_docs = pdf_loader.load()
        pdf_splits = text_splitter.split_documents(pdf_docs)
        pdf_vectorstore = InMemoryVectorStore.from_documents(documents=pdf_splits, embedding=embeddings)
        pdf_retriever = pdf_vectorstore.as_retriever(search_type='similarity', search_kwargs={'k': 4})

        # Merge the results of both retrievers. MergerRetriever takes no
        # weights (that option belongs to EnsembleRetriever), so the former
        # `retriever_weights` argument was dropped.
        combined_retriever = MergerRetriever(retrievers=[retriever, pdf_retriever])

        contextualize_q_system_prompt = (
            "Given a chat history and the latest user question "
            "which might reference context in the chat history, "
            "formulate a standalone question which can be understood "
            "without the chat history. Do NOT answer the question, "
            "just reformulate it if needed and otherwise return it as is."
        )

        contextualize_q_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", contextualize_q_system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )

        history_aware_retriever = create_history_aware_retriever(llm, combined_retriever, contextualize_q_prompt)

        # NOTE: these literals are concatenated with no implicit separator,
        # so every fragment must end with its own space/comma. Several
        # fragments previously ran into the next one (e.g.
        # "...unknownVital_signs: ..."), blurring the field list.
        system_prompt = (
            "You are a nurse in the NICU with preterm baby patients at the end of your shift, preparing for EHR documentation. "
            "Provide the documentation in the form of a JSON, shown below. "
            "Use the 'Normal Vital Signs in preterms' context to determine if the vital signs are within normal ranges. "
            "Use technical terms such as bradycardia, tachycardia, tachypnea, and bradypnea. "
            "The documentation should be technical and use proper medical terminology (Bradycardia/bradypnea, tachycardia/tachypnea). "
            "IF the vital sign is 141-170 and the vitalsign is measured to be 149, that therefore means within normal range. "
            "Never mention that you are referencing some external information. "
            "Avoid diagnosing. "

            "The JSON fields should include: "
            "Name: The patient's first name "
            "Age: The patient's age in weeks and days, make note of prematurity if applicable, "
            "Sex: The patient's sex, "
            "Date_of_admission: The first recorded timestamp from the dataset, "
            "Primary_care_provider: The name of the patient's primary care provider, N/A if unknown, "
            "Discharge_diagnosis: The final diagnosis upon discharge, "
            "Complaint: The primary reason for the patient's admission, "
            "Time_course: The onset and duration of symptoms, "
            "Symptom_severity: Maximum and current severity levels, "
            "Associated_symptoms: Other symptoms related to the chief complaint, related to vital sign data, "
            "Exacerbating_factors: Factors that make the condition worse, "
            "Relieving_factors: Factors that alleviate the condition, "
            "Interventions: A point form list of interventions performed during the patient's stay, with expplanations, "
            "HPI: A summary of the history of present illness, "
            "Past_medical_history: A summary of the patient's past medical conditions, "
            "Past_surgical_history: A list of previous surgeries, "
            "Allergies: A list of known allergies, "
            "Medications: A list of current medications, NONE if no medications or unknown, "
            "Review_of_systems: A structured summary of different body systems: cardiovascular, respiratory, "
            "Physical_exam: Findings from the physical examination, N/A if none or unknown, "
            "Vital_signs: A summary of recorded vitals over time, including minimum and maximum values, in sentence format, "
            "Medical_decision_making: The rationale behind medical decisions made for the patient, "
            "Progress_notes: Detailed notes on the patient's condition throughout the shift, "
            "Urgent_care_course: A summary of care provided in urgent or emergency settings, "
            "Follow_up_plan: Instructions for continued care and criteria for returning to urgent care, "
            "Patient_notes: Additional notes about the patient. "

            "\n\n{context}"

        )

        qa_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )
        question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
        rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

        # Wrap the chain so chat history is looked up (and extended) per
        # session through get_session_history.
        conversational_rag_chain = RunnableWithMessageHistory(
            rag_chain,
            get_session_history,
            input_messages_key="input",
            history_messages_key="chat_history",
            output_messages_key="answer",
        )

        question = "Fill the JSON EHR on the status of the patient."
        response = conversational_rag_chain.invoke(
            {"input": question},
            # The session id selects (or creates) this session's entry in `store`.
            config={"configurable": {"session_id": session_id}},
        )

        # Save the user question and AI response to the database
        save_message(session_id, "human", question)
        save_message(session_id, "ai", response['answer'])

        return response['answer']
    except Exception as e:
        # Top-level boundary: callers expect a string either way, so report
        # the failure in-band but keep the traceback in the log.
        logging.getLogger(__name__).exception(
            "NurseShiftSummary failed for session %r", session_id
        )
        return f"Error: {str(e)}"

In [18]:
# Run the summary for Kate_data.json under the given chat-session id.
# `answer` holds the model's reply text, or an "Error: ..." string on failure.
answer = NurseShiftSummary('Kate_data.json','03-mini-high_newPromptTesting_1')

In [19]:
answer

'```json\n{\n  "Name": "Kate",\n  "Age": "32 weeks and 1 day (Post-Conceptual)",\n  "Sex": "F",\n  "Date_of_admission": "2024-11-28 23:56:33",\n  "Primary_care_provider": "N/A",\n  "Discharge_diagnosis": "N/A",\n  "Complaint": "Prematurity",\n  "Time_course": "Symptoms have been present since birth and are ongoing.",\n  "Symptom_severity": {\n    "Maximum": "High",\n    "Current": "Low to Moderate"\n  },\n  "Associated_symptoms": "Tachypnea and potential episodes of bradycardia.",\n  "Exacerbating_factors": "Handling and position changes can increase discomfort and respiratory rate.",\n  "Relieving_factors": "Swaddling and position adjustments help alleviate symptoms.",\n  "Interventions": [\n    "Diaper change: Scheduled diaper changes to maintain hygiene and comfort.",\n    "Position change: Alternated between supine and lateral positions to support respiratory function and comfort.",\n    "Pain management: Swaddling and providing non-nutritive sucking to soothe and manage pain.",\n 

In [46]:
import json
from fpdf import FPDF
import re

class EHRPDF(FPDF):
    """FPDF subclass that lays out an Electronic Health Record chart.

    Provides a fixed page header/footer plus helpers for section headers,
    labelled fields and a min/max vital-signs table. All text written
    through add_field / add_table is first reduced to Latin-1.
    """

    # ASCII stand-ins for common typographic characters that Latin-1 cannot
    # encode. Without these, "≥ 38" would silently become " 38" in the chart.
    _LATIN1_FALLBACKS = str.maketrans({
        "\u2018": "'",
        "\u2019": "'",
        "\u201c": '"',
        "\u201d": '"',
        "\u2013": "-",
        "\u2014": "-",
        "\u2022": "-",
        "\u2026": "...",
        "\u2264": "<=",
        "\u2265": ">=",
    })

    def header(self):
        """Custom Header for the EHR PDF"""
        self.set_font("Times", "B", 16)
        self.cell(0, 10, "Medical Patient Chart - Electronic Health Record (EHR)", ln=True, align="C")
        self.ln(5)
        # Horizontal rule under the title (x from 10 to 200 at y = 25).
        self.set_draw_color(50, 50, 50)
        self.line(10, 25, 200, 25)
        self.ln(10)

    def footer(self):
        """Footer with page number"""
        # Negative y is measured from the bottom of the page.
        self.set_y(-15)
        self.set_font("Times", "I", 10)
        self.cell(0, 10, f"Page {self.page_no()}", align="C")

    def add_section_header(self, title):
        """Write a bold section title followed by a grey horizontal rule."""
        self.set_font("Times", "B", 14)
        self.cell(0, 8, title, ln=True)
        self.ln(2)
        self.set_draw_color(150, 150, 150)
        self.line(10, self.get_y(), 200, self.get_y())
        self.ln(5)

    def add_field(self, field_name, field_value):
        """Write a bold label followed by its value.

        Args:
            field_name: Label text; a colon is appended.
            field_value: A list is rendered as one "- item" line per entry,
                a dict as one "Key: value" line per entry, None as "N/A",
                and anything else (str, numbers, booleans) as its string
                form.
        """
        self.set_font("Times", "B", 12)
        self.cell(0, 7, f"{field_name}:", ln=True)
        self.set_font("Times", "", 12)

        field_value = self.convert_to_latin1(field_value)

        if isinstance(field_value, list):
            for item in field_value:
                self.cell(5)  # small indent before the bullet
                self.multi_cell(0, 7, f"- {self._as_text(item)}")
        elif isinstance(field_value, dict):
            for key, value in field_value.items():
                self.multi_cell(0, 7, f"{str(key).capitalize()}: {self._as_text(value)}")
        else:
            # Scalars, including numbers and None, used to be skipped
            # entirely, leaving a label with no value under it.
            self.multi_cell(0, 7, self._as_text(field_value))
        self.ln(3)

    def add_table(self, title, data_dict):
        """Write a three-column table of vital signs.

        Args:
            title: Section title printed above the table.
            data_dict: Mapping of vital name to its readings. A dict value
                supplies the "minimum" / "maximum" cells ("N/A" when a key
                is absent); any other value is shown in both cells.
        """
        self.add_section_header(title)
        self.set_font("Times", "B", 12)
        col_widths = [60, 65, 65]
        headers = ["Vital Sign", "Minimum", "Maximum"]

        for width, heading in zip(col_widths, headers):
            self.cell(width, 8, heading, border=1, align="C")
        self.ln()

        self.set_font("Times", "", 12)
        for key, values in self.convert_to_latin1(data_dict).items():
            if isinstance(values, dict):
                minimum = values.get("minimum", "N/A")
                maximum = values.get("maximum", "N/A")
            else:
                # Not a {"minimum": ..., "maximum": ...} mapping: show what
                # we were given rather than crash on `.get`.
                minimum = maximum = values
            label = str(key).replace("_", " ").capitalize()
            self.cell(col_widths[0], 8, label, border=1, align="C")
            self.cell(col_widths[1], 8, self._as_text(minimum), border=1, align="C")
            self.cell(col_widths[2], 8, self._as_text(maximum), border=1, align="C")
            self.ln()
        self.ln(5)

    def convert_to_latin1(self, text):
        """Return *text* reduced to characters encodable as Latin-1.

        Common typographic characters are first replaced by ASCII
        equivalents (see _LATIN1_FALLBACKS); anything still unsupported is
        removed. Lists and dicts are processed recursively (dict keys
        included); other types are returned unchanged.
        """
        if isinstance(text, str):
            text = text.translate(self._LATIN1_FALLBACKS)
            return text.encode("latin-1", "ignore").decode("latin-1")
        if isinstance(text, list):
            return [self.convert_to_latin1(item) for item in text]
        if isinstance(text, dict):
            return {
                self.convert_to_latin1(key): self.convert_to_latin1(value)
                for key, value in text.items()
            }
        return text

    def _as_text(self, value):
        """Flatten *value* into a single printable string."""
        if value is None:
            return "N/A"
        if isinstance(value, str):
            return value
        if isinstance(value, list):
            return ", ".join(self._as_text(item) for item in value)
        if isinstance(value, dict):
            return "; ".join(f"{key}: {self._as_text(item)}" for key, item in value.items())
        return str(value)

# "<name> ranged from <min> to <max>".
# - The name group is lazy so that each clause gets its own match; a greedy
#   group would swallow every earlier clause when clauses are joined by
#   plain words ("... to 160 and respiratory rate ranged from ...").
# - A number's decimal point must be followed by digits, so a
#   sentence-ending period is not captured as part of the value ("160.").
_VITAL_RANGE_RE = re.compile(
    r"([\w\s]+?)\s+ranged\s+from\s+(\d+(?:\.\d+)?)\s+to\s+(\d+(?:\.\d+)?)",
    re.IGNORECASE,
)

# Words that can precede a vital's name inside a sentence but are not part
# of it ("s" is what remains of a possessive such as "patient's").
_VITAL_NAME_FILLER = frozenset(
    {"and", "but", "the", "then", "while", "whereas", "with",
     "her", "his", "their", "its", "s"}
)


def extract_vitals_from_string(vitals_string):
    """Extract every "<vital> ranged from <min> to <max>" clause from a string.

    Args:
        vitals_string: Free-text vital-sign summary.

    Returns:
        A dict keyed by the vital's name in lower snake_case. Each value is
        a dict with "minimum" and "maximum" strings: the number followed by
        the literal placeholder " units" (the real unit is not parsed).
        Empty when no clause matches. A name that occurs twice keeps the
        last range found.
    """
    structured_vitals = {}
    for raw_name, minimum, maximum in _VITAL_RANGE_RE.findall(vitals_string):
        words = raw_name.lower().split()
        # Drop leading connective words picked up from the sentence.
        while words and words[0] in _VITAL_NAME_FILLER:
            words.pop(0)
        if not words:
            continue
        structured_vitals["_".join(words)] = {
            "minimum": minimum + " units",
            "maximum": maximum + " units",
        }
    return structured_vitals

# First Markdown code fence in a text, with an optional language tag.
_CODE_FENCE_RE = re.compile(r"```[A-Za-z]*\s*(.*?)```", re.DOTALL)


def _strip_code_fence(text):
    """Return the JSON payload of *text*, unwrapping a Markdown fence if present."""
    stripped = text.strip()
    if stripped.startswith(("{", "[")):
        # Already bare JSON; do not go looking for backticks inside it.
        return stripped
    fenced = _CODE_FENCE_RE.search(stripped)
    return fenced.group(1).strip() if fenced else stripped


def _format_interventions(interventions):
    """Normalise the "Interventions" value into a list of printable strings."""
    if interventions is None:
        return []
    if not isinstance(interventions, list):
        interventions = [interventions]

    formatted = []
    for entry in interventions:
        if isinstance(entry, dict):
            if "type" in entry and "detail" in entry:
                formatted.append(f"{str(entry['type']).capitalize()}: {entry['detail']}")
            else:
                # Unknown dict shape: keep the information, one "key: value"
                # pair after another.
                formatted.append("; ".join(f"{key}: {value}" for key, value in entry.items()))
        else:
            formatted.append(str(entry))
    return formatted


def generate_patient_ehr(json_string, output_path):
    """Generates a professional PDF EHR document from a JSON string.

    Args:
        json_string: JSON object text, optionally wrapped in a Markdown
            code fence (with or without surrounding prose).
        output_path: Destination path of the PDF.

    Returns:
        None. On success the PDF is written and a confirmation is printed.
        If the input is not a string, is not valid JSON, or is not a JSON
        object, an error is printed and nothing is written.
    """
    if not isinstance(json_string, str):
        print(f"Error parsing JSON: expected a string, got {type(json_string).__name__}")
        return

    # Parse JSON data
    try:
        patient_data = json.loads(_strip_code_fence(json_string))
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        return

    if not isinstance(patient_data, dict):
        # Everything below reads fields by name, so a list/number is unusable.
        print(f"Error parsing JSON: expected an object, got {type(patient_data).__name__}")
        return

    # Create and format the PDF
    pdf = EHRPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    # Patient Info Section
    pdf.add_section_header("Patient Information")
    pdf.add_field("Name", patient_data.get("Name", "N/A"))
    pdf.add_field("Age", patient_data.get("Age", "N/A"))
    pdf.add_field("Sex", patient_data.get("Sex", "N/A"))
    pdf.add_field("Date of Admission", patient_data.get("Date_of_admission", "N/A"))
    pdf.add_field("Primary Care Provider", patient_data.get("Primary_care_provider", "N/A"))

    # Medical Details
    pdf.add_section_header("Medical History")
    pdf.add_field("Chief Complaint", patient_data.get("Complaint", "N/A"))
    pdf.add_field("Time Course", patient_data.get("Time_course", "N/A"))
    pdf.add_field("Symptom Severity", patient_data.get("Symptom_severity", "N/A"))
    pdf.add_field("Associated Symptoms", patient_data.get("Associated_symptoms", []))
    pdf.add_field("Exacerbating Factors", patient_data.get("Exacerbating_factors", []))
    pdf.add_field("Relieving Factors", patient_data.get("Relieving_factors", []))

    # Vital Signs: a table when min/max pairs are available, otherwise the
    # raw summary so the information is never dropped.
    vitals_data = patient_data.get("Vital_signs", {})
    vitals_text = None
    if isinstance(vitals_data, str):
        vitals_text = vitals_data
        vitals_data = extract_vitals_from_string(vitals_data)

    is_minmax_table = (
        isinstance(vitals_data, dict)
        and bool(vitals_data)
        and all(isinstance(reading, dict) for reading in vitals_data.values())
    )
    if is_minmax_table:
        pdf.add_table("Vital Signs", vitals_data)
    elif vitals_text or vitals_data:
        pdf.add_section_header("Vital Signs")
        pdf.add_field("Summary", vitals_text if vitals_text is not None else vitals_data)

    # Interventions
    pdf.add_section_header("Interventions")
    pdf.add_field("Actions Taken", _format_interventions(patient_data.get("Interventions", [])))

    # Progress Notes
    pdf.add_section_header("Progress Notes")
    pdf.add_field("Observations", patient_data.get("Progress_notes", "N/A"))

    # Follow-up Plan
    pdf.add_section_header("Follow-up Plan")
    pdf.add_field("Next Steps", patient_data.get("Follow_up_plan", "N/A"))

    # Save PDF
    pdf.output(output_path)
    print(f"Professional EHR PDF saved to {output_path}")

# Example usage
# Render the summary text held in `answer` (set by the NurseShiftSummary
# call in an earlier cell) to a PDF at this relative path.
output_file_path = "Fixed_Medical_Chart.pdf"
generate_patient_ehr(answer, output_file_path)


Professional EHR PDF saved to Fixed_Medical_Chart.pdf
