- This notebook takes all notes in the original_notes folder 
- For each one, it generates a new AI-rewritten note with the same content (normal lab values and normal vital signs are omitted) 
- It uses the new AI note to generate 20 variations for each note: 
    - 5 variations that remove relevant information
    - 5 variations that add synthetic relevant information
    - 5 variations that remove non-relevant information
    - 5 variations that add synthetic non-relevant information
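- A summary row for each processed note is saved in the modification_details.csv file (inside the modified_notes folder)
- It saves all the notes (original, AI rewrite, and variations) in the modified_notes folder, one subfolder per original note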


In [1]:
# NOTE(review): `!` runs this command in a separate shell, so the activation presumably
# does not carry over to the running kernel — confirm the kernel itself is started from .venv.
!.venv\Scripts\activate

In [1]:
import os
import dotenv
# Load environment variables via python-dotenv; the recorded cell output shows the call returned True.
# Presumably this is where OPENAI_API_KEY comes from — TODO confirm against the local .env file.
dotenv.load_dotenv()
# Do not print os.getenv("OPENAI_API_KEY") here: cell outputs are saved with the notebook and would leak the key.

True

In [2]:
import os
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage

# Shared chat model used by every rewrite and variation request in this notebook
llm = ChatOpenAI(
    model="gpt-4o",
    temperature=0.7,
)

# Input folder (source notes) and output folder (generated notes)
original_folder, modified_folder = "original_notes", "modified_notes"

# Create the output folder up front; nothing happens if it is already there
os.makedirs(modified_folder, exist_ok=True)


In [3]:
# System prompt sent with every variation request (see modify_note_with_prompt).
# It defines what counts as "relevant information" in a discharge summary so that the
# "relevant" / "non-relevant" wording of the per-variation instructions has a fixed meaning.
# Adjacent string literals are concatenated into one single-line prompt.
relevancy_prompt = (
    "You are a medical note rewriter.   You need to understand the meaning of relevant information. "
    "The following are the key pieces of information that should be included in a patient discharge summary, "
    "these are defined as relevant information: "
    "1. Hospital Admission and Discharge Details, "
    "2. Reason for Hospitalization, "
    "3. Hospital Course Summary, "
    "4. Discharge Diagnosis, "
    "5. Procedures Performed, "
    "6. Medications at Discharge, "
    "7. Discharge Instructions, "
    "8. Follow-Up Care, "
    "9. Patient's Condition at Discharge, "
    "10. Patient Education and Counseling, "
    "11. Pending Results, "
    "12. Advance Directives and Legal Considerations, "
    "13. Healthcare Provider Information, "
    "14. Additional Notes."
)


In [10]:


# Function to generate variations of the AI note
def generate_variations(note, prefix, max_changes=5):
    """Build modified copies of ``note`` by asking the model to omit or inject details.

    For every change count ``i`` from 1 to ``max_changes`` four variations are produced,
    in this order: remove ``i`` relevant items, remove ``i`` non-relevant items,
    add ``i`` synthetic relevant items, add ``i`` synthetic non-relevant items.
    Each variation is one call to ``modify_note_with_prompt``, so the function makes
    ``4 * max_changes`` calls in total (20 with the default).

    Args:
        note: Text of the note to modify.
        prefix: String prepended to every variation name (e.g. ``"AI_"``).
        max_changes: Largest number of items to remove/add. Defaults to 5, which
            reproduces the original fixed behaviour; lower it for a cheap dry run.

    Returns:
        A list of ``(name, modified_text)`` tuples. Names follow
        ``{prefix}omit{i}r``, ``{prefix}omit{i}ir``, ``{prefix}inj{i}r``, ``{prefix}inj{i}ir``.
    """
    # (name suffix, instruction) templates, listed in the order variations are emitted
    templates = (
        ("omit{i}r", "Remove {i} key pieces of relevant information from the note."),
        ("omit{i}ir", "Remove {i} non-relevant details from the note."),
        ("inj{i}r", "Add {i} additional synthetic relevant details to the note."),
        ("inj{i}ir", "Add {i} synthetic non-relevant details to the note."),
    )

    variations = []
    for i in range(1, max_changes + 1):
        for suffix_template, prompt_template in templates:
            name = prefix + suffix_template.format(i=i)
            modified = modify_note_with_prompt(note, prompt_template.format(i=i))
            variations.append((name, modified))

    return variations

# Helper function to modify the note using a prompt
def modify_note_with_prompt(note, prompt):
    """Send one modification instruction plus the note to the chat model.

    The module-level ``relevancy_prompt`` is used as the system message; the human
    message is the instruction followed by the note text.

    Args:
        note: Text of the note to be modified.
        prompt: Instruction describing the modification to apply.

    Returns:
        The content of the model's reply with surrounding whitespace stripped.
    """
    messages = [
        SystemMessage(content=relevancy_prompt),
        HumanMessage(content=prompt + "\n\n the note is: " + note),
    ]
    reply = llm.invoke(messages)
    return reply.content.strip()


100%|██████████| 1/1 [01:20<00:00, 80.48s/it]

Notes processing complete.





In [12]:
from tqdm import tqdm
import csv

# Initialize a list to store modification details (one summary dict per processed note)
modification_details = []

# The rewrite instruction is the same for every note, so build it once outside the loop
system_message = "Rewrite the note professionally, omitting normal lab values and normal vital signs. "

# Only .txt files are processed. Filter before handing the list to tqdm so the progress
# bar counts real work, and sort because os.listdir returns entries in arbitrary order.
note_filenames = sorted(
    name for name in os.listdir(original_folder) if name.endswith(".txt")
)

# Process each document in the original notes folder
for filename in tqdm(note_filenames):
    filepath = os.path.join(original_folder, filename)

    # Load the original note
    with open(filepath, 'r', encoding='utf-8') as file:
        original_note = file.read()

    # Generate the AI rewritten note
    human_message = f"Original Note:\n\n{original_note}"
    response = llm.invoke([
        SystemMessage(content=system_message),
        HumanMessage(content=human_message)
    ])
    note = response.content.strip()

    # Generate variations of the AI note (one model call per variation)
    variations = generate_variations(note, "AI_")

    # Create a folder for the original note and its variations, named after the file stem
    note_folder = os.path.join(modified_folder, os.path.splitext(filename)[0])
    os.makedirs(note_folder, exist_ok=True)

    # Save the original and AI notes
    with open(os.path.join(note_folder, "original.txt"), 'w', encoding='utf-8') as file:
        file.write(original_note)

    with open(os.path.join(note_folder, "AI.txt"), 'w', encoding='utf-8') as file:
        file.write(note)

    # Save each variation as <variation name>.txt
    for var_name, var_content in variations:
        with open(os.path.join(note_folder, f"{var_name}.txt"), 'w', encoding='utf-8') as file:
            file.write(var_content)

    # Collect modification details; the count is taken from the actual result so the
    # summary stays correct if generate_variations ever returns a different number
    modification_details.append({
        "filename": filename,
        "modifications": f"Generated AI note and {len(variations)} variations"
    })

# Save modification details to a CSV file (written even when no notes were found,
# in which case it contains only the header row)
csv_filepath = os.path.join(modified_folder, "modification_details.csv")
with open(csv_filepath, 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['filename', 'modifications']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    writer.writerows(modification_details)

print("Notes processing complete.")

100%|██████████| 1/1 [01:14<00:00, 74.16s/it]

Notes processing complete.



