In [None]:
### o3 EHR review

In [1]:
import pandas as pd
# Remove every pandas display limit (rows, columns, cell text width, console width)
# so complete records are visible when a frame is rendered in the notebook.
pd.set_option(
    'display.max_rows', None,
    'display.max_columns', None,
    'display.max_colwidth', None,
    'display.width', None,
)

# Input records for the run, read from the working directory.
df = pd.read_csv('results.csv')

In [2]:
import pandas as pd
import os
import re
import json
import base64
import requests
import time
import urllib.parse
import os

from dotenv import load_dotenv, find_dotenv
from openai import AzureOpenAI

_ = load_dotenv(find_dotenv()) 

# Notebook display: never truncate rows, columns, cell text, or line width.
for _display_option in (
    'display.max_rows',
    'display.max_columns',
    'display.max_colwidth',
    'display.width',
):
    pd.set_option(_display_option, None)

# Credentials are read from the environment (load_dotenv above has already merged any
# .env file into os.environ), so the real key never has to be saved in the notebook.
# Set AZURE_OPENAI_API_KEY and AZURE_OPENAI_ENDPOINT in your shell or .env file.
# The 'X' fallbacks are placeholders only; requests made with them will be rejected.
API_KEY = os.environ.get('AZURE_OPENAI_API_KEY', 'X')
API_VERSION = '2024-12-01-preview'  # For the most recent production release: https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release
# The endpoint is used as a URL prefix, so a trailing slash would produce '//' in the
# request path; strip it defensively.
RESOURCE_ENDPOINT = os.environ.get('AZURE_OPENAI_ENDPOINT', 'X').rstrip('/')
DEPLOYMENT_NAME = "o3-mini-2025-01-31"

In [3]:
# SDK client built from the configuration constants defined in the previous cell.
client = AzureOpenAI(api_key=API_KEY, api_version=API_VERSION, azure_endpoint=RESOURCE_ENDPOINT)

# Bare expression so the notebook echoes the client object as the cell output.
client

<openai.lib.azure.AzureOpenAI at 0x7f4c6c6d27d0>

In [4]:
import pandas as pd
import requests
import time
import os
import json
from tqdm import tqdm  # For progress bar

# Helpers and entry point for requesting a one-liner summary from Azure OpenAI
def _build_summary_prompt(chief_complaint, discharge_summary, age, sex):
    """Return the text of the single user message sent to the model.

    The chief complaint is interpolated twice (instruction 3 and the IMPORTANT
    line); age, sex and the past records are appended at the end.
    """
    # NOTE(review): instruction 3 reads "does have additional information regarding
    # the chief complaint"; the rest of the prompt suggests "does NOT have" may have
    # been intended. The wording is left byte-for-byte unchanged because editing the
    # prompt changes model output -- confirm with the author.
    return (
        f"You are an experienced emergency department (ED) physician creating a one-liner for a NEW patient who has just arrived at the ED. The patient's past medical records are available to you. Your task is to summarize the patient's relevant PAST medical history and end with their CURRENT chief complaint that is given with no adjectives about the chief complaint as you can NOT assume anything about their current condition. All notes and medical records provided are from PAST encounters, not the current visit.Create a concise one-liner summary for a patient who has just arrived at the Emergency Department. The one-liner must:\n\n"
        f"1. Start with demographic information (age, sex). Example of a one liner:  80 y.o. old male, with h/o of HFpEF (EF 55-60% 05/20/22), HTN, HLD, and bipolar disorder presenting with shortness of breath. \n"
        f"2. Include a concise summary of relevant PAST medical history from previous visits/notes\n"
        f"3. End with just CURRENT presenting chief complaint that is not capitilized in the summary and does have additional information regarding the chief complaint: '{chief_complaint}'\n\n"
        f"IMPORTANT: Everything in the notes is from PAST encounters. The patient is NOW presenting with a NEW complaint: '{chief_complaint}'.\n\n"
        f"Age: {age}\n"
        f"Sex: {sex}\n"
        f"PAST Medical Records:\n{discharge_summary}"
    )


def _extract_summary_text(response):
    """Return the stripped message text from a chat-completions response, or None.

    A body that is not JSON, lacks ``choices[0].message.content``, or carries a
    non-string content value yields None instead of raising, so one odd response
    cannot abort a long batch run.
    """
    try:
        content = response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        print(f"Unexpected response body ({type(e).__name__}: {e}). No summary returned.")
        return None
    if not isinstance(content, str):
        print("Response contained no text content. No summary returned.")
        return None
    return content.strip()


def get_summary(chief_complaint, discharge_summary, age, sex, max_retries=5, timeout=300):
    """Request a one-liner summary for one patient record.

    Posts a single user message to the chat-completions URL built from
    RESOURCE_ENDPOINT, DEPLOYMENT_NAME and API_VERSION, authenticating with API_KEY.

    Args:
        chief_complaint: Current chief complaint; interpolated into the prompt.
        discharge_summary: Text of past records; interpolated into the prompt.
        age: Patient age, interpolated as-is.
        sex: Patient sex, interpolated as-is.
        max_retries: Maximum number of HTTP attempts for retryable failures.
        timeout: Seconds passed to ``requests.post`` as its ``timeout`` so a stalled
            connection cannot hang the run indefinitely.

    Returns:
        The stripped summary text, or None when ``chief_complaint`` or
        ``discharge_summary`` is missing, the request fails with a non-retryable
        status, every retry is exhausted, or the response carries no usable text.
    """
    # Skip rows that lack either key field; no request is made for them.
    if pd.isna(chief_complaint) or pd.isna(discharge_summary):
        return None

    url = f"{RESOURCE_ENDPOINT}/openai/deployments/{DEPLOYMENT_NAME}/chat/completions?api-version={API_VERSION}"

    headers = {
        "Content-Type": "application/json",
        "api-key": API_KEY
    }

    # "temperature" is deliberately not sent, matching the original payload.
    payload = {
        "messages": [
            {"role": "user", "content": _build_summary_prompt(chief_complaint, discharge_summary, age, sex)}
        ],
        "max_completion_tokens": 4096
    }

    backoff_factor = 2  # Waits grow as 2, 4, 8, ... seconds
    retryable_client_statuses = (408, 429)  # timeouts and rate limiting

    for attempt in range(max_retries):
        wait_time = (backoff_factor ** attempt) * 2  # Exponential backoff
        final_attempt = attempt == max_retries - 1

        try:
            response = requests.post(url, headers=headers, json=payload, timeout=timeout)
        except requests.exceptions.RequestException as e:
            print(f"Request failed: {e}. Retrying {attempt+1}/{max_retries}...")
            # Sleeping after the last attempt would only delay the failure.
            if not final_attempt:
                time.sleep(wait_time)
            continue

        status = response.status_code

        # Transient conditions: rate limiting, request timeout, server-side errors.
        if status in retryable_client_statuses or status >= 500:
            if final_attempt:
                print(f"Rate limit hit or error {status}. No retries left.")
            else:
                print(f"Rate limit hit or error {status}. Waiting for {wait_time} seconds before retry...")
                time.sleep(wait_time)
            continue

        # Any other 4xx (bad request, bad key, unknown deployment, ...) fails the same
        # way on every retry, so give up on this record immediately.
        if status >= 400:
            print(f"Non-retryable error {status}: {response.text[:200]}")
            return None

        return _extract_summary_text(response)

    return None  # Return None if all retries fail

# Helpers and entry point for processing the dataframe with checkpoint saving
def _restore_saved_summaries(df, saved_df, upto):
    """Copy non-null summaries for the first ``upto`` rows of saved_df into df, by position."""
    if 'Generated_Summary' not in saved_df.columns:
        return
    summary_col = df.columns.get_loc('Generated_Summary')
    saved_values = saved_df['Generated_Summary']
    for pos in range(min(upto, len(df), len(saved_df))):
        value = saved_values.iat[pos]
        if not pd.isna(value):
            df.iat[pos, summary_col] = value


def _save_progress(df, last_index, checkpoint_file, output_file):
    """Persist results, then the checkpoint, each via write-to-temp + os.replace.

    The CSV is written first so the checkpoint never claims more rows than the
    saved results contain; the temp-file swap keeps an interrupted write from
    leaving a truncated file behind.
    """
    tmp_output = f"{output_file}.tmp"
    df.to_csv(tmp_output, index=False)
    os.replace(tmp_output, output_file)

    tmp_checkpoint = f"{checkpoint_file}.tmp"
    with open(tmp_checkpoint, 'w') as f:
        json.dump({'last_processed_index': last_index}, f)
    os.replace(tmp_checkpoint, checkpoint_file)


def process_dataframe_with_checkpoints(df, checkpoint_file="o3_processing_checkpoint.json", output_file="o3_ehr_review.csv", batch_size=10, request_delay=1):
    """Fill ``df['Generated_Summary']`` row by row, saving progress as it goes.

    Rows are addressed by position throughout, so the frame's index labels do not
    matter. ``df`` is modified in place and also returned.

    Args:
        df: Frame with the columns ``primarychiefcomplaintname``,
            ``Discharge_Summary_Text``, ``Age`` and ``sex``.
        checkpoint_file: JSON file holding ``last_processed_index`` (a row position).
        output_file: CSV that receives the frame at every checkpoint.
        batch_size: Number of rows between checkpoints; must be at least 1.
        request_delay: Seconds to sleep between consecutive rows.

    Returns:
        The same ``df`` object with ``Generated_Summary`` populated (None where
        ``get_summary`` returned None).

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Make sure the target column exists and can hold strings before anything is
    # written into it.
    had_summary_column = 'Generated_Summary' in df.columns
    if had_summary_column:
        df['Generated_Summary'] = df['Generated_Summary'].astype(object)
    else:
        df['Generated_Summary'] = None

    # Check if there's a checkpoint to resume from
    start_index = 0
    if os.path.exists(checkpoint_file):
        with open(checkpoint_file, 'r') as f:
            checkpoint_data = json.load(f)
        # Default of -1 means "nothing processed yet", so a checkpoint without the
        # key restarts at row 0 instead of skipping it.
        start_index = checkpoint_data.get('last_processed_index', -1) + 1

        if start_index > 0:
            if os.path.exists(output_file):
                # Transfer any already processed summaries
                _restore_saved_summaries(df, pd.read_csv(output_file), start_index)
            elif not had_summary_column:
                # Nothing to restore from: skipping ahead would leave the earlier
                # rows permanently without summaries.
                print(f"Checkpoint found but {output_file} is missing. Restarting from index 0")
                start_index = 0
        print(f"Resuming from index {start_index}")

    total_rows = len(df)
    summary_col = df.columns.get_loc('Generated_Summary')
    last_done = start_index - 1   # last row position whose summary is in df
    last_saved = start_index - 1  # last row position covered by a checkpoint

    progress_bar = tqdm(total=total_rows, initial=start_index, desc="Processing records")
    try:
        for i in range(start_index, total_rows):
            row = df.iloc[i]

            # Process the current row
            summary = get_summary(
                row["primarychiefcomplaintname"],
                row["Discharge_Summary_Text"],
                row["Age"],
                row["sex"]
            )

            # Positional write, matching the positional read above. A label-based
            # write would hit the wrong row (or append one) on a non-default index.
            df.iat[i, summary_col] = summary
            last_done = i

            # Update progress bar
            progress_bar.update(1)

            # Save checkpoint and intermediate results after each batch
            is_last_row = i == total_rows - 1
            if (i + 1) % batch_size == 0 or is_last_row:
                _save_progress(df, i, checkpoint_file, output_file)
                last_saved = i
                print(f"\nCheckpoint saved at index {i}")

            # Add delay between API calls to prevent rate limiting
            if not is_last_row and request_delay > 0:
                time.sleep(request_delay)
    finally:
        progress_bar.close()
        # On an interruption or error, keep the rows finished since the last batch
        # boundary so a resume does not repeat them.
        if last_done > last_saved:
            _save_progress(df, last_done, checkpoint_file, output_file)
            print(f"\nRun interrupted. Checkpoint saved at index {last_done}")

    print(f"Processing complete. Results saved to {output_file}")

    # Clean up checkpoint file when done
    if os.path.exists(checkpoint_file):
        os.remove(checkpoint_file)

    return df

# Run the checkpointed summarisation over the loaded records; the returned frame
# (with the Generated_Summary column) replaces `df`.
df = process_dataframe_with_checkpoints(df=df)

Resuming from index 3520


Processing records:  89%|████████▊ | 3530/3984 [03:06<2:32:04, 20.10s/it]


Checkpoint saved at index 3529


Processing records:  89%|████████▉ | 3540/3984 [05:43<2:02:05, 16.50s/it]


Checkpoint saved at index 3539


Processing records:  89%|████████▉ | 3550/3984 [08:21<1:43:11, 14.27s/it]


Checkpoint saved at index 3549


Processing records:  89%|████████▉ | 3560/3984 [11:01<1:38:05, 13.88s/it]


Checkpoint saved at index 3559


Processing records:  90%|████████▉ | 3570/3984 [14:00<2:04:18, 18.02s/it]


Checkpoint saved at index 3569


Processing records:  90%|████████▉ | 3580/3984 [16:45<1:51:13, 16.52s/it]


Checkpoint saved at index 3579


Processing records:  90%|█████████ | 3590/3984 [19:40<1:33:30, 14.24s/it]


Checkpoint saved at index 3589


Processing records:  90%|█████████ | 3600/3984 [22:34<1:58:35, 18.53s/it]


Checkpoint saved at index 3599


Processing records:  91%|█████████ | 3610/3984 [25:49<2:16:31, 21.90s/it]


Checkpoint saved at index 3609


Processing records:  91%|█████████ | 3620/3984 [29:35<2:17:55, 22.73s/it]


Checkpoint saved at index 3619


Processing records:  91%|█████████ | 3630/3984 [32:26<1:36:33, 16.36s/it]


Checkpoint saved at index 3629


Processing records:  91%|█████████▏| 3640/3984 [36:15<2:21:03, 24.60s/it]


Checkpoint saved at index 3639


Processing records:  92%|█████████▏| 3650/3984 [39:51<1:49:17, 19.63s/it]


Checkpoint saved at index 3649


Processing records:  92%|█████████▏| 3660/3984 [42:32<1:17:56, 14.43s/it]


Checkpoint saved at index 3659


Processing records:  92%|█████████▏| 3670/3984 [44:56<1:21:24, 15.56s/it]


Checkpoint saved at index 3669


Processing records:  92%|█████████▏| 3680/3984 [47:45<1:32:25, 18.24s/it]


Checkpoint saved at index 3679


Processing records:  93%|█████████▎| 3690/3984 [50:55<1:32:21, 18.85s/it]


Checkpoint saved at index 3689


Processing records:  93%|█████████▎| 3700/3984 [54:13<2:01:44, 25.72s/it]


Checkpoint saved at index 3699


Processing records:  93%|█████████▎| 3710/3984 [57:23<1:15:34, 16.55s/it]


Checkpoint saved at index 3709


Processing records:  93%|█████████▎| 3720/3984 [1:00:20<1:21:50, 18.60s/it]


Checkpoint saved at index 3719


Processing records:  94%|█████████▎| 3730/3984 [1:03:39<1:27:59, 20.79s/it]


Checkpoint saved at index 3729


Processing records:  94%|█████████▍| 3740/3984 [1:07:03<1:23:22, 20.50s/it]


Checkpoint saved at index 3739


Processing records:  94%|█████████▍| 3750/3984 [1:09:35<56:42, 14.54s/it]  


Checkpoint saved at index 3749


Processing records:  94%|█████████▍| 3760/3984 [1:11:55<48:30, 12.99s/it]  


Checkpoint saved at index 3759


Processing records:  95%|█████████▍| 3770/3984 [1:15:03<1:08:22, 19.17s/it]


Checkpoint saved at index 3769


Processing records:  95%|█████████▍| 3780/3984 [1:17:48<52:31, 15.45s/it]  


Checkpoint saved at index 3779


Processing records:  95%|█████████▌| 3790/3984 [1:20:15<40:31, 12.54s/it]  


Checkpoint saved at index 3789


Processing records:  95%|█████████▌| 3800/3984 [1:22:33<44:31, 14.52s/it]


Checkpoint saved at index 3799


Processing records:  96%|█████████▌| 3810/3984 [1:25:52<56:15, 19.40s/it]


Checkpoint saved at index 3809


Processing records:  96%|█████████▌| 3820/3984 [1:29:01<47:56, 17.54s/it]  


Checkpoint saved at index 3819


Processing records:  96%|█████████▌| 3830/3984 [1:31:50<41:49, 16.30s/it]  


Checkpoint saved at index 3829


Processing records:  96%|█████████▋| 3840/3984 [1:35:59<1:05:32, 27.31s/it]


Checkpoint saved at index 3839


Processing records:  97%|█████████▋| 3850/3984 [1:38:43<37:32, 16.81s/it]  


Checkpoint saved at index 3849


Processing records:  97%|█████████▋| 3860/3984 [1:42:11<47:00, 22.75s/it]


Checkpoint saved at index 3859


Processing records:  97%|█████████▋| 3870/3984 [1:45:07<26:19, 13.85s/it]


Checkpoint saved at index 3869


Processing records:  97%|█████████▋| 3880/3984 [1:48:29<31:48, 18.35s/it]


Checkpoint saved at index 3879


Processing records:  98%|█████████▊| 3890/3984 [1:51:28<28:11, 17.99s/it]


Checkpoint saved at index 3889


Processing records:  98%|█████████▊| 3900/3984 [1:54:52<31:43, 22.66s/it]


Checkpoint saved at index 3899


Processing records:  98%|█████████▊| 3910/3984 [1:57:47<21:59, 17.83s/it]


Checkpoint saved at index 3909


Processing records:  98%|█████████▊| 3920/3984 [2:01:06<18:36, 17.44s/it]


Checkpoint saved at index 3919


Processing records:  99%|█████████▊| 3930/3984 [2:04:41<20:08, 22.37s/it]


Checkpoint saved at index 3929


Processing records:  99%|█████████▉| 3940/3984 [2:07:41<17:03, 23.26s/it]


Checkpoint saved at index 3939


Processing records:  99%|█████████▉| 3950/3984 [2:11:29<09:44, 17.18s/it]


Checkpoint saved at index 3949


Processing records:  99%|█████████▉| 3960/3984 [2:14:33<05:50, 14.60s/it]


Checkpoint saved at index 3959


Processing records: 100%|█████████▉| 3970/3984 [2:16:27<02:18,  9.93s/it]


Checkpoint saved at index 3969


Processing records: 100%|█████████▉| 3980/3984 [2:19:07<01:04, 16.14s/it]


Checkpoint saved at index 3979


Processing records: 100%|██████████| 3984/3984 [2:20:15<00:00, 18.14s/it]


Checkpoint saved at index 3983
Processing complete. Results saved to o3_ehr_review.csv



