In [1]:
## o-3 disposition prediction 

In [2]:
import pandas as pd
pd.set_option('display.max_rows', None)  # Display all rows
pd.set_option('display.max_columns', None)  # Display all columns
pd.set_option('display.max_colwidth', None)  # Set max column width to None
pd.set_option('display.width', None)  # Set width to None
df = pd.read_csv('results.csv')

In [3]:
import re
def extract_until_medical_decision(text):
    if pd.isna(text):
        return None
    # Split at first occurrence of "Medical Decision" (case-insensitive)
    parts = re.split(r'\bMedical Decision\b', text, flags=re.IGNORECASE)
    return parts[0].strip() if parts else None

# Apply it to the column
df['ED_Presentations'] = df['ED_Provider_Notes_Text'].apply(extract_until_medical_decision)


In [4]:
import pandas as pd
import os
import re
import json
import base64
import requests
import time
import urllib.parse

pd.set_option('display.max_rows', None)  # Display all rows
pd.set_option('display.max_columns', None)  # Display all columns
pd.set_option('display.max_colwidth', None)  # Set max column width to None
pd.set_option('display.width', None)  # Set width to None
# First, let's test by hard coding your Mulesoft Azure API key into the next line
API_KEY = 'X'  ##### Paste your API key between the quotes #####
API_VERSION = '2024-12-01-preview'  # For the most recent production release: https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release
RESOURCE_ENDPOINT = 'https://unified-api.ucsf.edu/general'  # no trailing slash--this is used by libraries as a partial URL
DEPLOYMENT_NAME = "o3-mini-2025-01-31" 

In [5]:
from dotenv import load_dotenv, find_dotenv
from openai import AzureOpenAI

In [6]:
client = AzureOpenAI(
    api_key=API_KEY,
    api_version=API_VERSION,
    azure_endpoint=RESOURCE_ENDPOINT,
)

client

<openai.lib.azure.AzureOpenAI at 0x7ff07bc6a8d0>

In [7]:
import pandas as pd
import numpy as np
import requests
import time
import os
import json


def debug_api_response(response, context="API Call"):
    """Print detailed debugging info about API response"""
    print(f"--- DEBUG: {context} ---")
    print(f"Status code: {response.status_code}")
    try:
        json_data = response.json()
        print(f"JSON structure keys: {list(json_data.keys())}")
        if "choices" in json_data and len(json_data["choices"]) > 0:
            choice = json_data["choices"][0]
            print(f"First choice keys: {list(choice.keys())}")
            if "message" in choice:
                message = choice["message"]
                print(f"Message keys: {list(message.keys())}")
                if "content" in message:
                    print(f"Content (first 100 chars): {message['content'][:100]}...")
                else:
                    print("No 'content' in message")
            else:
                print("No 'message' in first choice")
        else:
            print("No 'choices' in response or empty choices")
    except Exception as e:
        print(f"Error parsing response: {e}")
        print(f"Raw response text: {response.text[:500]}...")
    print("-------------------")

def predict_disposition_with_dynamic_notes(row):
    # Extract all available fields
    chief_complaint = row['primarychiefcomplaintname']
    sex = row['sex']
    age = row['Age']
    presentation = row['ED_Presentations']
    
    # Check if necessary basic fields exist
    if pd.isna(chief_complaint):
        return None, None  # Skip if chief complaint is missing, return None for both prediction and requested notes
    
    # Initialize available note types with their existence status
    available_notes = {
        'Discharge Summary': not pd.isna(row.get('Discharge_Summary_Text')),
        'Progress Notes': not pd.isna(row.get('Progress_Note_Text')),
        'H&P': not pd.isna(row.get('HP_Note_Text')),
        'Echo': not pd.isna(row.get('Echo_Text')),
        'Imaging': not pd.isna(row.get('Imaging_Text')),
        'Consult': not pd.isna(row.get('Consult_Text')),
        'ECG': not pd.isna(row.get('ECG_Text')),
    }
    
    # Step 1: Ask the model which notes it wants to see (always include Discharge Summary if available)
    url = f"{RESOURCE_ENDPOINT}/openai/deployments/{DEPLOYMENT_NAME}/chat/completions?api-version={API_VERSION}"
    
    headers = {
        "Content-Type": "application/json",
        "api-key": API_KEY
    }
    
    # First message to decide which notes to read - using a straightforward format that works with o3
    selection_payload = {
        "messages": [
            {"role": "user", "content": f"You are an experienced Emergency Department (ED) physician. Your task is to decide which medical notes you need to read to predict the patient's ED disposition based on the chief complaint, PMH, physical exam findings, age, and sex. Patient basic info: {age}yo {sex} with chief complaint: {chief_complaint}\n\n"
                                      f"Available notes (respond ONLY with the names of notes you want to see, separated by commas):\n"
                                      f"- Discharge Summary: {'Available' if available_notes['Discharge Summary'] else 'Not available'}\n"
                                      f"- Progress Notes: {'Available' if available_notes['Progress Notes'] else 'Not available'}\n"
                                      f"- H&P: {'Available' if available_notes['H&P'] else 'Not available'}\n"
                                      f"- Echo: {'Available' if available_notes['Echo'] else 'Not available'}\n"
                                      f"- Imaging: {'Available' if available_notes['Imaging'] else 'Not available'}\n"
                                      f"- Consult: {'Available' if available_notes['Consult'] else 'Not available'}\n"
                                      f"- ECG: {'Available' if available_notes['ECG'] else 'Not available'}\n"
                                      f"Based on the chief complaint, list ONLY the note types you need to review (comma-separated, no explanation). Always include Discharge Summary if available:"}
        ]
    }
    
    # Request note selection
    retries = 0
    requested_notes = []
    notes_requested_str = ""
    
    while retries < 3:
        try:
            selection_response = requests.post(url, headers=headers, json=selection_payload)
            selection_response.raise_for_status()
            
            # Debug the selection response
            debug_api_response(selection_response, "Note Selection")
            
            # Get requested note types
            response_json = selection_response.json()
            if "choices" in response_json and len(response_json["choices"]) > 0:
                if "message" in response_json["choices"][0]:
                    notes_text = response_json["choices"][0]["message"].get("content", "").strip()
                    requested_notes = [note.strip() for note in notes_text.split(',')]
                    notes_requested_str = notes_text
            
            # Filter out unavailable notes
            requested_notes = [note for note in requested_notes 
                               if note in available_notes.keys() and available_notes[note]]
            
            # Always include Discharge Summary if available and not already requested
            if available_notes['Discharge Summary'] and 'Discharge Summary' not in requested_notes:
                requested_notes.append('Discharge Summary')
                if notes_requested_str:
                    notes_requested_str += ", Discharge Summary (auto-added)"
                else:
                    notes_requested_str = "Discharge Summary (auto-added)"
            
            break
        except requests.exceptions.RequestException as e:
            print(f"Note selection request failed: {e}. Retrying {retries+1}/3...")
            time.sleep(5)
            retries += 1
    
    # Default if selection failed
    if not requested_notes and available_notes['Discharge Summary']:
        requested_notes = ['Discharge Summary']
        notes_requested_str = "Failed to get selections. Defaulted to: Discharge Summary"
    elif not requested_notes:
        available_notes_list = [note for note, available in available_notes.items() if available][:2]
        requested_notes = available_notes_list
        notes_requested_str = f"Failed to get selections. Defaulted to: {', '.join(available_notes_list)}"
    
    # Step 2: Prepare the actual notes content
    notes_content = ""
    note_type_to_column = {
        'Discharge Summary': 'Discharge_Summary_Text',
        'Progress Notes': 'Progress_Note_Text',
        'H&P': 'HP_Note_Text',
        'Echo': 'Echo_Text',
        'Imaging': 'Imaging_Text',
        'Consult': 'Consult_Text',
        'ECG': 'ECG_Text',
    }
    
    def truncate_text(text, max_chars=3000):
        if text and len(text) > max_chars:
            return text[:max_chars] + "..."
        return text
    
    for note_type in requested_notes:
        column_name = note_type_to_column.get(note_type)
        if column_name and not pd.isna(row.get(column_name)):
            notes_content += f"\n\n{note_type}:\n{truncate_text(row[column_name])}"
    
    # Step 3: Generate the diagnosis prediction - using the exact same format as the working selection request
    prediction_payload = {
        "messages": [
            {"role": "user", "content": f"You are an experienced Emergency Department (ED) physician tasked with predicting the most likely disposition for a patient based on their presentation and physical, chief complaint, and available past medical information. Based on the patient's chief complaint, age, sex, and available clinical information, predict the most likely ED disposition from the following choices: 'Discharge', 'Admit', 'Eloped', 'Transfer to Another Facility', 'AMA', 'OR Admit', 'LWBS after Triage', 'Send to L&D', 'Expired','Dismissed - Never Arrived', 'Observation', 'None'-- ONLY RESPOND WITH THESE OPTIONS, no explanations.\n\n"
                                      f"CURRENT Chief Complaint: {chief_complaint}\n"
                                      f"Age: {age}\n"
                                      f"Sex: {sex}\n"
                                      f"CURRENT ED Presentation: {presentation}\n"
                                      f"PAST MEDICAL HISTORY: {notes_content}"}
        ]
    }
    
    # Request the diagnosis prediction
    retries = 0
    while retries < 3:
        try:
            prediction_response = requests.post(url, headers=headers, json=prediction_payload)
            prediction_response.raise_for_status()
            
            # Debug the prediction response
            debug_api_response(prediction_response, "Disposition Prediction")
            
            # Extract the prediction from the response
            response_json = prediction_response.json()
            if "choices" in response_json and len(response_json["choices"]) > 0:
                if "message" in response_json["choices"][0]:
                    prediction = response_json["choices"][0]["message"].get("content", "").strip()
                    return prediction, notes_requested_str
            
            # If we couldn't extract a prediction, return None
            print("Warning: Could not extract prediction from API response")
            return None, notes_requested_str
            
        except requests.exceptions.RequestException as e:
            print(f"Prediction request failed: {e}. Retrying {retries+1}/3...")
            time.sleep(5)
            retries += 1
    
    return None, notes_requested_str  # Return None for prediction and the requested notes string if all retries fail

def process_in_batches(df, batch_size=20, output_file="o3_disposition_predictions.csv"):
    """
    Process dataframe in batches, saving progress after each batch
    
    Args:
        df: DataFrame to process
        batch_size: Number of samples to process before saving
        output_file: Where to save the results
    
    Returns:
        Updated DataFrame with predictions
    """
    # Check if output file exists to resume processing
    if os.path.exists(output_file):
        print(f"Found existing output file {output_file}, resuming from there...")
        existing_df = pd.read_csv(output_file)
        
        # Identify which rows have already been processed
        processed_count = sum(~existing_df['Predicted_Disposition'].isna())
        print(f"Found {processed_count} already processed samples out of {len(existing_df)}")
        
        # Merge existing predictions back to original dataframe
        # Ensure index alignment for proper merging
        if len(existing_df) == len(df) and 'Predicted_Disposition' in existing_df.columns:
            # Create a mask for already processed rows
            already_processed = ~existing_df['Predicted_Disposition'].isna()
            
            # Only keep valid predictions and requested notes
            df.loc[already_processed, 'Predicted_Disposition'] = existing_df.loc[already_processed, 'Predicted_Disposition']
            df.loc[already_processed, 'Requested_Notes'] = existing_df.loc[already_processed, 'Requested_Notes']
            
            print(f"Restored {sum(already_processed)} existing predictions")
        else:
            # If file exists but structure doesn't match, initialize columns
            print("Output file structure doesn't match or is empty. Starting fresh.")
            df['Predicted_Disposition'] = None
            df['Requested_Notes'] = None
    else:
        # Initialize prediction columns if starting fresh
        print("Starting new prediction process...")
        df['Predicted_Disposition'] = None
        df['Requested_Notes'] = None
    
    # Get indices of rows that still need processing
    rows_to_process = df[pd.isna(df['Predicted_Disposition'])].index.tolist()
    total_rows = len(rows_to_process)
    
    print(f"Processing {total_rows} remaining samples in batches of {batch_size}")
    
    # Process in batches
    batch_count = 0
    for i in range(0, total_rows, batch_size):
        batch_indices = rows_to_process[i:i+batch_size]
        batch_count += 1
        
        print(f"Processing batch {batch_count}, samples {i+1}-{min(i+batch_size, total_rows)} of {total_rows}")
        
        # Process each sample in the batch
        for idx in batch_indices:
            row = df.loc[idx]
            
            # Predict disposition
            try:
                prediction, notes_requested = predict_disposition_with_dynamic_notes(row)
                
                # Debug the results
                print(f"Sample {idx}: Prediction = '{prediction}', Notes = '{notes_requested[:30]}...'")
                
                # Store results in dataframe
                df.at[idx, 'Predicted_Disposition'] = prediction
                df.at[idx, 'Requested_Notes'] = notes_requested
                
                # Print progress for every 5 samples
                if (batch_indices.index(idx) + 1) % 5 == 0 or batch_indices.index(idx) + 1 == len(batch_indices):
                    print(f"  Processed {batch_indices.index(idx) + 1}/{len(batch_indices)} samples in current batch")
            except Exception as e:
                print(f"Error processing sample {idx}: {e}")
                # Continue with next sample
        
        # Save after each batch
        print(f"Saving progress after batch {batch_count}...")
        df.to_csv(output_file, index=False)
        
        # Calculate completion percentage
        completed = total_rows - len(rows_to_process[i+len(batch_indices):])
        print(f"Overall progress: {completed}/{total_rows} ({completed/total_rows*100:.1f}%)")
    
    return df

# Main execution
if __name__ == "__main__":
    # Load your data
    print("Loading data...")
    # Replace with your actual data loading code
    # df = pd.read_csv("your_data.csv")
    
    # Set batch size and output file
    BATCH_SIZE = 20
    OUTPUT_FILE = "new_o3_disposition_predictions.csv"
    
    # Process data in batches with checkpoint saving
    df = process_in_batches(df, batch_size=BATCH_SIZE, output_file=OUTPUT_FILE)
    
    print(f"Processing complete! Results saved to {OUTPUT_FILE}")

Loading data...
Found existing output file new_o3_disposition_predictions.csv, resuming from there...
Found 3560 already processed samples out of 3984
Restored 3560 existing predictions
Processing 424 remaining samples in batches of 20
Processing batch 1, samples 1-20 of 424
--- DEBUG: Note Selection ---
Status code: 200
JSON structure keys: ['choices', 'created', 'id', 'model', 'object', 'prompt_filter_results', 'system_fingerprint', 'usage']
First choice keys: ['content_filter_results', 'finish_reason', 'index', 'logprobs', 'message']
Message keys: ['annotations', 'content', 'refusal', 'role']
Content (first 100 chars): Discharge Summary, H&P, Progress Notes, ECG...
-------------------
--- DEBUG: Disposition Prediction ---
Status code: 200
JSON structure keys: ['choices', 'created', 'id', 'model', 'object', 'prompt_filter_results', 'system_fingerprint', 'usage']
First choice keys: ['content_filter_results', 'finish_reason', 'index', 'logprobs', 'message']
Message keys: ['annotations'

In [1]:
import pandas as pd
results = pd.read_csv('new_o3_disposition_predictions.csv')

In [9]:
# Add column to track accuracy of disposition prediction
results["Prediction_Correct"] = results["Predicted_Disposition"] == results["eddisposition"]

# Calculate overall accuracy
accuracy = (results["Prediction_Correct"].sum() / results["Prediction_Correct"].count()) * 100
print(f"Overall disposition prediction accuracy: {accuracy:.2f}%")

Overall disposition prediction accuracy: 63.08%


In [4]:
# List of valid disposition options
valid_dispositions = ['Discharge', 'Admit', 'Eloped', 'Transfer to Another Facility', 'AMA', 
                     'OR Admit', 'LWBS after Triage', 'Send to L&D', 'Expired',
                     'Dismissed - Never Arrived', 'Observation', 'None']

# Add column to track accuracy of disposition prediction
results["Prediction_Correct"] = results["Predicted_Disposition"] == results["eddisposition"]

# Calculate overall accuracy
accuracy = (results["Prediction_Correct"].sum() / results["Prediction_Correct"].count()) * 100
print(f"Overall disposition prediction accuracy: {accuracy:.2f}%")

# Create a confusion matrix to see the model's behavior across disposition types
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Convert both columns to string type to ensure consistent comparison
results["eddisposition"] = results["eddisposition"].astype(str)
results["Predicted_Disposition"] = results["Predicted_Disposition"].astype(str)

# Create confusion matrix using only valid dispositions
cm = confusion_matrix(results["eddisposition"], results["Predicted_Disposition"], 
                      labels=valid_dispositions)

# Convert to DataFrame for better visualization
cm_df = pd.DataFrame(cm, index=valid_dispositions, columns=valid_dispositions)

# Plot confusion matrix
plt.figure(figsize=(12, 10))
sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues', cbar=False)
plt.xlabel('Predicted Disposition')
plt.ylabel('Actual Disposition')
plt.title('Confusion Matrix for Disposition Predictions')

# Calculate metrics per class
print("\nDetailed classification report:")
print(classification_report(results["eddisposition"], results["Predicted_Disposition"], 
                           labels=valid_dispositions, zero_division=0))

# Get per-class accuracy
class_accuracy = {}
for disp in valid_dispositions:
    class_subset = results[results["eddisposition"] == disp]
    if len(class_subset) > 0:
        class_acc = (class_subset["Prediction_Correct"].sum() / len(class_subset)) * 100
        class_accuracy[disp] = class_acc
    else:
        class_accuracy[disp] = 0  # Handle classes with no samples

# Print per-class accuracy
print("\nAccuracy by disposition class:")
for disp, acc in class_accuracy.items():
    count = len(results[results["eddisposition"] == disp])
    print(f"{disp}: {acc:.2f}% (n={count})")

# Analyze error patterns
print("\nMost common misclassifications:")
error_cases = results[~results["Prediction_Correct"]]
error_combinations = error_cases.groupby(["eddisposition", "Predicted_Disposition"]).size().reset_index()
error_combinations.columns = ["Actual", "Predicted", "Count"]
error_combinations = error_combinations.sort_values("Count", ascending=False).head(10)
print(error_combinations)

# Distribution of predictions vs actual
plt.figure(figsize=(14, 6))
plt.subplot(1, 2, 1)
results["eddisposition"].value_counts()[valid_dispositions].plot(kind="bar")
plt.title("Actual Disposition Distribution")
plt.xticks(rotation=45, ha='right')
plt.tight_layout()

plt.subplot(1, 2, 2)
results["Predicted_Disposition"].value_counts()[valid_dispositions].plot(kind="bar")
plt.title("Predicted Disposition Distribution")
plt.xticks(rotation=45, ha='right')
plt.tight_layout()

# Add a table showing frequency of each class
plt.figure(figsize=(10, 6))
class_counts = pd.DataFrame({
    'Actual': results["eddisposition"].value_counts(),
    'Predicted': results["Predicted_Disposition"].value_counts()
})
class_counts = class_counts.reindex(valid_dispositions).fillna(0).astype(int)
class_counts['Difference'] = class_counts['Predicted'] - class_counts['Actual']
print("\nComparison of class frequencies:")
print(class_counts)

# Calculate class-wise precision and recall
precision_recall = pd.DataFrame(index=valid_dispositions, columns=['Precision', 'Recall', 'F1-Score'])
for disp in valid_dispositions:
    # True positives: predicted this class correctly
    tp = len(results[(results["eddisposition"] == disp) & (results["Predicted_Disposition"] == disp)])
    # False positives: predicted this class incorrectly
    fp = len(results[(results["eddisposition"] != disp) & (results["Predicted_Disposition"] == disp)])
    # False negatives: should have predicted this class but didn't
    fn = len(results[(results["eddisposition"] == disp) & (results["Predicted_Disposition"] != disp)])
    
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    
    precision_recall.loc[disp] = [precision, recall, f1]

print("\nClass-wise precision and recall:")
print(precision_recall)

Overall disposition prediction accuracy: 63.08%

Detailed classification report:
                              precision    recall  f1-score   support

                   Discharge       0.76      0.30      0.43      1465
                       Admit       0.66      0.90      0.76      2282
                      Eloped       0.25      0.03      0.06        29
Transfer to Another Facility       0.07      0.01      0.01       150
                         AMA       0.00      0.00      0.00        32
                    OR Admit       0.05      0.25      0.08        12
           LWBS after Triage       0.00      0.00      0.00         1
                 Send to L&D       0.00      0.00      0.00         1
                     Expired       1.00      0.67      0.80         3
   Dismissed - Never Arrived       0.00      0.00      0.00         1
                 Observation       0.00      0.00      0.00         1
                        None       0.00      0.00      0.00         0

       

KeyError: "Passing list-likes to .loc or [] with any missing labels is no longer supported. The following labels were missing: Index(['None'], dtype='object'). See https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike"

In [5]:
# List of valid disposition options
valid_dispositions = ['Discharge', 'Admit', 'Eloped', 'Transfer to Another Facility', 'AMA', 
                     'OR Admit', 'LWBS after Triage', 'Send to L&D', 'Expired',
                     'Dismissed - Never Arrived', 'Observation', 'None']

# Convert both columns to string type to ensure consistent comparison
results["eddisposition"] = results["eddisposition"].astype(str)
results["Predicted_Disposition"] = results["Predicted_Disposition"].astype(str)

# Create a detailed analysis for each class
print("DETAILED ANALYSIS FOR EACH DISPOSITION CLASS")
print("============================================")

for disp in valid_dispositions:
    # Count how many instances of this class exist in the ground truth
    true_instances = results[results["eddisposition"] == disp]
    true_count = len(true_instances)
    
    # Count how many were predicted as this class
    pred_count = len(results[results["Predicted_Disposition"] == disp])
    
    # How many were correctly predicted
    correct_count = len(true_instances[true_instances["Predicted_Disposition"] == disp])
    
    # Calculate accuracy for this class
    class_accuracy = (correct_count / true_count * 100) if true_count > 0 else 0
    
    # Calculate precision for this class
    precision = (correct_count / pred_count * 100) if pred_count > 0 else 0
    
    print(f"\n{disp}:")
    print(f"  Ground truth count: {true_count} instances")
    print(f"  Model predicted this class: {pred_count} times")
    print(f"  Correctly predicted: {correct_count} instances")
    print(f"  Class accuracy (recall): {class_accuracy:.2f}%")
    print(f"  Precision: {precision:.2f}%")
    
    # If there are any instances of this class in the ground truth
    if true_count > 0:
        # Distribution of predictions for this ground truth class
        pred_distribution = true_instances["Predicted_Disposition"].value_counts()
        
        print("\n  When ground truth is '{}', model predicted:".format(disp))
        for pred_class, count in pred_distribution.items():
            percentage = count / true_count * 100
            print(f"    - '{pred_class}': {count} times ({percentage:.2f}%)")

# Create a comprehensive confusion matrix
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Create confusion matrix
cm = confusion_matrix(results["eddisposition"], results["Predicted_Disposition"], 
                     labels=valid_dispositions)

# Convert to percentages (row-wise, showing what % of each ground truth was predicted as each class)
cm_percentage = np.zeros(cm.shape)
for i in range(cm.shape[0]):
    row_sum = cm[i].sum()
    if row_sum > 0:
        cm_percentage[i] = cm[i] / row_sum * 100

# Convert to DataFrame for better visualization
cm_df = pd.DataFrame(cm, index=valid_dispositions, columns=valid_dispositions)
cm_pct_df = pd.DataFrame(cm_percentage, index=valid_dispositions, columns=valid_dispositions)

# Plot confusion matrices
fig, axes = plt.subplots(1, 2, figsize=(20, 8))

# Raw counts
sns.heatmap(cm_df, annot=True, fmt='d', cmap='Blues', cbar=False, ax=axes[0])
axes[0].set_xlabel('Predicted Disposition')
axes[0].set_ylabel('Actual Disposition (Ground Truth)')
axes[0].set_title('Confusion Matrix - Raw Counts')

# Percentages
sns.heatmap(cm_pct_df, annot=True, fmt='.1f', cmap='Blues', cbar=False, ax=axes[1])
axes[1].set_xlabel('Predicted Disposition')
axes[1].set_ylabel('Actual Disposition (Ground Truth)')
axes[1].set_title('Confusion Matrix - Row Percentages (%)')

plt.tight_layout()

# Summary table with all metrics
summary = pd.DataFrame(index=valid_dispositions, 
                      columns=['True Count', 'Predicted Count', 'Correct', 'Accuracy (%)', 'Precision (%)'])

for disp in valid_dispositions:
    true_count = len(results[results["eddisposition"] == disp])
    pred_count = len(results[results["Predicted_Disposition"] == disp])
    correct = len(results[(results["eddisposition"] == disp) & 
                          (results["Predicted_Disposition"] == disp)])
    
    accuracy = (correct / true_count * 100) if true_count > 0 else 0
    precision = (correct / pred_count * 100) if pred_count > 0 else 0
    
    summary.loc[disp] = [true_count, pred_count, correct, accuracy, precision]

print("\nSUMMARY TABLE")
print("=============")
print(summary)

# Calculate overall metrics
total_correct = results["Prediction_Correct"].sum()
total_samples = len(results)
overall_accuracy = (total_correct / total_samples) * 100

print(f"\nOVERALL ACCURACY: {overall_accuracy:.2f}%")
print(f"Total samples: {total_samples}")
print(f"Correctly predicted: {total_correct}")

DETAILED ANALYSIS FOR EACH DISPOSITION CLASS

Discharge:
  Ground truth count: 1465 instances
  Model predicted this class: 587 times
  Correctly predicted: 446 instances
  Class accuracy (recall): 30.44%
  Precision: 75.98%

  When ground truth is 'Discharge', model predicted:
    - 'Admit': 863 times (58.91%)
    - 'Discharge': 446 times (30.44%)
    - 'Observation': 125 times (8.53%)
    - 'OR Admit': 23 times (1.57%)
    - 'Transfer to Another Facility': 4 times (0.27%)
    - 'Eloped': 3 times (0.20%)
    - 'Send to L&D': 1 times (0.07%)

Admit:
  Ground truth count: 2282 instances
  Model predicted this class: 3118 times
  Correctly predicted: 2060 instances
  Class accuracy (recall): 90.27%
  Precision: 66.07%

  When ground truth is 'Admit', model predicted:
    - 'Admit': 2060 times (90.27%)
    - 'Discharge': 114 times (5.00%)
    - 'Observation': 63 times (2.76%)
    - 'OR Admit': 32 times (1.40%)
    - 'Transfer to Another Facility': 9 times (0.39%)
    - 'AMA': 4 times (0.1