# Operating Expense per Adjusted Discharge - KPI Analysis

This notebook replicates the calculations for the **Operating Expense per Adjusted Discharge** KPI card, including:
1. KPI calculation from raw worksheet data
2. Benchmark calculations at all 4 levels (State+Type, State, Hospital Type, National)
3. Performance comparison and ranking
4. Trend analysis

**Example Hospital**: CCN 310001 (New Jersey; the authoritative state code is read from `hospital_metadata` in Section 3)

## 1. Setup and Imports

In [None]:
import duckdb
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from pathlib import Path

# Pandas display options applied for the whole notebook: `None` removes the
# explicit limit for max_columns/width, and long cell text is cut at 50 chars.
for _option_name, _option_value in (
    ('display.max_columns', None),
    ('display.width', None),
    ('display.max_colwidth', 50),
):
    pd.set_option(_option_name, _option_value)

print("✓ Imports complete")

## 2. Connect to Databases

In [None]:
# Both DuckDB files are opened read-only, so nothing in this notebook can
# modify them.

# Pre-computed KPIs, benchmarks and hospital metadata
kpi_con = duckdb.connect(database='data/hospital_analytics.duckdb', read_only=True)

# Raw cost-report worksheet tables
worksheet_con = duckdb.connect(database='data/hospital_worksheets.duckdb', read_only=True)

print(
    "✓ Connected to both databases",
    "  - KPI database: hospital_analytics.duckdb",
    "  - Worksheet database: hospital_worksheets.duckdb",
    sep="\n",
)

## 3. Hospital Information

In [None]:
# Example hospital and reporting period; every later cell reads these two
# constants, so change them here to analyse a different hospital/year.
CCN = 310001
FISCAL_YEAR = 2024

# Get hospital metadata (state and hospital type select the peer groups
# used for the benchmark lookups further down)
hospital_info = kpi_con.execute("""
    SELECT Provider_Number, State_Code, Hospital_Type
    FROM hospital_metadata
    WHERE Provider_Number = ?
""", [CCN]).df()

# Stop here with an explicit message instead of a bare IndexError from
# `.iloc[0]` below: without metadata none of the later cells can run.
if hospital_info.empty:
    raise ValueError(f"No hospital_metadata row found for Provider_Number {CCN}")

print("Hospital Information:")
print("=" * 80)
print(hospital_info.to_string(index=False))
print()

STATE_CODE = hospital_info['State_Code'].iloc[0]
HOSPITAL_TYPE = hospital_info['Hospital_Type'].iloc[0]

print(f"State Code: {STATE_CODE}")
print(f"Hospital Type: {HOSPITAL_TYPE}")

## 4. KPI Calculation: Operating Expense per Adjusted Discharge

**Formula**: Total Operating Expenses / Adjusted Discharges

**Data Source**: 
- Operating Expenses: Worksheet G-3, Line 4 (Total Operating Expenses)
- Adjusted Discharges: Calculated from Worksheet S-3 volumes as Discharges + (Outpatient Visits / ALOS), where ALOS = Inpatient Days / Discharges

In [None]:
# Get pre-computed KPI value for comparison.
# The stored column is named Operating_Expense_Ratio; it is aliased here to
# the KPI's display name.
precomputed_kpi = kpi_con.execute("""
    SELECT 
        Provider_Number,
        Fiscal_Year,
        Operating_Expense_Ratio as Operating_Expense_per_Adjusted_Discharge
    FROM hospital_kpis
    WHERE Provider_Number = ?
        AND Fiscal_Year = ?
""", [CCN, FISCAL_YEAR]).df()

print("Pre-Computed KPI Value (from database):")
print("=" * 80)
print(precomputed_kpi.to_string(index=False))

if precomputed_kpi.empty:
    # No stored KPI for this hospital/year: use NaN so the `pd.isna(...)`
    # guards in later cells treat the value as missing instead of this cell
    # failing with an IndexError.
    precomputed_value = float('nan')
    print(f"\n⚠ No pre-computed KPI row for Provider_Number {CCN}, Fiscal Year {FISCAL_YEAR}")
else:
    precomputed_value = precomputed_kpi['Operating_Expense_per_Adjusted_Discharge'].iloc[0]

In [None]:
# Calculate from raw worksheet data
print("\nCalculating from Raw Worksheet Data:")
print("=" * 80)

# Get operating expenses from Worksheet G-3 (line 4, value in Column_0)
operating_expenses = worksheet_con.execute("""
    SELECT 
        Provider_Number,
        Fiscal_Year,
        Line_Number,
        Line_Description,
        Column_0 as Total_Operating_Expenses
    FROM worksheet_g300
    WHERE Provider_Number = ?
        AND Fiscal_Year = ?
        AND Line_Number = 4
""", [CCN, FISCAL_YEAR]).df()

print("\n1. Operating Expenses (Worksheet G-3, Line 4):")
print(operating_expenses.to_string(index=False))

if operating_expenses.empty:
    # Missing worksheet line: keep the notebook running with NaN rather than
    # raising IndexError from `.iloc[0]`; the KPI then evaluates to NaN.
    total_operating_expenses = float('nan')
    print(f"\n   ⚠ No Worksheet G-3 line 4 row for Provider_Number {CCN}, Fiscal Year {FISCAL_YEAR}")
else:
    total_operating_expenses = operating_expenses['Total_Operating_Expenses'].iloc[0]
    print(f"\n   Total Operating Expenses: ${total_operating_expenses:,.2f}")

In [None]:
# Get discharges and patient days from Worksheet S-3.
# NOTE(review): the SQL comment labels both line 1 and line 14 as "Total";
# confirm which S-3 Part I lines these are meant to be.
patient_volumes = worksheet_con.execute("""
    SELECT 
        Provider_Number,
        Fiscal_Year,
        Line_Number,
        Line_Description,
        Column_1 as Inpatient_Days,
        Column_2 as Discharges,
        Column_7 as Outpatient_Visits
    FROM worksheet_s300_part1
    WHERE Provider_Number = ?
        AND Fiscal_Year = ?
        AND Line_Number IN (1, 14)  -- Line 1: Total, Line 14: Total
    ORDER BY Line_Number
""", [CCN, FISCAL_YEAR]).df()

print("\n2. Patient Volumes (Worksheet S-3):")
print(patient_volumes.to_string(index=False))


def _volume(line_number, column):
    """Return `column` from the first row with `line_number`, or NaN if no such row exists."""
    matches = patient_volumes.loc[patient_volumes['Line_Number'] == line_number, column]
    return matches.iloc[0] if len(matches) > 0 else float('nan')


# Extract values. The three totals are always defined (NaN when a line is
# missing) so the next cell never hits a NameError, and a result holding only
# one of the two lines no longer raises IndexError.
total_inpatient_days = _volume(1, 'Inpatient_Days')
total_discharges = _volume(1, 'Discharges')
total_outpatient_visits = _volume(14, 'Outpatient_Visits')

if len(patient_volumes) > 0:
    print(f"\n   Total Inpatient Days: {total_inpatient_days:,.0f}")
    print(f"   Total Discharges: {total_discharges:,.0f}")
    print(f"   Total Outpatient Visits: {total_outpatient_visits:,.0f}")
else:
    print(f"\n   ⚠ No Worksheet S-3 rows for Provider_Number {CCN}, Fiscal Year {FISCAL_YEAR}")

In [None]:
# Adjusted discharges = inpatient discharges + outpatient visits converted to
# "discharge equivalents" by dividing by the average length of stay.
print("\n3. Calculate Adjusted Discharges:", "=" * 80, sep="\n")

# Average Length of Stay (ALOS); set to 0 when there are no discharges
if not total_discharges > 0:
    alos = 0
    alos_message = "No discharges - ALOS = 0"
else:
    alos = total_inpatient_days / total_discharges
    alos_message = (
        f"Average Length of Stay (ALOS) = {total_inpatient_days:,.0f} / "
        f"{total_discharges:,.0f} = {alos:.2f} days"
    )
print(alos_message)

# Outpatient Equivalent Discharges; 0 when ALOS is not positive
if not alos > 0:
    outpatient_equivalent = 0
    outpatient_message = "\nOutpatient Equivalent Discharges = 0 (ALOS is 0)"
else:
    outpatient_equivalent = total_outpatient_visits / alos
    outpatient_message = (
        f"\nOutpatient Equivalent Discharges = {total_outpatient_visits:,.0f} / "
        f"{alos:.2f} = {outpatient_equivalent:,.2f}"
    )
print(outpatient_message)

# Adjusted Discharges
adjusted_discharges = total_discharges + outpatient_equivalent
print(
    f"\nAdjusted Discharges = {total_discharges:,.0f} + "
    f"{outpatient_equivalent:,.2f} = {adjusted_discharges:,.2f}"
)

In [None]:
# Calculate Operating Expense per Adjusted Discharge
print("\n4. Calculate KPI:")
print("=" * 80)

if adjusted_discharges > 0:
    calculated_kpi = total_operating_expenses / adjusted_discharges
    print(f"Operating Expense per Adjusted Discharge = ${total_operating_expenses:,.2f} / {adjusted_discharges:,.2f}")
    print(f"\n   = ${calculated_kpi:,.2f}")
else:
    # Also reached when adjusted_discharges is NaN (NaN > 0 is False)
    calculated_kpi = None
    print("Cannot calculate - Adjusted Discharges is 0 or missing")

# Explicit availability flags: plain truthiness would treat a legitimate 0.0
# as missing and a NaN as present.
has_calculated = calculated_kpi is not None and not pd.isna(calculated_kpi)
has_precomputed = not pd.isna(precomputed_value)

# Compare with pre-computed value (the label is kept on the N/A lines too)
print("\n\nComparison:")
print("=" * 80)
print(f"Calculated Value:    ${calculated_kpi:,.2f}" if has_calculated else "Calculated Value:    N/A")
print(f"Pre-computed Value:  ${precomputed_value:,.2f}" if has_precomputed else "Pre-computed Value:  N/A")

if has_calculated and has_precomputed:
    diff = abs(calculated_kpi - precomputed_value)
    if precomputed_value != 0:
        # abs() keeps the percentage non-negative, so a negative stored value
        # cannot pass the "< 0.01" tolerance by sign alone.
        diff_pct = (diff / abs(precomputed_value)) * 100
        print(f"Difference:          ${diff:,.2f} ({diff_pct:.4f}%)")
        values_match = diff_pct < 0.01
    else:
        # A relative difference is undefined against a zero baseline
        print(f"Difference:          ${diff:,.2f}")
        values_match = diff == 0
    if values_match:
        print("\n✓ Values match!")
    else:
        print("\n⚠ Values differ - check calculation logic")

## 5. Benchmark Calculations

Calculate benchmarks at all 4 levels:
1. **State + Hospital Type** (most specific)
2. **State**
3. **Hospital Type**
4. **National** (broadest)

In [None]:
# 1. State + Hospital Type Benchmark
# Most specific peer group: hospitals sharing both the state and the type.
print(
    "1. STATE + HOSPITAL TYPE BENCHMARK",
    "=" * 80,
    f"State: {STATE_CODE}, Hospital Type: {HOSPITAL_TYPE}",
    sep="\n",
)

state_type_benchmark = kpi_con.execute("""
    SELECT 
        Benchmark_Level,
        State_Code,
        Hospital_Type,
        Fiscal_Year,
        KPI_Name,
        Provider_Count,
        P25,
        Median,
        P75,
        Mean
    FROM hospital_benchmarks
    WHERE Benchmark_Level = 'State_Hospital_Type'
        AND State_Code = ?
        AND Hospital_Type = ?
        AND Fiscal_Year = ?
        AND KPI_Name = 'Operating_Expense_Ratio'
""", [STATE_CODE, HOSPITAL_TYPE, FISCAL_YEAR]).df()

print("\nBenchmark Statistics:", state_type_benchmark.to_string(index=False), sep="\n")

# The st_* statistics are only defined when a benchmark row was returned;
# later cells check the frame's length before using them.
if not state_type_benchmark.empty:
    st_peers, st_p25, st_median, st_p75, st_mean = (
        state_type_benchmark[stat_column].iloc[0]
        for stat_column in ('Provider_Count', 'P25', 'Median', 'P75', 'Mean')
    )

    print(
        f"\nPeer Count: {st_peers}",
        f"P25:  ${st_p25:,.2f}",
        f"P50:  ${st_median:,.2f}",
        f"P75:  ${st_p75:,.2f}",
        f"Mean: ${st_mean:,.2f}",
        sep="\n",
    )

In [None]:
# 2. State Benchmark
# Peer group: all hospitals in the same state, regardless of type.
print("\n\n2. STATE BENCHMARK", "=" * 80, f"State: {STATE_CODE}", sep="\n")

state_benchmark = kpi_con.execute("""
    SELECT 
        Benchmark_Level,
        State_Code,
        Fiscal_Year,
        KPI_Name,
        Provider_Count,
        P25,
        Median,
        P75,
        Mean
    FROM hospital_benchmarks
    WHERE Benchmark_Level = 'State'
        AND State_Code = ?
        AND Fiscal_Year = ?
        AND KPI_Name = 'Operating_Expense_Ratio'
""", [STATE_CODE, FISCAL_YEAR]).df()

print("\nBenchmark Statistics:", state_benchmark.to_string(index=False), sep="\n")

# The s_* statistics are only defined when a benchmark row was returned;
# later cells check the frame's length before using them.
if not state_benchmark.empty:
    s_peers, s_p25, s_median, s_p75, s_mean = (
        state_benchmark[stat_column].iloc[0]
        for stat_column in ('Provider_Count', 'P25', 'Median', 'P75', 'Mean')
    )

    print(
        f"\nPeer Count: {s_peers}",
        f"P25:  ${s_p25:,.2f}",
        f"P50:  ${s_median:,.2f}",
        f"P75:  ${s_p75:,.2f}",
        f"Mean: ${s_mean:,.2f}",
        sep="\n",
    )

In [None]:
# 3. Hospital Type Benchmark
# Peer group: all hospitals of the same type, regardless of state.
print("\n\n3. HOSPITAL TYPE BENCHMARK", "=" * 80, f"Hospital Type: {HOSPITAL_TYPE}", sep="\n")

type_benchmark = kpi_con.execute("""
    SELECT 
        Benchmark_Level,
        Hospital_Type,
        Fiscal_Year,
        KPI_Name,
        Provider_Count,
        P25,
        Median,
        P75,
        Mean
    FROM hospital_benchmarks
    WHERE Benchmark_Level = 'Hospital_Type'
        AND Hospital_Type = ?
        AND Fiscal_Year = ?
        AND KPI_Name = 'Operating_Expense_Ratio'
""", [HOSPITAL_TYPE, FISCAL_YEAR]).df()

print("\nBenchmark Statistics:", type_benchmark.to_string(index=False), sep="\n")

# The t_* statistics are only defined when a benchmark row was returned;
# later cells check the frame's length before using them.
if not type_benchmark.empty:
    t_peers, t_p25, t_median, t_p75, t_mean = (
        type_benchmark[stat_column].iloc[0]
        for stat_column in ('Provider_Count', 'P25', 'Median', 'P75', 'Mean')
    )

    print(
        f"\nPeer Count: {t_peers}",
        f"P25:  ${t_p25:,.2f}",
        f"P50:  ${t_median:,.2f}",
        f"P75:  ${t_p75:,.2f}",
        f"Mean: ${t_mean:,.2f}",
        sep="\n",
    )

In [None]:
# 4. National Benchmark
# Broadest peer group: no state or hospital-type filter.
print("\n\n4. NATIONAL BENCHMARK", "=" * 80, sep="\n")

national_benchmark = kpi_con.execute("""
    SELECT 
        Benchmark_Level,
        Fiscal_Year,
        KPI_Name,
        Provider_Count,
        P25,
        Median,
        P75,
        Mean
    FROM hospital_benchmarks
    WHERE Benchmark_Level = 'National'
        AND Fiscal_Year = ?
        AND KPI_Name = 'Operating_Expense_Ratio'
""", [FISCAL_YEAR]).df()

print("\nBenchmark Statistics:", national_benchmark.to_string(index=False), sep="\n")

# The n_* statistics are only defined when a benchmark row was returned;
# later cells check the frame's length before using them.
if not national_benchmark.empty:
    n_peers, n_p25, n_median, n_p75, n_mean = (
        national_benchmark[stat_column].iloc[0]
        for stat_column in ('Provider_Count', 'P25', 'Median', 'P75', 'Mean')
    )

    print(
        f"\nPeer Count: {n_peers}",
        f"P25:  ${n_p25:,.2f}",
        f"P50:  ${n_median:,.2f}",
        f"P75:  ${n_p75:,.2f}",
        f"Mean: ${n_mean:,.2f}",
        sep="\n",
    )

## 6. Performance Comparison

Compare the hospital's value against all four benchmark levels. Lower values are better for this KPI.

In [None]:
def calculate_percentile_rank(value, p25, median, p75):
    """Classify `value` against a benchmark's quartile cut-offs.

    Args:
        value: The hospital's KPI value.
        p25, median, p75: The benchmark's 25th, 50th and 75th percentiles.

    Returns:
        A `(label, color)` tuple. `label` names the band the value falls in
        ('Bottom Quartile', 'Below Median', 'Above Median' or 'Top Quartile');
        `color` is the matching status keyword ('success', 'info', 'warning'
        or 'danger'). Because lower expense is better, the bottom quartile is
        'success' and the top quartile is 'danger'. Returns
        `(None, 'secondary')` when any input is missing.
    """
    # pd.isna covers None as well as NaN. Benchmark statistics read from a
    # DataFrame arrive as NaN when absent, and NaN compares False against
    # everything, so without this check a missing benchmark would fall through
    # every branch and be reported as 'Top Quartile'.
    if any(pd.isna(item) for item in (value, p25, median, p75)):
        return None, 'secondary'
    if value <= p25:
        return 'Bottom Quartile', 'success'  # Lower is better for expense
    elif value <= median:
        return 'Below Median', 'info'
    elif value <= p75:
        return 'Above Median', 'warning'
    else:
        return 'Top Quartile', 'danger'  # Higher is worse for expense

print(
    "PERFORMANCE COMPARISON",
    "=" * 80,
    f"Hospital Value: ${precomputed_value:,.2f}",
    "",
    sep="\n",
)

# Build one comparison row per benchmark level that returned data, ordered
# from the most specific peer group to the broadest.
comparison_data = []

for level_label, level_frame in (
    ('State + Hospital Type', state_type_benchmark),
    ('State', state_benchmark),
    ('Hospital Type', type_benchmark),
    ('National', national_benchmark),
):
    if level_frame.empty:
        continue

    level_peers = level_frame['Provider_Count'].iloc[0]
    level_p25 = level_frame['P25'].iloc[0]
    level_median = level_frame['Median'].iloc[0]
    level_p75 = level_frame['P75'].iloc[0]

    rank, color = calculate_percentile_rank(precomputed_value, level_p25, level_median, level_p75)

    # Percentage gap to the peer median; skipped when the median is falsy
    # (e.g. 0) to avoid dividing by zero.
    if level_median:
        gap_vs_median = (precomputed_value - level_median) / level_median * 100
    else:
        gap_vs_median = None

    comparison_data.append({
        'Benchmark Level': level_label,
        'Peers': level_peers,
        'Median': level_median,
        'Hospital Value': precomputed_value,
        'Performance': rank,
        'vs Median': gap_vs_median
    })

comparison_df = pd.DataFrame(comparison_data)
print(
    comparison_df.to_string(index=False),
    "",
    "Note: Lower values are better for Operating Expense per Adjusted Discharge",
    sep="\n",
)

## 7. Visualization: Benchmark Comparison

In [None]:
# Create visualization comparing hospital to all benchmarks:
# one box per benchmark level, with the hospital's value overlaid as a marker.
fig = go.Figure()

# Parallel lists describing each benchmark level that returned data
benchmark_names = []
p25_values = []
median_values = []
p75_values = []

for level_label, level_frame in (
    ('State+Type', state_type_benchmark),
    ('State', state_benchmark),
    ('Hospital Type', type_benchmark),
    ('National', national_benchmark),
):
    if level_frame.empty:
        continue
    level_peers = level_frame['Provider_Count'].iloc[0]
    # Plotly renders "<br>" as a line break in labels; a "\n" is not shown as
    # one, so the peer count would otherwise run on in the same line.
    benchmark_names.append(f'{level_label}<br>({level_peers} peers)')
    p25_values.append(level_frame['P25'].iloc[0])
    median_values.append(level_frame['Median'].iloc[0])
    p75_values.append(level_frame['P75'].iloc[0])

# Add box plot for each benchmark from the pre-computed quartiles.
# The whiskers are NOT observed min/max values: the benchmark table only
# provides quartiles, so the fences are drawn at 80% of P25 and 120% of P75
# purely to give the boxes visible whiskers.
for name, q1_value, median_value, q3_value in zip(
    benchmark_names, p25_values, median_values, p75_values
):
    fig.add_trace(go.Box(
        name=name,
        q1=[q1_value],
        median=[median_value],
        q3=[q3_value],
        lowerfence=[q1_value * 0.8],
        upperfence=[q3_value * 1.2],
        marker_color='lightblue',
        boxmean=False
    ))

# Add hospital value as scatter point (same value repeated over every level)
fig.add_trace(go.Scatter(
    x=benchmark_names,
    y=[precomputed_value] * len(benchmark_names),
    mode='markers',
    marker=dict(size=15, color='red', symbol='diamond'),
    name=f'Hospital {CCN}'
))

fig.update_layout(
    title=f'Operating Expense per Adjusted Discharge - Benchmark Comparison<br>Hospital {CCN}, Fiscal Year {FISCAL_YEAR}',
    yaxis_title='Operating Expense per Adjusted Discharge ($)',
    xaxis_title='Benchmark Level',
    showlegend=True,
    height=500,
    hovermode='closest'
)

fig.show()

## 8. Trend Analysis (5-Year History)

In [None]:
# Get 5-year trend for this hospital.
# The window is derived from FISCAL_YEAR (FISCAL_YEAR-4 .. FISCAL_YEAR) so it
# follows the analysis year instead of a hard-coded 2020 start, and years
# after FISCAL_YEAR are excluded so the "most recent" row is the card's year.
TREND_YEARS = 5
trend_start_year = FISCAL_YEAR - (TREND_YEARS - 1)

trend_data = kpi_con.execute("""
    SELECT 
        Provider_Number,
        Fiscal_Year,
        Operating_Expense_Ratio as Operating_Expense_per_Adjusted_Discharge
    FROM hospital_kpis
    WHERE Provider_Number = ?
        AND Fiscal_Year BETWEEN ? AND ?
    ORDER BY Fiscal_Year
""", [CCN, trend_start_year, FISCAL_YEAR]).df()

print("5-YEAR TREND")
print("=" * 80)
print(trend_data.to_string(index=False))

# Calculate year-over-year change between the last two rows returned.
# yoy_change is always defined (None when it cannot be computed) so later
# cells never see an undefined or stale value.
yoy_change = None
if len(trend_data) > 1:
    most_recent = trend_data.iloc[-1]['Operating_Expense_per_Adjusted_Discharge']
    previous = trend_data.iloc[-2]['Operating_Expense_per_Adjusted_Discharge']

    if not pd.isna(most_recent) and not pd.isna(previous) and previous != 0:
        yoy_change = ((most_recent - previous) / previous) * 100
        print(f"\nYear-over-Year Change: {yoy_change:+.2f}%")
        if yoy_change > 0:
            print("  ⚠ Operating expense per discharge INCREASED (worse)")
        elif yoy_change < 0:
            print("  ✓ Operating expense per discharge DECREASED (better)")
        else:
            # Exactly 0% is neither an improvement nor a deterioration
            print("  = Operating expense per discharge UNCHANGED")

In [None]:
# Visualize trend: the hospital's KPI by fiscal year as a line with markers
hospital_trend_trace = go.Scatter(
    x=trend_data['Fiscal_Year'],
    y=trend_data['Operating_Expense_per_Adjusted_Discharge'],
    mode='lines+markers',
    name=f'Hospital {CCN}',
    line=dict(color='#2C3E50', width=3),
    marker=dict(size=10)
)
fig = go.Figure(data=[hospital_trend_trace])

# Add median benchmark line for comparison (only when the State+Type
# benchmark returned a row, since st_median is defined only in that case)
if not state_type_benchmark.empty:
    fig.add_hline(
        y=st_median,
        line_dash="dash",
        line_color="red",
        annotation_text=f"State+Type Median: ${st_median:,.2f}"
    )

fig.update_layout(dict(
    title=f'Operating Expense per Adjusted Discharge - 5-Year Trend<br>Hospital {CCN}',
    xaxis_title='Fiscal Year',
    yaxis_title='Operating Expense per Adjusted Discharge ($)',
    hovermode='x unified',
    height=500
))

fig.show()

## 9. Summary Card Replication

This section replicates what is shown on the KPI card in the dashboard.

In [None]:
# Text rendering of the dashboard KPI card: headline value, one line per
# benchmark level, and the multi-year trend.
print("=" * 80)
print("KPI CARD SUMMARY")
print("=" * 80)
print()
print(f"Hospital: {CCN}")
print(f"State: {STATE_CODE}, Type: {HOSPITAL_TYPE}")
print(f"Fiscal Year: {FISCAL_YEAR}")
print()
print(f"Operating Expense per Adjusted Discharge: ${precomputed_value:,.2f}")
print()
print("Benchmark Comparison:")
print("-" * 80)

# to_dict('records') yields one dict per row (and nothing for an empty frame).
for row in comparison_df.to_dict('records'):
    level = row['Benchmark Level']
    peers = row['Peers']
    median = row['Median']
    perf = row['Performance']
    vs_median = row['vs Median']

    # Performance / vs Median are None (or NaN) when the value or benchmark is
    # missing; formatting None with a width/precision spec raises TypeError,
    # so substitute a placeholder instead.
    perf_text = 'N/A' if pd.isna(perf) else perf
    vs_median_text = f"{'N/A':>6s}" if pd.isna(vs_median) else f"{vs_median:+6.2f}%"

    print(f"{level:25s} | {peers:3.0f} peers | Median: ${median:8,.2f} | {perf_text:20s} | {vs_median_text}")

print()
print("5-Year Trend:")
print("-" * 80)
kpi_column = 'Operating_Expense_per_Adjusted_Discharge'
# Iterate the columns directly: iterrows() builds one Series per row and can
# upcast the integer fiscal year to float, printing it as e.g. "2024.0".
for year, value in zip(trend_data['Fiscal_Year'], trend_data[kpi_column]):
    value_text = "N/A" if pd.isna(value) else f"${value:,.2f}"
    print(f"  {year}: {value_text}")

# Recompute the year-over-year change here rather than reusing a variable
# from the trend cell, which is only assigned when its guards pass.
summary_yoy_change = None
if len(trend_data) > 1:
    latest_value = trend_data[kpi_column].iloc[-1]
    prior_value = trend_data[kpi_column].iloc[-2]
    if not pd.isna(latest_value) and not pd.isna(prior_value) and prior_value != 0:
        summary_yoy_change = ((latest_value - prior_value) / prior_value) * 100

if summary_yoy_change is not None:
    print(f"\n  Year-over-Year Change: {summary_yoy_change:+.2f}%")

print()
print("=" * 80)

## 10. Cleanup

In [None]:
# Release both DuckDB connections now that every query has run
for open_connection in (kpi_con, worksheet_con):
    open_connection.close()

print("✓ Database connections closed")