## Hospital Financial Reports Explorer (Parquet Version)

In [1]:
# Import required libraries
import duckdb
import pandas as pd
import numpy as np
from pathlib import Path
import ipywidgets as widgets
from IPython.display import display, HTML
import warnings
warnings.filterwarnings('ignore')

# Set display options
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.float_format', '{:.2f}'.format)

print("‚úì Libraries imported successfully")

‚úì Libraries imported successfully


In [2]:
# Define parquet paths
DATA_ROOT = Path('data/db_parquets')

BALANCE_SHEET_PATH = str(DATA_ROOT / 'balance_sheet_long' / '**/*.parquet')
FUND_BALANCE_CHANGES_PATH = str(DATA_ROOT / 'fund_balance_changes_long' / '**/*.parquet')
REVENUE_PATH = str(DATA_ROOT / 'revenue_long' / '**/*.parquet')
REVENUE_EXPENSES_PATH = str(DATA_ROOT / 'revenue_expenses_long' / '**/*.parquet')
COSTS_A000_PATH = str(DATA_ROOT / 'costs_a000_long' / '**/*.parquet')
COSTS_B100_PATH = str(DATA_ROOT / 'costs_b100_long' / '**/*.parquet')

print(f"‚úì Data root: {DATA_ROOT}")
print(f"‚úì Parquet paths configured")

‚úì Data root: data\db_parquets
‚úì Parquet paths configured


In [3]:
# Get available hospitals and years from parquet files
con = duckdb.connect(':memory:')

# Get hospitals with their metadata
hospitals_df = con.execute(f"""
    SELECT DISTINCT 
        Provider_Number,
        State_Code
    FROM read_parquet('{BALANCE_SHEET_PATH}', hive_partitioning=1)
    ORDER BY Provider_Number
""").df()

# Get available fiscal years
years_df = con.execute(f"""
    SELECT DISTINCT Fiscal_Year
    FROM read_parquet('{BALANCE_SHEET_PATH}', hive_partitioning=1)
    ORDER BY Fiscal_Year DESC
""").df()

con.close()

print(f"‚úì Found {len(hospitals_df)} hospitals")
print(f"‚úì Fiscal years available: {', '.join(map(str, years_df['Fiscal_Year'].tolist()))}")

‚úì Found 104 hospitals
‚úì Fiscal years available: 2024, 2023, 2022, 2021, 2020


## Selection Controls

Select a hospital (CCN) and fiscal year to view financial reports.

In [4]:
# Create selection widgets
ccn_options = [(f"{row['Provider_Number']} - {row['State_Code']}", row['Provider_Number']) 
               for _, row in hospitals_df.iterrows()]

ccn_dropdown = widgets.Dropdown(
    options=ccn_options,
    description='Hospital CCN:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='400px')
)

year_dropdown = widgets.Dropdown(
    options=[(str(year), year) for year in years_df['Fiscal_Year'].tolist()],
    description='Fiscal Year:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='200px')
)

fund_dropdown = widgets.Dropdown(
    options=['General Fund', 'Specific Purpose Fund', 'Endowment Fund', 'Plant Fund'],
    value='General Fund',
    description='Fund Type:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='300px')
)

display(widgets.HBox([ccn_dropdown, year_dropdown, fund_dropdown]))
print("\nüëÜ Select hospital, year, and fund type above")

HBox(children=(Dropdown(description='Hospital CCN:', layout=Layout(width='400px'), options=(('31300 - 31', np.‚Ä¶


üëÜ Select hospital, year, and fund type above


## Helper Functions

In [5]:
def format_millions(value):
    """Format value in millions with 2 decimals"""
    if pd.isna(value) or value == 0:
        return 0.00
    return round(value / 1e6, 2)

def style_dataframe(df):
    """Apply styling to dataframe for better readability"""
    return df.style.format({
        col: '{:.2f}' for col in df.select_dtypes(include=[np.number]).columns
    }).set_properties(**{
        'text-align': 'right'
    }, subset=df.select_dtypes(include=[np.number]).columns).set_properties(**{
        'text-align': 'left'
    }, subset=df.select_dtypes(include=['object']).columns)

print("‚úì Helper functions defined")

‚úì Helper functions defined


## 1Ô∏è‚É£ Balance Sheet

In [6]:
def get_balance_sheet(ccn, year, fund_type='General Fund'):
    """Get balance sheet for a specific hospital, year, and fund type"""
    con = duckdb.connect(':memory:')
    
    df = con.execute(f"""
        SELECT
            Acc_level1 as Category,
            Acc_level2 as Subcategory,
            Acc_name as Account,
            Line as Line_Number,
            Value
        FROM read_parquet('{BALANCE_SHEET_PATH}', hive_partitioning=1)
        WHERE Provider_Number = ?
            AND Fiscal_Year = ?
            AND Column_name = ?
        ORDER BY Line
    """, [int(ccn), int(year), fund_type]).df()
    
    con.close()
    
    if df.empty:
        print(f"‚ö†Ô∏è No balance sheet data found for CCN {ccn}, Year {year}, Fund {fund_type}")
        return None
    
    # Convert to millions
    df['Value ($ Millions)'] = df['Value'].apply(format_millions)
    df = df.drop('Value', axis=1)
    
    return df

# Get and display balance sheet
balance_sheet = get_balance_sheet(ccn_dropdown.value, year_dropdown.value, fund_dropdown.value)

if balance_sheet is not None:
    print(f"\nüìä Balance Sheet - {fund_dropdown.value}")
    print(f"CCN: {ccn_dropdown.value} | Fiscal Year: {year_dropdown.value}")
    print(f"Note: All amounts in millions (USD)")
    print(f"Total records: {len(balance_sheet):,}\n")
    display(style_dataframe(balance_sheet))


üìä Balance Sheet - General Fund
CCN: 31300 | Fiscal Year: 2024
Note: All amounts in millions (USD)
Total records: 32



Unnamed: 0,Category,Subcategory,Account,Line_Number,Value ($ Millions)
0,Balance,Assets,Cash On Hand And In Banks,100,0.79
1,Balance,Assets,Temporary Investments,200,1.87
2,Balance,Assets,Accounts Receivable,400,25.16
3,Balance,Assets,Other Receivables,500,0.62
4,Balance,Assets,Allowances For Uncollectible Notes And Accounts Receivable,600,-17.7
5,Balance,Assets,Inventory,700,1.14
6,Balance,Assets,Prepaid Expenses,800,0.43
7,Balance,Assets,Total Current Assets,1100,12.31
8,Balance,Assets,Land,1200,1.92
9,Balance,Assets,Land Improvements,1300,0.56


In [7]:
def get_balance_sheet(ccn, year, fund_type='General Fund'):
    con = duckdb.connect(':memory:')
    
    df = con.execute(f"""
        SELECT
            *
        FROM read_parquet('{BALANCE_SHEET_PATH}', hive_partitioning=1)
        WHERE Provider_Number = ?
            AND Fiscal_Year = ?
    """, [int(ccn), int(year)]).df()
    
    con.close()
    
    if df.empty:
        print(f"‚ö†Ô∏è No balance sheet data found for CCN {ccn}, Year {year}, Fund {fund_type}")
        return None
    
    # Convert to millions
    df['Value ($ Millions)'] = df['Value'].apply(format_millions)
    df = df.drop('Value', axis=1)
    
    return df

# Get and display balance sheet
balance_sheet = get_balance_sheet(ccn_dropdown.value, year_dropdown.value, fund_dropdown.value)

balance_sheet

Unnamed: 0,Provider_Number,Year,Account_Name,NPI,Control_Type,Report_Status,FY_Begin,FY_End,Geographic_Code,Worksheet,Line,Column,Column_name,Acc_level1,Acc_level2,Acc_level3,Acc_name,Fiscal_Year,State_Code,Value ($ Millions)
0,31300,2024,General_Fund_BAL_ASSE_CURR_Accounts_receivable,,2,1,01/01/2024,2024-12-31,5901,G000000,400,100,General Fund,Balance,Assets,Current Assets,Accounts Receivable,2024,31,25.16
1,31300,2024,General_Fund_BAL_ASSE_CURR_Allowances_for_unco...,,2,1,01/01/2024,2024-12-31,5901,G000000,600,100,General Fund,Balance,Assets,Current Assets,Allowances For Uncollectible Notes And Account...,2024,31,-17.7
2,31300,2024,General_Fund_BAL_ASSE_CURR_Cash_on_hand_and_in...,,2,1,01/01/2024,2024-12-31,5901,G000000,100,100,General Fund,Balance,Assets,Current Assets,Cash On Hand And In Banks,2024,31,0.79
3,31300,2024,General_Fund_BAL_ASSE_CURR_Inventory,,2,1,01/01/2024,2024-12-31,5901,G000000,700,100,General Fund,Balance,Assets,Current Assets,Inventory,2024,31,1.14
4,31300,2024,General_Fund_BAL_ASSE_CURR_Other_receivables,,2,1,01/01/2024,2024-12-31,5901,G000000,500,100,General Fund,Balance,Assets,Current Assets,Other Receivables,2024,31,0.62
5,31300,2024,General_Fund_BAL_ASSE_CURR_Prepaid_expenses,,2,1,01/01/2024,2024-12-31,5901,G000000,800,100,General Fund,Balance,Assets,Current Assets,Prepaid Expenses,2024,31,0.43
6,31300,2024,General_Fund_BAL_ASSE_CURR_Temporary_investments,,2,1,01/01/2024,2024-12-31,5901,G000000,200,100,General Fund,Balance,Assets,Current Assets,Temporary Investments,2024,31,1.87
7,31300,2024,General_Fund_BAL_ASSE_CURR_Total_current_asset...,,2,1,01/01/2024,2024-12-31,5901,G000000,1100,100,General Fund,Balance,Assets,Current Assets,Total Current Assets,2024,31,12.31
8,31300,2024,General_Fund_BAL_ASSE_FIXE_Accumulated_depreci...,,2,1,01/01/2024,2024-12-31,5901,G000000,1400,100,General Fund,Balance,Assets,Fixed Assets,Accumulated Depreciation,2024,31,-0.31
9,31300,2024,General_Fund_BAL_ASSE_FIXE_Accumulated_depreci...,,2,1,01/01/2024,2024-12-31,5901,G000000,1600,100,General Fund,Balance,Assets,Fixed Assets,Accumulated Depreciation,2024,31,-10.86


## 2Ô∏è‚É£ Fund Balance Changes

In [8]:
def get_fund_balance_changes(ccn, year):
    """Get fund balance changes for a specific hospital and year"""
    con = duckdb.connect(':memory:')
    
    df = con.execute(f"""
        SELECT
            Acc_level1 as Category,
            Acc_level2 as Subcategory,
            Acc_name as Account,
            Line as Line_Number,
            Value
        FROM read_parquet('{FUND_BALANCE_CHANGES_PATH}', hive_partitioning=1)
        WHERE Provider_Number = ?
            AND Fiscal_Year = ?
        ORDER BY Line
    """, [int(ccn), int(year)]).df()
    
    con.close()
    
    if df.empty:
        print(f"‚ö†Ô∏è No fund balance changes data found for CCN {ccn}, Year {year}")
        return None
    
    # Convert to millions
    df['Value ($ Millions)'] = df['Value'].apply(format_millions)
    df = df.drop('Value', axis=1)
    
    return df

# Get and display fund balance changes
fund_changes = get_fund_balance_changes(ccn_dropdown.value, year_dropdown.value)

if fund_changes is not None:
    print(f"\nüíº Fund Balance Changes")
    print(f"CCN: {ccn_dropdown.value} | Fiscal Year: {year_dropdown.value}")
    print(f"Note: All amounts in millions (USD)")
    print(f"Total records: {len(fund_changes):,}\n")
    display(style_dataframe(fund_changes))


üíº Fund Balance Changes
CCN: 31300 | Fiscal Year: 2024
Note: All amounts in millions (USD)
Total records: 7



Unnamed: 0,Category,Subcategory,Account,Line_Number,Value ($ Millions)
0,Changes in Fund Balances,,Fund balances at beginning of period,100,5.58
1,Changes in Fund Balances,,Net income (loss),200,1.31
2,Changes in Fund Balances,,Total,300,6.89
3,Changes in Fund Balances,,Subtotal,1100,6.89
4,Changes in Fund Balances,,Deduction 1,1200,0.26
5,Changes in Fund Balances,,Total deductions,1800,0.26
6,Changes in Fund Balances,,Fund balance at end of period per balance sheet,1900,6.63


## 3Ô∏è‚É£ Revenue Detail

In [9]:
def get_revenue(ccn, year):
    """Get revenue detail for a specific hospital and year"""
    con = duckdb.connect(':memory:')
    
    df = con.execute(f"""
        SELECT
            Revenue_Group as Revenue_Category,
            Revenue_Subgroup as Revenue_Subcategory,
            Revenue_Center,
            Revenue_Line_Name as Account,
            Line as Line_Number,
            Value
        FROM read_parquet('{REVENUE_PATH}', hive_partitioning=1)
        WHERE Provider_Number = ?
            AND Fiscal_Year = ?
        ORDER BY Line
    """, [int(ccn), int(year)]).df()
    
    con.close()
    
    if df.empty:
        print(f"‚ö†Ô∏è No revenue data found for CCN {ccn}, Year {year}")
        return None
    
    # Convert to millions
    df['Value ($ Millions)'] = df['Value'].apply(format_millions)
    df = df.drop('Value', axis=1)
    
    return df

# Get and display revenue
revenue = get_revenue(ccn_dropdown.value, year_dropdown.value)

if revenue is not None:
    print(f"\nüí∞ Revenue Detail")
    print(f"CCN: {ccn_dropdown.value} | Fiscal Year: {year_dropdown.value}")
    print(f"Note: All amounts in millions (USD)")
    print(f"Total records: {len(revenue):,}\n")
    display(style_dataframe(revenue))


üí∞ Revenue Detail
CCN: 31300 | Fiscal Year: 2024
Note: All amounts in millions (USD)
Total records: 19



Unnamed: 0,Revenue_Category,Revenue_Subcategory,Revenue_Center,Account,Line_Number,Value ($ Millions)
0,Patient Revenues,General Inpatient Routine Care Services,Inpatient,PatRev IPgral Hospital,100,2.52
1,Patient Revenues,General Inpatient Routine Care Services,Total,PatRev IPgral Hospital,100,2.52
2,Patient Revenues,General Inpatient Routine Care Services,Inpatient,PatRev IPgral Total general inpatient care services,1000,2.52
3,Patient Revenues,General Inpatient Routine Care Services,Total,PatRev IPgral Total general inpatient care services,1000,2.52
4,Patient Revenues,Intensive Care Type Inpatient Hospital Services,Inpatient,PatRev IC_IP Total inpatient routine care services,1700,2.52
5,Patient Revenues,Intensive Care Type Inpatient Hospital Services,Total,PatRev IC_IP Total inpatient routine care services,1700,2.52
6,Patient Revenues,Intensive Care Type Inpatient Hospital Services,Inpatient,PatRev IC_IP Ancillary services,1800,3.25
7,Patient Revenues,Intensive Care Type Inpatient Hospital Services,Outpatient,PatRev IC_IP Ancillary services,1800,14.96
8,Patient Revenues,Intensive Care Type Inpatient Hospital Services,Total,PatRev IC_IP Ancillary services,1800,18.22
9,Patient Revenues,Intensive Care Type Inpatient Hospital Services,Inpatient,PatRev IC_IP Outpatient services,1900,0.06


## 4Ô∏è‚É£ Revenue & Expenses Statement

In [10]:
def get_revenue_expenses(ccn, year):
    """Get revenue & expenses statement for a specific hospital and year"""
    con = duckdb.connect(':memory:')
    
    df = con.execute(f"""
        SELECT
            RE_Report as Report_Section,
            RE_Level as Level,
            RE_Account as Account_Type,
            RE_Line_Name as Account,
            Line as Line_Number,
            Value
        FROM read_parquet('{REVENUE_EXPENSES_PATH}', hive_partitioning=1)
        WHERE Provider_Number = ?
            AND Fiscal_Year = ?
        ORDER BY Line
    """, [int(ccn), int(year)]).df()
    
    con.close()
    
    if df.empty:
        print(f"‚ö†Ô∏è No revenue & expenses data found for CCN {ccn}, Year {year}")
        return None
    
    # Convert to millions
    df['Value ($ Millions)'] = df['Value'].apply(format_millions)
    df = df.drop('Value', axis=1)
    
    return df

# Get and display revenue & expenses
revenue_expenses = get_revenue_expenses(ccn_dropdown.value, year_dropdown.value)

if revenue_expenses is not None:
    print(f"\nüìà Revenue & Expenses Statement")
    print(f"CCN: {ccn_dropdown.value} | Fiscal Year: {year_dropdown.value}")
    print(f"Note: All amounts in millions (USD)")
    print(f"Total records: {len(revenue_expenses):,}\n")
    display(style_dataframe(revenue_expenses))


üìà Revenue & Expenses Statement
CCN: 31300 | Fiscal Year: 2024
Note: All amounts in millions (USD)
Total records: 12



Unnamed: 0,Report_Section,Level,Account_Type,Account,Line_Number,Value ($ Millions)
0,Revenues and expenses,1.0,Total Patient Revenues,Rev&Exp Total patient revenues,100,153.89
1,Revenues and expenses,1.0,Less Contractual Allowances And Discounts On Patients' Accounts,Rev&Exp Less contractual allowances and discounts on patients' accounts,200,109.28
2,Revenues and expenses,1.0,Net Patient Revenues,Rev&Exp Net patient revenues,300,44.61
3,Revenues and expenses,1.0,Less Total Operating Expenses,Rev&Exp Less total operating expenses,400,51.51
4,Revenues and expenses,1.0,Net Income From Service To Patients,Rev&Exp Net income from service to patients,500,-6.9
5,Revenues and expenses,2.0,Income From Investments,Rev&Exp Income from investments,700,0.65
6,Revenues and expenses,2.0,Revenue From Sale Of Medical Records And Abstracts,Rev&Exp Revenue from sale of medical records and abstracts,1800,0.01
7,Revenues and expenses,2.0,Rental Of Hospital Space,Rev&Exp Rental of hospital space,2200,0.09
8,Revenues and expenses,3.0,Other Income,Rev&Exp Other (specify),2400,7.46
9,Revenues and expenses,1.0,Total Other Income,Rev&Exp Total other income,2500,8.2


## 5Ô∏è‚É£ Cost Summary (from B100)

In [11]:
def get_cost_summary(ccn, year):
    """Get cost summary from B100 (total costs by cost center)"""
    con = duckdb.connect(':memory:')
    
    df = con.execute(f"""
        SELECT
            CAST(Line AS INTEGER) as Line_Number,
                     \"Column\", 
            CAST(\"Column\" AS INTEGER) as Column_Number,
            Account_group as Cost_Category,
            Account_name as Cost_Center,
            Value
        FROM read_parquet('{COSTS_B100_PATH}', hive_partitioning=1)
        WHERE Provider_Number = ?
            AND Fiscal_Year = ?
            AND CAST(Line AS INTEGER) >= 3000
            AND CAST(Line AS INTEGER) <= 20200
            --AND CAST(\"Column\" AS INTEGER) = 2600
            AND \"Column\" = '00000'
        ORDER BY CAST(Line AS INTEGER)
    """, [int(ccn), int(year)]).df()
    
    con.close()
    
    if df.empty:
        print(f"‚ö†Ô∏è No cost summary data found for CCN {ccn}, Year {year}")
        return None
    
    # Convert to millions
    df['Value ($ Millions)'] = df['Value'].apply(format_millions)
    df = df.drop('Value', axis=1)
    
    return df

# Get and display cost summary
cost_summary = get_cost_summary(ccn_dropdown.value, year_dropdown.value)

if cost_summary is not None:
    print(f"\nüí∏ Cost Summary")
    print(f"CCN: {ccn_dropdown.value} | Fiscal Year: {year_dropdown.value}")
    print(f"Note: All amounts in millions (USD)")
    print(f"Total records: {len(cost_summary):,}\n")
    display(style_dataframe(cost_summary))


üí∏ Cost Summary
CCN: 31300 | Fiscal Year: 2024
Note: All amounts in millions (USD)
Total records: 72



Unnamed: 0,Line_Number,Column,Column_Number,Cost_Category,Cost_Center,Value ($ Millions)
0,3000.0,0,0.0,,,2.78
1,3000.0,0,0.0,,,2.78
2,3000.0,0,0.0,,,2.78
3,3000.0,0,0.0,,,2.78
4,5000.0,0,0.0,,,3.07
5,5000.0,0,0.0,,,3.07
6,5000.0,0,0.0,,,3.07
7,5000.0,0,0.0,,,3.07
8,5400.0,0,0.0,,,2.55
9,5400.0,0,0.0,,,2.55


In [12]:
def get_cost_summary(ccn, year):
    """Get cost summary from B100 (total costs by cost center)"""
    con = duckdb.connect(':memory:')
    
    df = con.execute(f"""
        SELECT
            *
        FROM read_parquet('{COSTS_B100_PATH}', hive_partitioning=1)
        WHERE Provider_Number = ?
            AND Fiscal_Year = ?
    """, [int(ccn), int(year)]).df()
    
    con.close()
    
    if df.empty:
        print(f"‚ö†Ô∏è No cost summary data found for CCN {ccn}, Year {year}")
        return None
    
    # Convert to millions
    df['Value ($ Millions)'] = df['Value'].apply(format_millions)
    df = df.drop('Value', axis=1)
    
    return df

# Get and display cost summary
cost_summary = get_cost_summary(ccn_dropdown.value, year_dropdown.value)

cost_summary

Unnamed: 0,Provider_Number,Year,NPI,Control_Type,Report_Status,FY_Begin,FY_End,Geographic_Code,Worksheet,Line,Column,Account_group,Account_name,Overhead_center,Fiscal_Year,State_Code,Value ($ Millions)
0,31300.00,2024.00,,2.00,1.00,01/01/2024,2024-12-31,5901.00,B000001,00100,00000,General Service Cost Centers,New Capital Buildings and Fixtures,Net Expenses,2024,31,1.10
1,31300.00,2024.00,,2.00,1.00,01/01/2024,2024-12-31,5901.00,B000001,00100,00100,,,,2024,31,1.10
2,31300.00,2024.00,,2.00,1.00,01/01/2024,2024-12-31,5901.00,B000001,00200,00000,General Service Cost Centers,New Capital Equipment,Net Expenses,2024,31,1.60
3,31300.00,2024.00,,2.00,1.00,01/01/2024,2024-12-31,5901.00,B000001,00200,00200,,,,2024,31,1.60
4,31300.00,2024.00,,2.00,1.00,01/01/2024,2024-12-31,5901.00,B000001,00400,00000,General Service Cost Centers,Employee Benefits,Net Expenses,2024,31,0.46
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1413,31300.00,2024.00,,2.00,1.00,01/01/2024,2024-12-31,5901.00,B000001,20200,01400,,,,2024,31,0.34
1414,31300.00,2024.00,,2.00,1.00,01/01/2024,2024-12-31,5901.00,B000001,20200,01500,,,,2024,31,2.70
1415,31300.00,2024.00,,2.00,1.00,01/01/2024,2024-12-31,5901.00,B000001,20200,01600,,,,2024,31,1.24
1416,31300.00,2024.00,,2.00,1.00,01/01/2024,2024-12-31,5901.00,B000001,20200,02400,,,,2024,31,44.60


## 6Ô∏è‚É£ Detailed Costs (with Salary, Direct, and Overhead breakdown)

In [13]:
def get_detailed_costs(ccn, year):
    """Get detailed costs with salary, other direct, and overhead breakdown"""
    con = duckdb.connect(':memory:')
    
    query = f"""
    WITH
    -- Get A000 data and calculate totals
    a000_data AS (
        SELECT
            Provider_Number,
            Fiscal_Year,
            CAST(Line AS INTEGER) as Line,
            Account_name,
            MAX(CASE WHEN \"Column\" = '00100' THEN Value ELSE 0 END) as Col_100,
            MAX(CASE WHEN \"Column\" = '00200' THEN Value ELSE 0 END) as Col_200,
            MAX(CASE WHEN \"Column\" = '00400' THEN Value ELSE 0 END) as Col_400,
            MAX(CASE WHEN \"Column\" = '00600' THEN Value ELSE 0 END) as Col_600
        FROM read_parquet('{COSTS_A000_PATH}', hive_partitioning=1)
        WHERE Provider_Number = ?
          AND Fiscal_Year = ?
          AND CAST(Line AS INTEGER) >= 3000
          AND CAST(Line AS INTEGER) <= 20200
        GROUP BY Provider_Number, Fiscal_Year, CAST(Line AS INTEGER), Account_name
    ),
    a000_calc AS (
        SELECT
            *,
            (Col_100 + Col_200 + Col_400 + Col_600) as Col_700,
            CASE
                WHEN (Col_100 + Col_200) > 0
                THEN (Col_100 + Col_200 + Col_400 + Col_600) * (Col_100 / (Col_100 + Col_200))
                ELSE 0
            END as Salary,
            CASE
                WHEN (Col_100 + Col_200) > 0
                THEN (Col_100 + Col_200 + Col_400 + Col_600) * (Col_200 / (Col_100 + Col_200))
                ELSE 0
            END as Other_Direct
        FROM a000_data
    ),
    -- Get B100 overhead data
    b100_data AS (
        SELECT
            Provider_Number,
            Fiscal_Year,
            CAST(Line AS INTEGER) as Line,
            MAX(CASE WHEN \"Column\" = '00100' THEN Value ELSE 0 END) as OH_Col_100,
            MAX(CASE WHEN \"Column\" = '00200' THEN Value ELSE 0 END) as OH_Col_200,
            MAX(CASE WHEN \"Column\" = '00400' THEN Value ELSE 0 END) as OH_Col_400,
            MAX(CASE WHEN \"Column\" = '00500' THEN Value ELSE 0 END) as OH_Col_500,
            MAX(CASE WHEN \"Column\" = '02600' THEN Value ELSE 0 END) as OH_Col_2600
        FROM read_parquet('{COSTS_B100_PATH}', hive_partitioning=1)
        WHERE Provider_Number = ?
          AND Fiscal_Year = ?
          AND CAST(Line AS INTEGER) >= 3000
          AND CAST(Line AS INTEGER) <= 20200
        GROUP BY Provider_Number, Fiscal_Year, CAST(Line AS INTEGER)
    ),
    b100_calc AS (
        SELECT
            *,
            (OH_Col_100 + OH_Col_200) as Bldg_Equipment,
            OH_Col_400 as Employee_Benefits,
            OH_Col_500 as Admin_General,
            (OH_Col_2600 - OH_Col_100 - OH_Col_200 - OH_Col_400 - OH_Col_500) as Other_Overhead
        FROM b100_data
    )
    -- Join and combine all data
    SELECT
        COALESCE(a.Line, b.Line) as Line_Number,
        a.Account_name as Cost_Center,
        a.Salary,
        a.Other_Direct,
        b.Bldg_Equipment,
        b.Employee_Benefits,
        b.Admin_General,
        b.Other_Overhead,
        (COALESCE(a.Salary, 0) + COALESCE(a.Other_Direct, 0) +
         COALESCE(b.Bldg_Equipment, 0) + COALESCE(b.Employee_Benefits, 0) +
         COALESCE(b.Admin_General, 0) + COALESCE(b.Other_Overhead, 0)) as Total_Cost
    FROM a000_calc a
    FULL OUTER JOIN b100_calc b
        ON a.Provider_Number = b.Provider_Number
        AND a.Fiscal_Year = b.Fiscal_Year
        AND a.Line = b.Line
    ORDER BY Line_Number
    """
    
    df = con.execute(query, [int(ccn), int(year), int(ccn), int(year)]).df()
    
    con.close()
    
    if df.empty:
        print(f"‚ö†Ô∏è No detailed costs data found for CCN {ccn}, Year {year}")
        return None
    
    # Convert all numeric columns to millions
    for col in ['Salary', 'Other_Direct', 'Bldg_Equipment', 'Employee_Benefits', 'Admin_General', 'Other_Overhead', 'Total_Cost']:
        if col in df.columns:
            df[f'{col} ($M)'] = df[col].apply(format_millions)
            df = df.drop(col, axis=1)
    
    return df

# Get and display detailed costs
detailed_costs = get_detailed_costs(ccn_dropdown.value, year_dropdown.value)

if detailed_costs is not None:
    print(f"\nüìä Detailed Costs (with Salary, Direct, and Overhead breakdown)")
    print(f"CCN: {ccn_dropdown.value} | Fiscal Year: {year_dropdown.value}")
    print(f"Note: All amounts in millions (USD)")
    print(f"Total records: {len(detailed_costs):,}\n")
    display(style_dataframe(detailed_costs))


üìä Detailed Costs (with Salary, Direct, and Overhead breakdown)
CCN: 31300 | Fiscal Year: 2024
Note: All amounts in millions (USD)
Total records: 39



Unnamed: 0,Line_Number,Cost_Center,Salary ($M),Other_Direct ($M),Bldg_Equipment ($M),Employee_Benefits ($M),Admin_General ($M),Other_Overhead ($M),Total_Cost ($M)
0,3000.0,,0.0,0.0,0.15,1.48,2.96,-0.11,4.47
1,3000.0,Adults and Pediatrics (General Routine Care),1.48,1.59,0.15,1.48,2.96,-0.11,7.54
2,5000.0,Operating Room,1.23,5.27,0.49,1.23,3.59,-0.16,11.67
3,5000.0,,0.0,0.0,0.49,1.23,3.59,-0.16,5.16
4,5400.0,,0.0,0.0,0.78,1.46,3.37,-0.84,4.78
5,5400.0,Radiology-Diagnostic,1.46,3.69,0.78,1.46,3.37,-0.84,9.93
6,6000.0,Laboratory,1.52,1.73,0.18,1.52,3.28,-0.53,7.71
7,6000.0,,0.0,0.0,0.18,1.52,3.28,-0.53,4.46
8,6400.0,,0.0,0.0,0.01,0.15,0.22,-0.06,0.33
9,6400.0,Intravenous Therapy,0.15,0.05,0.01,0.15,0.22,-0.06,0.53


## 7Ô∏è‚É£ Export to Excel (Optional)

In [14]:
def export_to_excel(ccn, year, fund_type='General Fund'):
    """Export all financial reports to Excel file"""
    filename = f"Financial_Reports_CCN{ccn}_FY{year}_Parquet.xlsx"
    
    with pd.ExcelWriter(filename, engine='openpyxl') as writer:
        # Balance Sheet
        bs = get_balance_sheet(ccn, year, fund_type)
        if bs is not None:
            bs.to_excel(writer, sheet_name='Balance Sheet', index=False)
        
        # Fund Balance Changes
        fbc = get_fund_balance_changes(ccn, year)
        if fbc is not None:
            fbc.to_excel(writer, sheet_name='Fund Balance Changes', index=False)
        
        # Revenue
        rev = get_revenue(ccn, year)
        if rev is not None:
            rev.to_excel(writer, sheet_name='Revenue', index=False)
        
        # Revenue & Expenses
        re = get_revenue_expenses(ccn, year)
        if re is not None:
            re.to_excel(writer, sheet_name='Revenue & Expenses', index=False)
        
        # Cost Summary
        cs = get_cost_summary(ccn, year)
        if cs is not None:
            cs.to_excel(writer, sheet_name='Cost Summary', index=False)
        
        # Detailed Costs
        dc = get_detailed_costs(ccn, year)
        if dc is not None:
            dc.to_excel(writer, sheet_name='Detailed Costs', index=False)
    
    print(f"‚úì Exported to {filename}")
    return filename

# Uncomment to export:
# export_to_excel(ccn_dropdown.value, year_dropdown.value, fund_dropdown.value)