In [1]:
import pandas as pd

In [2]:
# Billing export: load the Athena workbook into a single de-duplicated frame.
file_path = (
    r"C:\Users\pirat\Dropbox\Consulting Inc\Upperline Health\1. Payor Data & Contracts"
    r"\99-Athena Reports\ULH All Payors FL - DOS 2024 & YTD June 2025 - as of 11.17.25.xlsx"
)

# sheet_name=None loads every worksheet and returns {sheet_name: DataFrame}.
sheets_dict = pd.read_excel(file_path, sheet_name=None)

# Every worksheet is stacked, so this assumes the workbook holds only the
# relevant billing sheets (the original note expected two). Rows that are
# exact duplicates across all columns are then removed, keeping the first.
df_raw = (
    pd.concat(list(sheets_dict.values()), ignore_index=True)
    .drop_duplicates(keep="first")
)

# BENCHMARK (author's note): 1m per 1 mil rows

In [13]:
# Initial cleaning. Work on a copy so df_raw stays untouched and this cell can
# be re-run without reloading the workbook.
df_clean = df_raw.copy()

# Store Net Payment as a magnitude (negative amounts become positive).
df_clean["Net Payment"] = df_clean["Net Payment"].abs()

# Net payment divided by charge units (column name kept as-is for downstream use).
# A zero in "Charge Units" would produce +/-inf, which fillna(0) does not catch,
# so zero units are masked to NaN first; every undefined ratio then becomes 0.
charge_units = df_clean["Charge Units"]
df_clean["Avg_Paid_Per_Claim"] = (
    df_clean["Net Payment"] / charge_units.where(charge_units != 0)
).fillna(0)

# Composite key: Claim ID, Patient ID and Date of Service joined with "-".
df_clean["Patient_Claim_Key"] = (
    df_clean["Claim ID"].astype(str)
    + "-"
    + df_clean["Patient ID"].astype(str)
    + "-"
    + df_clean["Date of Service"].astype(str)
)

# "State" is the first two characters of "Service Department".
df_clean["State"] = df_clean["Service Department"].str[:2]

In [14]:
# Import Payer Crosswalk File
file_path = (
    r"C:\Users\pirat\Dropbox\Consulting Inc\Upperline Health\1. Payor Data & Contracts\99-Athena Reports"
    r"\Payor Mapping_FIN.xlsx"
)

# Read only the "PayorMap_FIN" sheet of the crosswalk workbook.
df_payor_mapping = pd.read_excel(file_path, sheet_name="PayorMap_FIN")

# Normalized join key: upper-cased, whitespace-stripped payer name.
df_payor_mapping["INSNAME_OPS_key"] = (
    df_payor_mapping["INSNAME_OPS"].str.upper().str.strip()
)

# Keep only the columns used downstream.
df_payor_mapping = df_payor_mapping[[
    "INSNAME_OPS_key",
    "INSNAME_OPS",
    "PAYORCAT_FIN",
    "PARENTCO_NAME_FIN"
]]

# Drop crosswalk rows that have no payer name. pandas merges match null keys
# to each other, so a blank crosswalk row would otherwise attach its values to
# every billing row whose insurance name is missing.
df_payor_mapping = df_payor_mapping.dropna(subset=["INSNAME_OPS_key"])

# One row per normalized key (first occurrence wins) so the join cannot
# multiply billing rows.
df_payor_mapping = df_payor_mapping.drop_duplicates("INSNAME_OPS_key", keep="first")

In [15]:
# Build the billing-side join key with the same normalization as the
# crosswalk key (upper-case, stripped).
df_clean["Primary Insurance_key"] = (
    df_clean["Primary Insurance"].str.upper().str.strip()
)

# Left join keeps every billing row; unmatched rows get nulls in the crosswalk
# columns. validate="many_to_one" makes pandas raise a MergeError if the
# crosswalk is not unique on its key, instead of silently duplicating billing
# rows (e.g. if the crosswalk cell was changed or run out of order).
df_merged = pd.merge(
    df_clean,
    df_payor_mapping[["INSNAME_OPS_key", "INSNAME_OPS", "PAYORCAT_FIN", "PARENTCO_NAME_FIN"]],
    how="left",
    left_on="Primary Insurance_key",
    right_on="INSNAME_OPS_key",
    validate="many_to_one",
)

# The normalized keys were only needed for the join.
df_merged = df_merged.drop(columns=["Primary Insurance_key", "INSNAME_OPS_key"])

In [16]:
import numpy as np

# Keyword matching runs on the billing file's own payer name.
# INSNAME_OPS comes from the crosswalk and is null on every row the merge did
# not match, which are the rows where PAYORCAT_FIN is null, so basing the guess
# on it would label all of them with the default category.
ins = df_merged["Primary Insurance"].fillna("").str.lower()

# Conditions are evaluated in order; the first one that matches wins.
# na=False treats any non-text value as "no match".
is_medicare = ins.str.contains("medicare", na=False) | ins.str.contains("mcr", na=False)
is_medicaid = ins.str.contains("medicaid", na=False) | ins.str.contains("mcd", na=False)
is_other = ins.str.contains("tricare", na=False) | ins.str.contains("exchange", na=False)

# Build a guess for every row; names matching none of the keywords
# (including missing names) fall back to "COMMERCIAL".
payor_guess = np.select(
    [is_medicare, is_medicaid, is_other],
    ["MEDICARE", "MEDICAID", "OTHER"],
    default="COMMERCIAL",
)

# Use the guess only where the crosswalk did not supply PAYORCAT_FIN.
df_merged["PAYORCAT_FIN"] = df_merged["PAYORCAT_FIN"].fillna(
    pd.Series(payor_guess, index=df_merged.index)
)
