In [1]:
import os
import sys
import time
import random
import warnings
import collections
from dateutil.relativedelta import relativedelta
from datetime import datetime
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns

sys.path.append('../../src')
import cb_utils

sns.set(style="darkgrid")
pd.options.display.max_columns = 500

%load_ext autoreload
%autoreload 2

# Script to generate supplemental file for claim submission
Currently configured for UHC; other plans may be significantly different.

### Questions: 
1) Can we always pad out to 40 dx?
2) Can we include lines that don't have any dx?
    - If not, what about situations where the dx is associated with a non-RA CPT?
3) What member ID should we use if we don't have an MBI?

In [2]:
# configuration
from_date = '2022-03-23'  # lower bound applied to visit-note signed_time in the query
use_cache = False  # forwarded to cb_utils.sql_query_to_df when pulling the data
seed = random.randint(0, 100)  # drawn fresh on every run and echoed below

print(f'Seed: {seed}')

Seed: 32


### Pull Data

In [6]:
# SQL that pulls one row per Elation bill for the supplemental file.
#
# CTEs, in order:
#   good_mbi                 - derives `mbi` from analytics_patients.medicare_no when the value
#                              contains none of the letters s/l/o/i/b/z (case-insensitive), is 11
#                              characters long once trimmed and stripped of '-', and matches the
#                              position-by-position pattern on the regexp_match line.
#   encounter_icds           - one row per bill id with its ICD codes aggregated in
#                              bill_item_dx.seqno order; limited to payers whose name matches 'uhc'
#                              with should_run_claiming set, payer_id in (2, 4, 5, 6), and visit
#                              notes signed on/after `from_date` (interpolated by the f-string).
#   encounter_ra_cpt         - bill items whose CPT is flagged is_valid_for_hccs, numbered per bill
#                              by seqno; only rn = 1 is joined downstream.
#   pre_medicare_filter_list - per-bill patient/provider columns plus the hard-coded tax_id,
#                              place_of_service ('02') and ra_code ('A').
#   ever_medicare            - members having any cb.eligibility_days row with
#                              line_of_business_id in (2, 3) (presumably the Medicare lines of
#                              business - TODO confirm).
#   one_address              - a single patient_addresses row per patient, preferring type 'home'.
#
# The final SELECT inner-joins ever_medicare and one_address, so bills for members without a
# qualifying eligibility row or without any address are dropped.
#
# NOTE(review): good_mbi is joined with `mbi IS NOT NULL`, so in
# `coalesce(mbi, trim(gm.medicare_no))` the fallback can never supply a value (when mbi is NULL
# no good_mbi row is joined and gm.medicare_no is NULL too).
# NOTE(review): the joins to good_mbi / provider_taxonomy_codes / npi_taxonomy_crosswalk can
# presumably return more than one match per bill; the duplicate-bill assert run after the pull
# is what guards against that - confirm.
query = f"""
with
    good_mbi AS
        (
            SELECT
                mco_id
              , analytics_member_id
              , medicare_no
              , case WHEN medicare_no !~* 's|l|o|i|b|z' -- invalid characters in an MBI
                  AND LENGTH(REPLACE(LOWER(TRIM(medicare_no)), '-', '')) = 11
                    THEN upper((regexp_match(REGEXP_REPLACE(LOWER(TRIM(medicare_no)), '[s|l|o|i|b|z|-]', '', 'g'),
                        '[1-9][a-z][0-9a-z][0-9][a-z][0-9a-z][0-9][a-z][a-z][0-9][0-9]'))[1])
                END mbi
            FROM
                fdw_member_doc.analytics_patients
            WHERE
                medicare_no IS NOT NULL
        )
  , encounter_icds   AS (
    SELECT
        b.id                                                                      elation_bill_id
      , ARRAY_AGG(i.code ORDER BY bidx.seqno) FILTER ( WHERE i.code IS NOT NULL ) icds
    FROM
        fdw_member_doc.emr_origin_bill b
        JOIN fdw_member_doc.emr_origin_bill_item bi ON bi.bill_id = b.id AND bi.deletion_time ISNULL
        JOIN fdw_member_doc.emr_origin_visit_note vn ON vn.id = b.visit_note_id AND vn.deletion_time ISNULL
        JOIN fdw_member_doc.emr_origin_bill_item_dx bidx ON bi.id = bidx.bill_item_id
        JOIN fdw_member_doc.emr_origin_icd10 i ON i.id = bidx.icd10_id
        JOIN fdw_member_doc.patients p ON p.emr_origin_patient_id = vn.patient_id
        JOIN fdw_member_doc.payers pay ON p.payer_id = pay.id and pay.name ~* 'uhc' and pay.should_run_claiming

    WHERE
          p.payer_id IN (2, 4, 5, 6)
      AND vn.signed_by_user_id IS NOT NULL
      AND vn.signed_time IS NOT NULL
      AND vn.signed_time >= '{from_date}'
    GROUP BY
        1
)
  , encounter_ra_cpt AS (
    SELECT
        b.id                                                    elation_bill_id
      , UPPER(bi.cpt)                                           procedure_code
      , ROW_NUMBER() OVER (PARTITION BY b.id ORDER BY bi.seqno) rn
    FROM
        fdw_member_doc.emr_origin_bill b
        JOIN encounter_icds ei ON ei.elation_bill_id = b.id
        JOIN fdw_member_doc.emr_origin_bill_item bi ON bi.bill_id = b.id AND bi.deletion_time ISNULL
        JOIN fdw_member_doc.procedure_codes pc ON pc.code = bi.cpt AND pc.is_valid_for_hccs
), pre_medicare_filter_list as (
    SELECT
        b.id                                         elation_bill_id
      , p.first_name                                 patient_first_name
      , p.last_name                                  patient_last_name
      , p.dob                                        patient_dob
      , coalesce(mbi, trim(gm.medicare_no))          mbi -- DO WE WANT TO DO THIS or should this be NULL if it is not an MBI?
      , p.subscriber_id                              member_id
      , LEFT(p.gender, 1)                            gender
      , vn.document_date ::DATE                      date_of_service
      , emr_u.first_name                             provider_first_name
      , emr_u.last_name                              provider_last_name
      , emr_u.email                                  provider_email
      , emr_u.npi                                    provider_npi
      , ntc.medicare_specialty_code_int              medicare_specialty_code_int
      , '84-2590508'                                 tax_id
      , '02'                                         place_of_service
    --   , b.place_of_service
      , 'A'                                          ra_code
      --, pa.line1                                     line1
      --, pa.line2                                     line2
      --, pa.city                                      city
      --, pa.state                                     state
      --, pa.postal_code                               postal_code
      , ecpt.procedure_code                          procedure_code
      , ei.icds                                      icds
      , p.analytics_member_id
      , p.id golgi_id
    FROM
        fdw_member_doc.emr_origin_bill b
        JOIN encounter_icds ei ON ei.elation_bill_id = b.id
        JOIN encounter_ra_cpt ecpt ON ei.elation_bill_id = ecpt.elation_bill_id AND ecpt.rn = 1
        JOIN fdw_member_doc.emr_origin_visit_note vn ON vn.id = b.visit_note_id AND vn.deletion_time ISNULL
        JOIN fdw_member_doc.emr_origin_user emr_u ON emr_u.id = vn.physician_user_id
        LEFT JOIN fdw_member_doc.provider_taxonomy_codes ptc ON ptc.npi = emr_u.npi
        LEFT JOIN ref.npi_taxonomy_crosswalk ntc ON ntc.provider_taxonomy_code = ptc.taxonomy_code
        JOIN fdw_member_doc.patients p ON p.emr_origin_patient_id = vn.patient_id
        left JOIN good_mbi gm ON gm.analytics_member_id = p.analytics_member_id and mbi IS NOT NULL
        -- left JOIN fdw_member_doc.analytics_patients ap ON p.analytics_member_id = ap.analytics_member_id
        -- JOIN fdw_member_doc.patient_addresses pa ON p.id = pa.patient_id -- 1-1 for now, careful for future!
    WHERE
          p.payer_id IN (2, 4, 5, 6)
      AND vn.signed_by_user_id IS NOT NULL
      AND vn.signed_time IS NOT NULL
      -- AND ap.line_of_business_name != 'medicaid'
    ORDER BY
        vn.document_date::DATE
),
ever_medicare as (
    select
        distinct pl.analytics_member_id, pl.golgi_id
    from
        (select distinct analytics_member_id, golgi_id from pre_medicare_filter_list x) pl
        join cb.members m on m.id = pl.analytics_member_id
        join cb.eligibility_days ed on ed.member_id = pl.analytics_member_id and ed.mco_id = m.mco_id and ed.line_of_business_id in (2,3)
),
one_address as (
    select * from (
        select
            pa.*,
            row_number() over (partition by em.golgi_id order by case when pa.type = 'home' then '1' else pa.type end asc) rn
        from
            ever_medicare em
            JOIN fdw_member_doc.patient_addresses pa ON em.golgi_id = pa.patient_id
    ) addr
    where addr.rn = 1
)
select
    pml.elation_bill_id,
    pml.patient_first_name,
    pml.patient_last_name,
    pml.patient_dob,
    pml.mbi,
    pml.member_id,
    pml.gender,
    pml.date_of_service,
    pml.provider_first_name,
    pml.provider_last_name,
    pml.provider_email,
    pml.provider_npi,
    pml.medicare_specialty_code_int,
    pml.tax_id,
    pml.place_of_service,
    pml.ra_code,
    oa.line1,
    oa.line2,
    oa.city,
    oa.state,
    oa.postal_code,
    pml.procedure_code,
    pml.icds
from
    pre_medicare_filter_list pml
    join ever_medicare em on em.analytics_member_id = pml.analytics_member_id
    join one_address oa on oa.patient_id = pml.golgi_id
;
"""

In [7]:
# Run the query through the project helper; `use_cache` comes from the configuration cell.
# NOTE(review): cb_utils is project-local - presumably it returns a pandas DataFrame and can
# serve a cached result when use_cache is True; confirm against ../../src/cb_utils.
df = cb_utils.sql_query_to_df(query, use_cache=use_cache)

Pulling query from db


In [8]:
# (rows, columns) of the pulled frame.
# NOTE(review): the trailing numbers look like row counts from earlier pulls - TODO confirm.
df.shape # 4609 -> 6954

(6711, 23)

In [9]:
# Distinct non-null MBIs vs. distinct non-null member ids in the pull.
df.mbi.nunique(),  df.member_id.nunique()

(3855, 3862)

In [10]:
# Preview the first rows of the pull.
# NOTE(review): the rendered output includes patient names, dates of birth, MBIs and addresses -
# clear this cell's output before committing or sharing the notebook.
df.head()

Unnamed: 0,elation_bill_id,patient_first_name,patient_last_name,patient_dob,mbi,member_id,gender,date_of_service,provider_first_name,provider_last_name,provider_email,provider_npi,medicare_specialty_code_int,tax_id,place_of_service,ra_code,line1,line2,city,state,postal_code,procedure_code,icds
0,599715650928796,Elizabeth,Gallon,1951-03-27,8HH0UU4DN45,112236294,F,2023-05-03,Kathy,Agiri,kathy.agiri@carebridgehealth.com,1619328531,50,84-2590508,2,A,7486 E Holmes Rd,,Memphis,TN,38125,99213,"[N39.0, R05.9]"
1,493387270586524,Edith,Schlayer,1950-03-08,7GJ5T84QD50,106299767,F,2022-07-25,Jessica,Flippo,jessica.flippo@carebridgehealth.com,1194221317,50,84-2590508,2,A,601 E BARTON RIDGE RD,Apt 102,GREENEVILLE,TN,37745,99212,"[E66.01, I50.9, E26.1, E11.42, E11.42, E11.51,..."
2,600248033345692,Edith,Schlayer,1950-03-08,7GJ5T84QD50,106299767,F,2023-05-04,Leanne,Balmer,Leanne.Balmer@carebridgehealth.com,1629335120,50,84-2590508,2,A,601 E BARTON RIDGE RD,Apt 102,GREENEVILLE,TN,37745,98966,[Z51.89]
3,556769413890204,Thomas,Hayes,1982-10-25,6AH0C76FQ17,112236182,M,2023-01-17,Jessica,Flippo,jessica.flippo@carebridgehealth.com,1194221317,50,84-2590508,2,A,2242 DRY VALLEY RD,,THORN HILL,TN,37881,99214,"[S24.153S, G82.22, M62.838, N31.9]"
4,498085003657372,Betty,Ray,1937-03-16,7QY0U85FJ05,106297839,F,2022-08-05,Lacey,Bolden,lacey.bolden@carebridgehealth.com,1740410703,50,84-2590508,2,A,2114 DUNCAN AVE,,CHATTANOOGA,TN,37404,98966,[Z51.89]


In [11]:
# dupe check: every row must be a different bill (one detail line per elation_bill_id)
assert df['elation_bill_id'].nunique() == len(df)

In [12]:
# icd check: no row may be missing its diagnosis list
assert not df['icds'].isna().any()

In [13]:
# A detail line only has room for 40 diagnosis columns. A bill with more than 40
# distinct codes would need a duplicate row carrying the overflow; that is not
# implemented because the situation has not come up, so fail loudly if it does.
assert df['icds'].map(lambda codes: codes if codes is None else len(set(codes))).max() <= 40

#### Pad dx list col to 40

In [None]:
def pad_dx_col(icds, n_dx=40):
    """Clean one bill's diagnosis codes and pad the list out to ``n_dx`` slots.

    The codes are de-duplicated, stripped of the '.' separator and right-padded
    with ``None`` so every row contributes the same number of DX columns.

    Unlike a ``set``-based de-duplication, the first-seen order of the codes is
    kept (the query aggregates them by ``seqno``), so the output is stable from
    run to run. ``None`` entries in the input are ignored, which makes the
    function safe to apply to an already padded list (re-running the cell).

    Parameters
    ----------
    icds : iterable of str or None
        Diagnosis codes for one bill, e.g. ``['E11.42', 'I50.9']``. ``None``
        (or a float NaN, which pandas uses for a missing value) means the bill
        has no codes.
    n_dx : int, default 40
        Number of diagnosis slots per detail line.

    Returns
    -------
    list
        The cleaned codes followed by ``None`` padding. The list has ``n_dx``
        items unless the bill has more than ``n_dx`` distinct codes, in which
        case all of them are returned untruncated (callers assert on this
        before padding).
    """
    # pandas hands back None or float NaN when the array column is NULL
    if icds is None or isinstance(icds, float):
        return [None] * n_dx

    # dict.fromkeys de-duplicates while preserving insertion order
    unique_codes = list(dict.fromkeys(
        code.replace('.', '') for code in icds if code is not None
    ))

    return unique_codes + [None] * max(0, n_dx - len(unique_codes))
    
    
# Replace each bill's raw code list with its cleaned, fixed-width version.
df['icds'] = df['icds'].apply(pad_dx_col)

### Format dates

In [None]:
# Both date columns are written to the file as MM/DD/YYYY strings.
for date_col in ('date_of_service', 'patient_dob'):
    df[date_col] = pd.to_datetime(df[date_col]).dt.strftime('%m/%d/%Y')

### Build rows

In [None]:
def build_file_row(i, row):
    """Build one 'DTL' (detail) record for the supplemental file.

    Parameters
    ----------
    i : hashable
        Index label of the row; accepted for call compatibility, not used.
    row : object with attribute access (e.g. a row from ``DataFrame.iterrows``)
        Must expose the columns returned by the pull plus ``icds`` holding the
        padded list of diagnosis codes.

    Returns
    -------
    list
        The 40 fixed fields in file order followed by the diagnosis codes.

    Notes
    -----
    * The member id is only sent when there is no MBI. A missing MBI may arrive
      as ``None`` or as NaN depending on how pandas typed the column, so both
      are treated as missing and the MBI field itself is emitted as ``None``.
    * The CMS specialty code is emitted as ``None`` when missing instead of the
      literal text 'None'/'nan', and an integral float such as ``50.0`` (what
      pandas produces when the column contains nulls) is written as '50'.
    """
    mbi = None if pd.isna(row.mbi) else row.mbi

    specialty = row.medicare_specialty_code_int
    if pd.isna(specialty):
        cms_specialty = None
    elif isinstance(specialty, float) and specialty.is_integer():
        cms_specialty = str(int(specialty))
    else:
        cms_specialty = f'{specialty}'

    return [
        'DTL', # seg type
        row.elation_bill_id, # ref #
        row.patient_last_name,
        row.patient_first_name,
        None, # MI
        row.patient_dob,
        row.member_id if mbi is None else None, # member id
        None, # 'Retrieval NPI (Internal Use)',
        row.gender, # 'GENDER',
        None, # 'STATE CODE',
        mbi, # 'MBI-  need either member ID or MBI',
        row.date_of_service, # 'FDOS',
        row.date_of_service, # 'TDOS',
        None, # 'BILL TYPE- Institutional Only',
        None, # 'NU Indicator (Internal Use)',
        None, # 'PROV ID',
        row.provider_npi, # 'NPI',
        None, # 'PROV TYPE',
        None, # 'FACILITY NM- required for Institutional',
        row.provider_last_name, # 'PROV LAST NAME- Required for Professional',
        row.provider_first_name, # 'PROV FIRST NAME- required for Professional',
        cms_specialty, # 'CMS SPECIALTY- Required for Professional',
        row.tax_id, # 'TAX ID',
        row.procedure_code, # 'CPT- Professional and Hospital Outpatient only',
        None, # 'REV CODE - Required for Institutional',
        None, # 'SERVICE FDOS',
        None, # 'SERVICE TDOS',
        row.place_of_service, # 'POS- Professional only',
        '0', # 'ICD INDIC',
        row.ra_code, # 'RA Code- Required for Professional',
        None, # 'Chart Barcode (Internal Use)',
        None, # 'Chart Enc Key (Internal Use)',
        None, # 'Chart DX Key  (Internal Use)',
        None, # 'Contract ID (Tufts use only)',
        row.line1, # 'Mem Street Address ',
        row.line2, # 'Mem Address 2',
        row.city, # 'Mem City',
        row.state, # 'Mem State',
        row.postal_code, # 'Mem Zip Code',
        None, # 'CLAIMID/PCN', -- This would link it to an existing claim and make it a resubmit
    ] + list(row.icds)  # list(): tolerate a tuple/array of codes as well as a list

# One detail record per bill, in frame order.
rows = [build_file_row(i, row) for i, row in df.iterrows()]

### Add header and footer rows

In [None]:
# Count only the detail records. If this cell is re-run after the header/trailer
# rows were already added to `rows`, len(rows) would over-count by two and the
# trailer would report the wrong total.
detail_rows = [r for r in rows if r[0] == 'DTL']
assert detail_rows, 'no DTL rows were built - nothing to write'
n_rows, n_columns = len(detail_rows), len(detail_rows[0])
# 100k limit per file
assert n_rows <= 100000

training_partner_id = '00795'
# the file name carries a to-the-second timestamp taken when this cell runs
file_name = f'{training_partner_id}_UHC_ASMP_DIRECT_{datetime.now().strftime("%Y%m%d%H%M%S")}.txt'

# header record: segment type followed by file-level fields
header_vals = [
    'HDR',
    '8.9',
    training_partner_id,
    file_name,
    'UHC',
    'DIRECT',
    'P',
    'Y'
]
# pad to the detail width so every record has the same number of fields
header_vals = header_vals + [None] * (n_columns - len(header_vals))
# trailer record: segment type, partner id and the number of detail records
footer_vals = [
    'TRL',
    training_partner_id,
    n_rows
]
footer_vals = footer_vals + [None] * (n_columns - len(footer_vals))

In [None]:
# Wrap the detail records with the header and trailer records. Any HDR/TRL rows
# left in `rows` by an earlier run of this cell are dropped first, so re-running
# the cell does not stack duplicate header/trailer lines. The list is updated in
# place, as before.
rows[:] = [header_vals] + [r for r in rows if r[0] not in ('HDR', 'TRL')] + [footer_vals]

### Build File

In [None]:
# Diagnosis slots DX_0 .. DX_39 follow the 40 fixed fields of a detail record.
dx_cols = ['DX_' + str(slot) for slot in range(40)]

# Fixed field names, in the same order as the values produced by build_file_row.
columns = [
    'SEG TYPE',
    'REF #',
    'LAST NAME',
    'FIRST NAME',
    'MI',
    'DOB',
    'MEMBER ID- need either member ID or MBI',
    'Retrieval NPI (Internal Use)',
    'GENDER',
    'STATE CODE',
    'MBI-  need either member ID or MBI',
    'FDOS',
    'TDOS',
    'BILL TYPE- Institutional Only',
    'NU Indicator (Internal Use)',
    'PROV ID',
    'NPI',
    'PROV TYPE',
    'FACILITY NM- required for Institutional',
    'PROV LAST NAME- Required for Professional',
    'PROV FIRST NAME- required for Professional',
    'CMS SPECIALTY- Required for Professional',
    'TAX ID',
    'CPT- Professional and Hospital Outpatient only',
    'REV CODE - Required for Institutional',
    'SERVICE FDOS',
    'SERVICE TDOS',
    'POS- Professional only',
    'ICD INDIC',
    'RA Code- Required for Professional',
    'Chart Barcode (Internal Use)',
    'Chart Enc Key (Internal Use)',
    'Chart DX Key  (Internal Use)',
    'Contract ID (Tufts use only)',
    'Mem Street Address ',
    'Mem Address 2',
    'Mem City',
    'Mem State',
    'Mem Zip Code',
    'CLAIMID/PCN',
]
columns.extend(dx_cols)

In [None]:
# dtype=object keeps every cell exactly as built. Without it pandas infers a dtype
# per column, and a column holding integers in the detail rows but None in the
# padded header/trailer rows becomes float64, so the value is later written to
# the file as e.g. 1619328531.0 instead of 1619328531.
file_df = pd.DataFrame(rows, columns=columns, dtype=object)

In [None]:
# Eyeball the header record and the first detail records before writing the file.
file_df.head(20)

In [None]:
# Write the pipe-delimited file without an index or a column-name line (the HDR
# record built earlier is the file's own header). Create the output directory
# first so a fresh checkout does not fail on a missing ./data folder.
# NOTE(review): pandas will wrap a value in double quotes if it contains '|' or a
# quote character - confirm the receiving format tolerates that.
os.makedirs('./data', exist_ok=True)
file_df.to_csv(f'./data/{file_name}', sep='|', index=False, header=False)

In [None]:
# Echo the generated file name.
file_name

In [None]:
# NOTE(review): bare string expression; it does nothing except echo itself and looks like
# leftover scratch - consider deleting this cell.
'Hel O'

### response

In [None]:
import re

# Column names for the response file: the submitted layout plus 'err_id' and 'error',
# lower-cased with every run of non-alphanumeric characters collapsed to '_'.
cols = list(map(
    lambda c: re.sub('[^0-9a-zA-Z]+', '_', c.lower()),
    columns + ['err_id', 'error'],
))

# The first and last lines of the file are skipped (presumably the HDR and TRL records).
# NOTE(review): hard-coded absolute path to one user's machine - point this at a
# configurable data directory before sharing the notebook.
df = pd.read_csv(
    '/Users/bp/workspace/cb/data/00795_UHC_ASMP_DIRECT_20220328162146_RESUB1.txt',
    sep='|',
    header=None,
    names=cols,
    skiprows=1,
    skipfooter=1,
    engine='python',
)
df.head()


In [None]:
# Frequency of each distinct value in the response file's error column.
df.error.value_counts()

In [None]:
# HDR|8.9|00795|00795_UHC_ASMP_DIRECT_20220328162146_RESUB1.txt|UHC|DIRECT|P|Y

In [None]:
# Rows whose error text is exactly the err18 + err40 combination (blank member id and
# member not found in plan membership).
df.loc[df.error == 'err18-Blank MemberID; err40-Member not found in plan membership;']

In [None]:
# Export the parsed response rows as CSV.
# NOTE(review): hard-coded absolute path to one user's Downloads folder, and the file holds
# member-level data - write to a configurable, access-controlled location instead.
df.to_csv('/Users/bp/Downloads/uhc_supp_file_errors.csv', index=False)