In [2]:
import os
import sys
import time
import random
import warnings
import collections
from dateutil.relativedelta import relativedelta
from datetime import datetime
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns

sys.path.append('../src')
import cb_utils

sns.set(style="darkgrid")
pd.options.display.max_columns = 500

%load_ext autoreload
%autoreload 2

# Script to generate supplemental file resubmition based on error report received
Errors were either due to
- m545 icd
- last name mismatch
- member not found in membership

To remedy
- Will not submit any m545's since they aren't tied to hccs in the first place
- Have put new ln lookup in a junk table thanks to MLT
- Have new subscriber IDs to try for the missing membership folk

In [3]:
# configuration
use_cache = False
seed = random.randint(0, 100)

print(f'Seed: {seed}')

Seed: 98


### Pull Data

In [4]:
query = """
WITH
    good_mbi                 AS
        ( SELECT
              mco_id
            , analytics_member_id
            , medicare_no
            , CASE WHEN medicare_no !~* 's|l|o|i|b|z' -- invalid characters in an MBI
                AND LENGTH(REPLACE(LOWER(TRIM(medicare_no)), '-', '')) = 11
                       THEN UPPER((regexp_match(REGEXP_REPLACE(LOWER(TRIM(medicare_no)), '[s|l|o|i|b|z|-]', '', 'g'),
                                                '[1-9][a-z][0-9a-z][0-9][a-z][0-9a-z][0-9][a-z][a-z][0-9][0-9]'))[1])
                  END mbi
          FROM
              fdw_member_doc.analytics_patients
          WHERE
              medicare_no IS NOT NULL )
  , encounter_icds           AS ( SELECT
                                      b.id                                                                      elation_bill_id
                                    , ARRAY_AGG(i.code ORDER BY bidx.seqno) FILTER ( WHERE i.code IS NOT NULL ) icds
                                  FROM
                                      fdw_member_doc.emr_origin_bill b
                                      JOIN fdw_member_doc.emr_origin_bill_item bi
                                           ON bi.bill_id = b.id AND bi.deletion_time ISNULL
                                      JOIN fdw_member_doc.emr_origin_visit_note vn
                                           ON vn.id = b.visit_note_id AND vn.deletion_time ISNULL
                                      JOIN fdw_member_doc.emr_origin_bill_item_dx bidx ON bi.id = bidx.bill_item_id
                                      JOIN fdw_member_doc.emr_origin_icd10 i ON i.id = bidx.icd10_id
                                      JOIN fdw_member_doc.patients p ON p.emr_origin_patient_id = vn.patient_id
                                  WHERE
                                        p.payer_id IN (2, 4, 5, 6)
                                    AND vn.signed_by_user_id IS NOT NULL
                                    AND vn.signed_time IS NOT NULL
                                    AND i.code !~* 'm545' -- These are non hcc and not actual icds. Errored out on report
                                  GROUP BY
                                      1 )
  , encounter_ra_cpt         AS ( SELECT
                                      b.id                                                    elation_bill_id
                                    , UPPER(bi.cpt)                                           procedure_code
                                    , ROW_NUMBER() OVER (PARTITION BY b.id ORDER BY bi.seqno) rn
                                  FROM
                                      fdw_member_doc.emr_origin_bill b
                                      JOIN encounter_icds ei ON ei.elation_bill_id = b.id
                                      JOIN fdw_member_doc.emr_origin_bill_item bi
                                           ON bi.bill_id = b.id AND bi.deletion_time ISNULL
                                      JOIN fdw_member_doc.procedure_codes pc
                                           ON pc.code = bi.cpt AND pc.is_valid_for_hccs )
  , pre_medicare_filter_list AS ( SELECT
                                      b.id                                                           elation_bill_id
                                    , COALESCE(name_error_fixes.xwalk_memb_first_name, p.first_name) patient_first_name
                                    , COALESCE(name_error_fixes.xwalk_memb_last_name, p.last_name)   patient_last_name
                                    , p.dob                                                          patient_dob
                                    , CASE WHEN mm.xwalk_subscriber_id IS NOT NULL THEN NULL
                                           ELSE COALESCE(mbi, TRIM(gm.medicare_no)) END              mbi -- DO WE WANT TO DO THIS or should this be NULL if it is not an MBI?
                                    , COALESCE(mm.xwalk_subscriber_id, p.subscriber_id)              member_id
                                    , LEFT(p.gender, 1)                                              gender
                                    , vn.document_date ::DATE                                        date_of_service
                                    , emr_u.first_name                                               provider_first_name
                                    , emr_u.last_name                                                provider_last_name
                                    , emr_u.email                                                    provider_email
                                    , emr_u.npi                                                      provider_npi
                                    , ntc.medicare_specialty_code_int                                medicare_specialty_code_int
                                    , '84-2590508'                                                   tax_id
                                    , '02'                                                           place_of_service
                                    --   , b.place_of_service
                                    , 'A'                                                            ra_code
                                    --, pa.line1                                     line1
                                    --, pa.line2                                     line2
                                    --, pa.city                                      city
                                    --, pa.state                                     state
                                    --, pa.postal_code                               postal_code
                                    , ecpt.procedure_code                                            procedure_code
                                    , ei.icds                                                        icds
                                    , p.analytics_member_id
                                    , p.id                                                           golgi_id
                                  FROM
                                      fdw_member_doc.emr_origin_bill b
                                      JOIN encounter_icds ei ON ei.elation_bill_id = b.id
                                      JOIN encounter_ra_cpt ecpt
                                           ON ei.elation_bill_id = ecpt.elation_bill_id AND ecpt.rn = 1
                                      JOIN fdw_member_doc.emr_origin_visit_note vn
                                           ON vn.id = b.visit_note_id AND vn.deletion_time ISNULL
                                      JOIN fdw_member_doc.emr_origin_user emr_u ON emr_u.id = vn.physician_user_id
                                      LEFT JOIN fdw_member_doc.provider_taxonomy_codes ptc ON ptc.npi = emr_u.npi
                                      LEFT JOIN ref.npi_taxonomy_crosswalk ntc
                                                ON ntc.provider_taxonomy_code = ptc.taxonomy_code
                                      JOIN fdw_member_doc.patients p ON p.emr_origin_patient_id = vn.patient_id
                                      LEFT JOIN good_mbi gm
                                                ON gm.analytics_member_id = p.analytics_member_id AND mbi IS NOT NULL
                                      LEFT JOIN junk.uhc_supp_file_ln_mismatches name_error_fixes
                                                ON name_error_fixes.golgi_patient_id = p.id
                                      LEFT JOIN junk.uhc_supp_file_membership_mismatches mm
                                                ON mm.golgi_patient_id = p.id
                                  WHERE
                                        p.payer_id IN (2, 4, 5, 6)
                                    AND vn.signed_by_user_id IS NOT NULL
                                    AND vn.signed_time IS NOT NULL
                                        -- AND ap.line_of_business_name != 'medicaid'
                                    AND p.id NOT IN (9888, 2911, 6868) -- not on latest cross walk
                                    and (mm.golgi_patient_id is not null or name_error_fixes.golgi_patient_id is not null) -- only want error folk
                                  ORDER BY
                                      vn.document_date::DATE )
  , ever_medicare            AS ( SELECT DISTINCT
                                      pl.analytics_member_id
                                    , pl.golgi_id
                                  FROM
                                      ( SELECT DISTINCT
                                            analytics_member_id
                                          , golgi_id
                                        FROM
                                            pre_medicare_filter_list x ) pl
                                      JOIN cb.members m ON m.id = pl.analytics_member_id
                                      JOIN cb.eligibility_days ed
                                           ON ed.member_id = pl.analytics_member_id AND ed.mco_id = m.mco_id AND
                                              ed.line_of_business_id IN (2, 3) )
  , one_address              AS ( SELECT *
                                  FROM
                                      ( SELECT
                                            pa.*
                                          , ROW_NUMBER()
                                            OVER (PARTITION BY em.golgi_id ORDER BY CASE WHEN pa.type = 'home' THEN '1' ELSE pa.type END ASC) rn
                                        FROM
                                            ever_medicare em
                                            JOIN fdw_member_doc.patient_addresses pa ON em.golgi_id = pa.patient_id ) addr
                                  WHERE
                                      addr.rn = 1 )
SELECT
    pml.elation_bill_id
  , pml.patient_first_name
  , pml.patient_last_name
  , pml.patient_dob
  , pml.mbi
  , pml.member_id
  , pml.gender
  , pml.date_of_service
  , pml.provider_first_name
  , pml.provider_last_name
  , pml.provider_email
  , pml.provider_npi
  , pml.medicare_specialty_code_int
  , pml.tax_id
  , pml.place_of_service
  , pml.ra_code
  , oa.line1
  , oa.line2
  , oa.city
  , oa.state
  , oa.postal_code
  , pml.procedure_code
  , pml.icds
FROM
    pre_medicare_filter_list pml
    JOIN ever_medicare em ON em.analytics_member_id = pml.analytics_member_id
    JOIN one_address oa ON oa.patient_id = pml.golgi_id

"""

In [5]:
df = cb_utils.sql_query_to_df(query, use_cache=use_cache)

Pulling query from db


In [6]:
df.shape # 4609 -> 6954

(37, 23)

In [7]:
df.mbi.nunique(),  df.member_id.nunique()

(11, 21)

In [8]:
df.head()

Unnamed: 0,elation_bill_id,patient_first_name,patient_last_name,patient_dob,mbi,member_id,gender,date_of_service,provider_first_name,provider_last_name,provider_email,provider_npi,medicare_specialty_code_int,tax_id,place_of_service,ra_code,line1,line2,city,state,postal_code,procedure_code,icds
0,281413557813404,Robert,Allen,1962-04-17,,117675277,M,2020-10-23,Carol,Scott,carol.scott+disabled31bcc1bf-8484-4c66-a92c-70...,1447355706,50,84-2590508,2,A,506 Green Street,,LEBANON,TN,37087,99204,"[I10, Z74.09, E78.5, K21.9, G82.20, G80.1]"
1,332151448076444,Robert,Allen,1962-04-17,,117675277,M,2021-05-24,Melinda,Henderson,melinda.henderson@carebridgehealth.com,1013966423,11,84-2590508,2,A,506 Green Street,,LEBANON,TN,37087,99214,"[G80.1, Z74.09, I10, E78.5, K21.9]"
2,277158814744732,Bill,Willmington,1947-05-06,,113701125,M,2020-10-07,Melinda,Henderson,melinda.henderson@carebridgehealth.com,1013966423,11,84-2590508,2,A,7110 GUM FORK RD,,WINFIELD,TN,37892,99204,"[I69.351, D69.2, M06.4, F32.5, R60.0, N40.0, K..."
3,283515779612828,Lela,Davis,1927-07-27,,106741245,F,2020-11-02,Kandace,Dalton,Kandace.dalton+disabled256c48fa-00f2-4210-be01...,1417430190,50,84-2590508,2,A,340 Main St N,,SAVANNAH,TN,38372,99204,"[Z74.09, G30.9, F02.80, I69.354, J44.9, I11.0,..."
4,285351900741788,Eddie,Leggins,1940-04-21,,106761976,M,2020-11-09,Kandace,Dalton,Kandace.dalton+disabled256c48fa-00f2-4210-be01...,1417430190,50,84-2590508,2,A,2076 Rile St,,Memphis,TN,38109,99204,"[I11.9, E78.5, G81.94, Z74.09, Z79.4, E11.9]"


In [9]:
# dupe check
assert df.elation_bill_id.nunique() == df.shape[0]

In [10]:
# icd check
assert df.loc[df.icds.isna()].shape[0] == 0

In [11]:
# if more than 40 we need to duplicate the row and add the additional icds on the dupe row
# currently not coded up because the situation doesn't exist
assert df.icds.apply(lambda x: x if x is None else len(set(x))).max() <= 40

#### Pad dx list col to 40

In [12]:
def pad_dx_col(icds):
    if icds is None:
        return [None] * 40
    
    icds = [i.replace('.', '') for i in set(icds)]
    
    if len(icds) < 40:
        return icds + [None] * (40 - len(icds))
    return icds
    
    
df.icds = df.icds.apply(pad_dx_col)

### Format dates

In [13]:
df.date_of_service = pd.to_datetime(df.date_of_service).dt.strftime('%m/%d/%Y')
df.patient_dob = pd.to_datetime(df.patient_dob).dt.strftime('%m/%d/%Y')

### Build rows

In [14]:
def build_file_row(i, row):
    return [
        'DTL', # seg type
        row.elation_bill_id, # ref #
        row.patient_last_name,
        row.patient_first_name,
        None, # MI
        row.patient_dob,
        row.member_id if row.mbi is None else None, # member id
        None, # 'Retrieval NPI (Internal Use)',
        row.gender, # 'GENDER',
        None, # 'STATE CODE',
        row.mbi, # 'MBI-  need either member ID or MBI',
        row.date_of_service, # 'FDOS',
        row.date_of_service, # 'TDOS',
        None, # 'BILL TYPE- Institutional Only',
        None, # 'NU Indicator (Internal Use)',
        None, # 'PROV ID',
        row.provider_npi, # 'NPI',
        None, # 'PROV TYPE',
        None, # 'FACILITY NM- required for Institutional',
        row.provider_last_name, # 'PROV LAST NAME- Required for Professional',
        row.provider_first_name, # 'PROV FIRST NAME- required for Professional',
        f'{row.medicare_specialty_code_int}', # 'CMS SPECIALTY- Required for Professional',
        row.tax_id, # 'TAX ID',
        row.procedure_code, # 'CPT- Professional and Hospital Outpatient only',
        None, # 'REV CODE - Required for Institutional',
        None, # 'SERVICE FDOS',
        None, # 'SERVICE TDOS',
        row.place_of_service, # 'POS- Professional only',
        '0', # 'ICD INDIC',
        row.ra_code, # 'RA Code- Required for Professional',
        None, # 'Chart Barcode (Internal Use)',
        None, # 'Chart Enc Key (Internal Use)',
        None, # 'Chart DX Key  (Internal Use)',
        None, # 'Contract ID (Tufts use only)',
        row.line1, # 'Mem Street Address ',
        row.line2, # 'Mem Address 2',
        row.city, # 'Mem City',
        row.state, # 'Mem State',
        row.postal_code, # 'Mem Zip Code',
        None, # 'CLAIMID/PCN', -- This would link it to an existing claim and make it a resubmit
    ] + row.icds

rows = []
for i, row in df.iterrows():
    rows.append(build_file_row(i, row))

### Add header and footer rows

In [15]:
n_rows, n_columns = len(rows), len(rows[0])
# 100k limit per file
assert n_rows <= 100000

training_partner_id = '00795'
file_name = f'{training_partner_id}_UHC_ASMP_DIRECT_{datetime.now().strftime("%Y%m%d%H%M%S")}.txt'

header_vals = [
    'HDR',
    '8.9',
    training_partner_id,
    file_name,
    'UHC',
    'DIRECT',
    'P',
    'Y'
]
header_vals = header_vals + [None] * (n_columns - len(header_vals))
footer_vals = [
    'TRL',
    training_partner_id,
    n_rows
]
footer_vals = footer_vals + [None] * (n_columns - len(footer_vals))

In [16]:
rows.insert(0, header_vals)
rows.append(footer_vals)

### Build File

In [17]:
dx_cols = [f'DX_{i}' for i in range(40)]
columns = [
    'SEG TYPE',
    'REF #',
    'LAST NAME',
    'FIRST NAME',
    'MI',
    'DOB',
    'MEMBER ID- need either member ID or MBI',
    'Retrieval NPI (Internal Use)',
    'GENDER',
    'STATE CODE',
    'MBI-  need either member ID or MBI',
    'FDOS',
    'TDOS',
    'BILL TYPE- Institutional Only',
    'NU Indicator (Internal Use)',
    'PROV ID',
    'NPI',
    'PROV TYPE',
    'FACILITY NM- required for Institutional',
    'PROV LAST NAME- Required for Professional',
    'PROV FIRST NAME- required for Professional',
    'CMS SPECIALTY- Required for Professional',
    'TAX ID',
    'CPT- Professional and Hospital Outpatient only',
    'REV CODE - Required for Institutional',
    'SERVICE FDOS',
    'SERVICE TDOS',
    'POS- Professional only',
    'ICD INDIC',
    'RA Code- Required for Professional',
    'Chart Barcode (Internal Use)',
    'Chart Enc Key (Internal Use)',
    'Chart DX Key  (Internal Use)',
    'Contract ID (Tufts use only)',
    'Mem Street Address ',
    'Mem Address 2',
    'Mem City',
    'Mem State',
    'Mem Zip Code',
    'CLAIMID/PCN',
] + dx_cols

In [18]:
file_df = pd.DataFrame(rows, columns=columns)

In [19]:
file_df.head(20)

Unnamed: 0,SEG TYPE,REF #,LAST NAME,FIRST NAME,MI,DOB,MEMBER ID- need either member ID or MBI,Retrieval NPI (Internal Use),GENDER,STATE CODE,MBI- need either member ID or MBI,FDOS,TDOS,BILL TYPE- Institutional Only,NU Indicator (Internal Use),PROV ID,NPI,PROV TYPE,FACILITY NM- required for Institutional,PROV LAST NAME- Required for Professional,PROV FIRST NAME- required for Professional,CMS SPECIALTY- Required for Professional,TAX ID,CPT- Professional and Hospital Outpatient only,REV CODE - Required for Institutional,SERVICE FDOS,SERVICE TDOS,POS- Professional only,ICD INDIC,RA Code- Required for Professional,Chart Barcode (Internal Use),Chart Enc Key (Internal Use),Chart DX Key (Internal Use),Contract ID (Tufts use only),Mem Street Address,Mem Address 2,Mem City,Mem State,Mem Zip Code,CLAIMID/PCN,DX_0,DX_1,DX_2,DX_3,DX_4,DX_5,DX_6,DX_7,DX_8,DX_9,DX_10,DX_11,DX_12,DX_13,DX_14,DX_15,DX_16,DX_17,DX_18,DX_19,DX_20,DX_21,DX_22,DX_23,DX_24,DX_25,DX_26,DX_27,DX_28,DX_29,DX_30,DX_31,DX_32,DX_33,DX_34,DX_35,DX_36,DX_37,DX_38,DX_39
0,HDR,8.9,00795,00795_UHC_ASMP_DIRECT_20220409122222.txt,UHC,DIRECT,P,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,DTL,281413557813404.0,Allen,Robert,,04/17/1962,117675277,,M,,,10/23/2020,10/23/2020,,,,1447355706.0,,,Scott,Carol,50.0,84-2590508,99204,,,,2.0,0.0,A,,,,,506 Green Street,,LEBANON,TN,37087.0,,K219,G8220,G801,Z7409,I10,E785,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,DTL,332151448076444.0,Allen,Robert,,04/17/1962,117675277,,M,,,05/24/2021,05/24/2021,,,,1013966423.0,,,Henderson,Melinda,11.0,84-2590508,99214,,,,2.0,0.0,A,,,,,506 Green Street,,LEBANON,TN,37087.0,,K219,G801,Z7409,I10,E785,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,DTL,277158814744732.0,Willmington,Bill,,05/06/1947,113701125,,M,,,10/07/2020,10/07/2020,,,,1013966423.0,,,Henderson,Melinda,11.0,84-2590508,99204,,,,2.0,0.0,A,,,,,7110 GUM FORK RD,,WINFIELD,TN,37892.0,,F0391,K219,F325,N400,K7469,M064,R600,D692,I69351,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,DTL,283515779612828.0,Davis,Lela,,07/27/1927,106741245,,F,,,11/02/2020,11/02/2020,,,,1417430190.0,,,Dalton,Kandace,50.0,84-2590508,99204,,,,2.0,0.0,A,,,,,340 Main St N,,SAVANNAH,TN,38372.0,,Z794,F325,J449,F1320,I509,I69354,F0280,J9611,G309,I110,E1165,Z7409,E785,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,DTL,285351900741788.0,Leggins,Eddie,,04/21/1940,106761976,,M,,,11/09/2020,11/09/2020,,,,1417430190.0,,,Dalton,Kandace,50.0,84-2590508,99204,,,,2.0,0.0,A,,,,,2076 Rile St,,Memphis,TN,38109.0,,Z794,E119,G8194,I119,Z7409,E785,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,DTL,293745218617500.0,Leggins,Eddie,,04/21/1940,106761976,,M,,,12/15/2020,12/15/2020,,,,1013966423.0,,,Henderson,Melinda,11.0,84-2590508,99213,,,,2.0,0.0,A,,,,,2076 Rile St,,Memphis,TN,38109.0,,Z794,M25559,E1169,G8194,I119,E785,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,DTL,301153039024284.0,Leggins,Eddie,,04/21/1940,106761976,,M,,,01/14/2021,01/14/2021,,,,1417430190.0,,,Dalton,Kandace,50.0,84-2590508,99214,,,,2.0,0.0,A,,,,,2076 Rile St,,Memphis,TN,38109.0,,E1165,G8194,Z713,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,DTL,310612139507868.0,Leggins,Eddie,,04/21/1940,106761976,,M,,,02/23/2021,02/23/2021,,,,1619385788.0,,,Poplar,Debra,50.0,84-2590508,99214,,,,2.0,0.0,A,,,,,2076 Rile St,,Memphis,TN,38109.0,,R600,M79605,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,DTL,313663419842716.0,Leggins,Eddie,,04/21/1940,106761976,,M,,,03/08/2021,03/08/2021,,,,1619385788.0,,,Poplar,Debra,50.0,84-2590508,99214,,,,2.0,0.0,A,,,,,2076 Rile St,,Memphis,TN,38109.0,,I878,L03116,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [20]:
file_df.to_csv(f'./data/{file_name}', sep='|', index=False, header=False)

In [None]:
file_name

### response

In [None]:
import re

cols = [re.sub('[^0-9a-zA-Z]+', '_', c.lower()) for c in columns+['err_id', 'error']]
df = pd.read_csv('/Users/bp/workspace/cb/data/00795_UHC_ASMP_DIRECT_20220328162146_RESUB1.txt',
                 sep='|', header=None, skipfooter=1, skiprows=1, names=cols, engine='python'
                )
df.head()


In [None]:
df.error.value_counts()

In [None]:
# HDR|8.9|00795|00795_UHC_ASMP_DIRECT_20220328162146_RESUB1.txt|UHC|DIRECT|P|Y

In [None]:
df.loc[df.error == 'err18-Blank MemberID; err40-Member not found in plan membership;']

In [None]:
df.to_csv('/Users/bp/Downloads/uhc_supp_file_errors.csv', index=False)