In [2]:
import os
import sys
import time
import random
import warnings
import collections
from dateutil.relativedelta import relativedelta
from datetime import datetime
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns

sys.path.append('../../src')
import cb_utils

sns.set(style="darkgrid")
pd.options.display.max_columns = 500

%load_ext autoreload
%autoreload 2

# Resubmit members that had a lastname mismatch error

In [3]:
mbi_to_resubmit = [
    '5RG7YT9AY73',
    '1UC7JJ6TM59',
    '7K65HV5GV71',
    '9AU9A34YK12',
    '8Y18D31PR53',
    '3T82UG8JV12',
    '5CN8Y14YU34',
    '1MT0C67XC34',
    '4UJ1HR2CC27',
    '1A98V71CV24',
    '8P57Q74YG39',
    '1U27KM7PU36',
    '3RA8V54EJ46',
    '8YY2DK2UX86',
    '9GH3YR7FA15',
    '9G85GA1CG52',
    '3AF9P47TT17',
    '4K05T38MH74',
    '4M30JA1UN42',
    '6TV7TJ6AM54',
    '6TV7TJ6AM54'
]

# configuration
use_cache = False
seed = random.randint(0, 100)
from_date = '2022-12-20'

print(f'Seed: {seed}')

Seed: 79


### Pull Data

In [4]:
query = f"""
SELECT * FROM perm.supp_file_uhc_20231220;
"""

In [5]:
df = cb_utils.sql_query_to_df(query, use_cache=use_cache)

Pulling query from db


In [6]:
df.shape # 4609 -> 6954

(7223, 23)

In [7]:
df.mbi.nunique(),  df.member_id.nunique()

(6728, 6736)

In [8]:
df = df.loc[df.mbi.isin(mbi_to_resubmit)] 

In [9]:
df.shape

(21, 23)

In [11]:
df.patient_last_name = ''

In [12]:
df.head()

Unnamed: 0,elation_bill_id,patient_first_name,patient_last_name,patient_dob,mbi,member_id,gender,date_of_service,provider_first_name,provider_last_name,provider_email,provider_npi,medicare_specialty_code_int,tax_id,place_of_service,ra_code,line1,line2,city,state,postal_code,procedure_code,icds
313,651235198632092,Jeanette,,1939-02-24,5RG7YT9AY73,121723611,F,2023-08-23,Piper,Hammond,Piper.Hammond@carebridgehealth.com,1376196717,50,84-2590508,10,A,928 Highway 100 west,,Centerville,TN,37033,99214,"[I69.351, M06.9, F01.50, F01.50, F32.1, I71.9,..."
375,706086749143196,Ginger,,1941-05-15,1UC7JJ6TM59,115706736,F,2023-12-13,Tera,Gearhart,Tera.Gearhart@carebridgehealth.com,1720276157,50,84-2590508,10,A,786 hartland lane,,Savannah,TN,38372,99214,"[F48.2, Z68.25, G30.9, F02.80, I69.351, Z91.81..."
707,666899283312796,Esther,,1957-09-18,7K65HV5GV71,125886377,F,2023-09-25,Clarissa,Dumdei,Clarissa.Dumdei@carebridgehealth.com,1881988624,50,84-2590508,10,A,3227 Pecan St,,Houston,TX,77087,99212,"[E11.3299, E11.40, R29.6, Z79.4, J44.9, M15.3,..."
806,683115733319836,Ophelia,,1937-06-09,9AU9A34YK12,115857011,F,2023-10-25,Chelsey,Hanen,Chelsey.Hanen@carebridgehealth.com,1407427214,50,84-2590508,10,A,1232 N Piatt Ave,,Wichita,KS,67214,99214,"[I13.0, I13.0, I13.0, N18.31, I50.9, E26.1, F3..."
1099,694365678600348,Rebecca,,1958-08-26,8Y18D31PR53,126393415,F,2023-11-17,Chelsey,Hanen,Chelsey.Hanen@carebridgehealth.com,1407427214,50,84-2590508,10,A,133 Deanly Dr,,Pittsburg,KS,66762,99204,"[I10, J44.9, J44.9, F32.9, M79.604]"


In [13]:
# dupe check
assert df.elation_bill_id.nunique() == df.shape[0]

In [14]:
# icd check
assert df.loc[df.icds.isna()].shape[0] == 0

In [15]:
# if more than 40 we need to duplicate the row and add the additional icds on the dupe row
# currently not coded up because the situation doesn't exist
assert df.icds.apply(lambda x: x if x is None else len(set(x))).max() <= 40

#### Pad dx list col to 40

In [16]:
def pad_dx_col(icds):
    if icds is None:
        return [None] * 40
    
    icds = [i.replace('.', '') for i in set(icds)]
    
    if len(icds) < 40:
        return icds + [None] * (40 - len(icds))
    return icds
    
    
df.icds = df.icds.apply(pad_dx_col)

### Format dates

In [17]:
df.date_of_service = pd.to_datetime(df.date_of_service).dt.strftime('%m/%d/%Y')
df.patient_dob = pd.to_datetime(df.patient_dob).dt.strftime('%m/%d/%Y')

### Pad CMS IDs

In [18]:
df.place_of_service = df.place_of_service.str.pad(width=2, side='left', fillchar='0')
df.medicare_specialty_code_int = df.medicare_specialty_code_int.str.pad(width=2, side='left', fillchar='0')

### Build rows

In [19]:
def build_file_row(i, row):
    return [
        'DTL', # seg type
        row.elation_bill_id, # ref #
        row.patient_last_name,
        row.patient_first_name,
        None, # MI
        row.patient_dob,
        row.member_id if row.mbi is None else None, # member id
        None, # 'Retrieval NPI (Internal Use)',
        row.gender, # 'GENDER',
        None, # 'STATE CODE',
        row.mbi, # 'MBI-  need either member ID or MBI',
        row.date_of_service, # 'FDOS',
        row.date_of_service, # 'TDOS',
        None, # 'BILL TYPE- Institutional Only',
        None, # 'NU Indicator (Internal Use)',
        None, # 'PROV ID',
        row.provider_npi, # 'NPI',
        None, # 'PROV TYPE',
        None, # 'FACILITY NM- required for Institutional',
        row.provider_last_name, # 'PROV LAST NAME- Required for Professional',
        row.provider_first_name, # 'PROV FIRST NAME- required for Professional',
        f'{row.medicare_specialty_code_int}', # 'CMS SPECIALTY- Required for Professional',
        row.tax_id, # 'TAX ID',
        row.procedure_code, # 'CPT- Professional and Hospital Outpatient only',
        None, # 'REV CODE - Required for Institutional',
        None, # 'SERVICE FDOS',
        None, # 'SERVICE TDOS',
        row.place_of_service, # 'POS- Professional only',
        '0', # 'ICD INDIC',
        row.ra_code, # 'RA Code- Required for Professional',
        None, # 'Chart Barcode (Internal Use)',
        None, # 'Chart Enc Key (Internal Use)',
        None, # 'Chart DX Key  (Internal Use)',
        None, # 'Contract ID (Tufts use only)',
        row.line1, # 'Mem Street Address ',
        row.line2, # 'Mem Address 2',
        row.city, # 'Mem City',
        row.state, # 'Mem State',
        row.postal_code, # 'Mem Zip Code',
        None, # 'CLAIMID/PCN', -- This would link it to an existing claim and make it a resubmit
    ] + row.icds

rows = []
for i, row in df.iterrows():
    rows.append(build_file_row(i, row))

### Add header and footer rows

In [20]:
n_rows, n_columns = len(rows), len(rows[0])
# 100k limit per file
assert n_rows <= 100000

training_partner_id = '00795'
file_name = f'{training_partner_id}_UHC_ASMP_DIRECT_{datetime.now().strftime("%Y%m%d%H%M%S")}.txt'

header_vals = [
    'HDR',
    '8.9',
    training_partner_id,
    file_name,
    'UHC',
    'DIRECT',
    'P',
    'Y'
]
header_vals = header_vals + [None] * (n_columns - len(header_vals))
footer_vals = [
    'TRL',
    training_partner_id,
    n_rows
]
footer_vals = footer_vals + [None] * (n_columns - len(footer_vals))

In [21]:
rows.insert(0, header_vals)
rows.append(footer_vals)

### Build File

In [22]:
dx_cols = [f'DX_{i}' for i in range(40)]
columns = [
    'SEG TYPE',
    'REF #',
    'LAST NAME',
    'FIRST NAME',
    'MI',
    'DOB',
    'MEMBER ID- need either member ID or MBI',
    'Retrieval NPI (Internal Use)',
    'GENDER',
    'STATE CODE',
    'MBI-  need either member ID or MBI',
    'FDOS',
    'TDOS',
    'BILL TYPE- Institutional Only',
    'NU Indicator (Internal Use)',
    'PROV ID',
    'NPI',
    'PROV TYPE',
    'FACILITY NM- required for Institutional',
    'PROV LAST NAME- Required for Professional',
    'PROV FIRST NAME- required for Professional',
    'CMS SPECIALTY- Required for Professional',
    'TAX ID',
    'CPT- Professional and Hospital Outpatient only',
    'REV CODE - Required for Institutional',
    'SERVICE FDOS',
    'SERVICE TDOS',
    'POS- Professional only',
    'ICD INDIC',
    'RA Code- Required for Professional',
    'Chart Barcode (Internal Use)',
    'Chart Enc Key (Internal Use)',
    'Chart DX Key  (Internal Use)',
    'Contract ID (Tufts use only)',
    'Mem Street Address ',
    'Mem Address 2',
    'Mem City',
    'Mem State',
    'Mem Zip Code',
    'CLAIMID/PCN',
] + dx_cols

In [23]:
file_df = pd.DataFrame(rows, columns=columns)

In [24]:
file_df.head(20)

Unnamed: 0,SEG TYPE,REF #,LAST NAME,FIRST NAME,MI,DOB,MEMBER ID- need either member ID or MBI,Retrieval NPI (Internal Use),GENDER,STATE CODE,MBI- need either member ID or MBI,FDOS,TDOS,BILL TYPE- Institutional Only,NU Indicator (Internal Use),PROV ID,NPI,PROV TYPE,FACILITY NM- required for Institutional,PROV LAST NAME- Required for Professional,PROV FIRST NAME- required for Professional,CMS SPECIALTY- Required for Professional,TAX ID,CPT- Professional and Hospital Outpatient only,REV CODE - Required for Institutional,SERVICE FDOS,SERVICE TDOS,POS- Professional only,ICD INDIC,RA Code- Required for Professional,Chart Barcode (Internal Use),Chart Enc Key (Internal Use),Chart DX Key (Internal Use),Contract ID (Tufts use only),Mem Street Address,Mem Address 2,Mem City,Mem State,Mem Zip Code,CLAIMID/PCN,DX_0,DX_1,DX_2,DX_3,DX_4,DX_5,DX_6,DX_7,DX_8,DX_9,DX_10,DX_11,DX_12,DX_13,DX_14,DX_15,DX_16,DX_17,DX_18,DX_19,DX_20,DX_21,DX_22,DX_23,DX_24,DX_25,DX_26,DX_27,DX_28,DX_29,DX_30,DX_31,DX_32,DX_33,DX_34,DX_35,DX_36,DX_37,DX_38,DX_39
0,HDR,8.9,795.0,00795_UHC_ASMP_DIRECT_20240108121208.txt,UHC,DIRECT,P,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,DTL,651235198632092.0,,Jeanette,,02/24/1939,,,F,,5RG7YT9AY73,08/23/2023,08/23/2023,,,,1376196717.0,,,Hammond,Piper,50.0,84-2590508,99214.0,,,,10.0,0.0,A,,,,,928 Highway 100 west,,Centerville,TN,37033.0,,I69351,E785,M069,Z6822,I719,F32A,M797,F321,F0150,J45909,N390,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,DTL,706086749143196.0,,Ginger,,05/15/1941,,,F,,1UC7JJ6TM59,12/13/2023,12/13/2023,,,,1720276157.0,,,Gearhart,Tera,50.0,84-2590508,99214.0,,,,10.0,0.0,A,,,,,786 hartland lane,,Savannah,TN,38372.0,,I69351,R32,Z6825,N390,E1169,F0280,G8929,M1990,G309,D692,I69391,E119,K219,J449,I69320,E785,B379,I110,Z9181,F482,Z794,F1120,E1142,,,,,,,,,,,,,,,,,
3,DTL,666899283312796.0,,Esther,,09/18/1957,,,F,,7K65HV5GV71,09/25/2023,09/25/2023,,,,1881988624.0,,,Dumdei,Clarissa,50.0,84-2590508,99212.0,,,,10.0,0.0,A,,,,,3227 Pecan St,,Houston,TX,77087.0,,J449,M153,I509,E113299,Z794,R296,E1140,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,DTL,683115733319836.0,,Ophelia,,06/09/1937,,,F,,9AU9A34YK12,10/25/2023,10/25/2023,,,,1407427214.0,,,Hanen,Chelsey,50.0,84-2590508,99214.0,,,,10.0,0.0,A,,,,,1232 N Piatt Ave,,Wichita,KS,67214.0,,E785,F330,E261,E1140,N3281,I509,J449,M109,N1830,I480,D6869,N1831,I130,I25119,E6601,,,,,,,,,,,,,,,,,,,,,,,,,
5,DTL,694365678600348.0,,Rebecca,,08/26/1958,,,F,,8Y18D31PR53,11/17/2023,11/17/2023,,,,1407427214.0,,,Hanen,Chelsey,50.0,84-2590508,99204.0,,,,10.0,0.0,A,,,,,133 Deanly Dr,,Pittsburg,KS,66762.0,,I10,M79604,J449,F329,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,DTL,623148216221852.0,,Becca,,01/14/1946,,,F,,3T82UG8JV12,06/22/2023,06/22/2023,,,,1255606190.0,,,Shroth,Michael,50.0,84-2590508,99214.0,,,,10.0,0.0,A,,,,,3528 E 14th St N,,Wichita,KS,67208.0,,E785,G4700,Z6835,E669,N1832,K8689,I129,M109,F17200,H409,E6601,F322,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7,DTL,662092581240988.0,,Melissa,,02/19/1973,,,F,,5CN8Y14YU34,09/14/2023,09/14/2023,,,,1306445846.0,,,Ball,Stephanie,50.0,84-2590508,99214.0,,,,10.0,0.0,A,,,,,25034 Brown Road,,Cherryvale,KS,67335.0,,G40909,K739,F988,E1040,D8481,F1320,K5900,Z85038,F1120,Z6823,F319,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,DTL,629551297593500.0,,Lisa,,07/04/1971,,,F,,1MT0C67XC34,07/07/2023,07/07/2023,,,,1407427214.0,,,Hanen,Chelsey,50.0,84-2590508,99214.0,,,,10.0,0.0,A,,,,,900 N Woodlawn,124,Wellington,KS,67152.0,,G039,I10,B379,E1169,E785,E119,R2689,M5020,M419,M1990,M5134,M533,G43909,J45909,Z90710,Z6837,G5700,E6601,,,,,,,,,,,,,,,,,,,,,,
9,DTL,625613189480604.0,,Lisa,,05/23/1967,,,F,,4UJ1HR2CC27,06/27/2023,06/27/2023,,,,1316285174.0,,,Harpe,Heather,50.0,84-2590508,99214.0,,,,10.0,0.0,A,,,,,703 Prospect Ave,,Charlottesville,VA,22903.0,,E785,E1169,J449,T753XXA,Z8673,E1151,I69354,F1120,D329,I25119,E1165,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [26]:
file_df.to_csv(f'../data/{file_name}', sep='|', index=False, header=False)

In [27]:
file_name

'00795_UHC_ASMP_DIRECT_20240108121208.txt'

### response

In [None]:
import re

cols = [re.sub('[^0-9a-zA-Z]+', '_', c.lower()) for c in columns+['err_id', 'error']]
df = pd.read_csv('/Users/bp/workspace/cb/data/00795_UHC_ASMP_DIRECT_20220328162146_RESUB1.txt',
                 sep='|', header=None, skipfooter=1, skiprows=1, names=cols, engine='python'
                )
df.head()


In [None]:
df.error.value_counts()

In [None]:
# HDR|8.9|00795|00795_UHC_ASMP_DIRECT_20220328162146_RESUB1.txt|UHC|DIRECT|P|Y

In [None]:
df.loc[df.error == 'err18-Blank MemberID; err40-Member not found in plan membership;']

In [1]:
df.to_csv('/Users/bp/Downloads/uhc_supp_file_errors_20230615.csv', index=False)

NameError: name 'df' is not defined