In [1]:
import os
import sys
import time
import random
import warnings
import collections
from dateutil.relativedelta import relativedelta
from datetime import datetime
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns

sys.path.append('../../src')
import cb_utils

sns.set(style="darkgrid")
pd.options.display.max_columns = 500

%load_ext autoreload
%autoreload 2

# Script to generate supplemental file for claim submission
Currently configured for UHC, other plans may be signifigantly different

In [2]:
# configuration
use_cache = False
seed = random.randint(0, 100)
from_date = '2022-12-20'

print(f'Seed: {seed}')

Seed: 59


### Pull Data

In [10]:
query = f"""
SELECT * FROM perm.supp_file_uhc_20231220;
"""

In [11]:
df = cb_utils.sql_query_to_df(query, use_cache=use_cache)

Pulling query from db


In [12]:
df.shape # 4609 -> 6954

(7223, 23)

In [13]:
df.mbi.nunique(),  df.member_id.nunique()

(6728, 6736)

In [14]:
df.head()

Unnamed: 0,elation_bill_id,patient_first_name,patient_last_name,patient_dob,mbi,member_id,gender,date_of_service,provider_first_name,provider_last_name,provider_email,provider_npi,medicare_specialty_code_int,tax_id,place_of_service,ra_code,line1,line2,city,state,postal_code,procedure_code,icds
0,634150306381980,Elizabeth,Gallon,1951-03-27,8HH0UU4DN45,112803279,F,2023-07-17,Kathy,Agiri,kathy.agiri@carebridgehealth.com,1619328531,50,84-2590508,10,A,7486 E Holmes Rd,,Memphis,TN,38125,99214,"[E11.22, E11.22, E11.9, N18.31, E11.9, G30.9, ..."
1,687062888808604,Edith,Schlayer,1950-03-08,7GJ5T84QD50,106739268,F,2023-11-02,Jessica,Flippo,jessica.flippo@carebridgehealth.com,1194221317,50,84-2590508,10,A,601 E BARTON RIDGE RD,Apt 102,GREENEVILLE,TN,37745,99213,"[E66.01, Z68.43, J44.9, J44.9, I50.9, E26.1, I..."
2,671375910961308,David,Pavkov,1945-09-29,9TN4X20XM50,117338591,M,2023-10-03,Rachel,Fernandez,rachel.fernandez@carebridgehealth.com,1932703766,50,84-2590508,10,A,1100 RIDGECREST ST,,MORRISTOWN,TN,37814,99214,"[Z68.20, Z68.20, J44.9, J96.11, J96.11, F33.0,..."
3,626138868940956,Paul,Jones,1964-07-20,6NH8VQ8FR50,106769569,M,2023-06-28,Leanne,Balmer,Leanne.Balmer@carebridgehealth.com,1629335120,50,84-2590508,10,A,7823 GLEASON DR,APT. 203,KNOXVILLE,TN,37919,99214,"[R26.2, Q65.89, F32.4, E11.69, E78.5, M19.90]"
4,708435485786268,Lillie,Brewster,1959-09-14,6PY5X76QT52,115638846,F,2023-12-18,Leanne,Balmer,Leanne.Balmer@carebridgehealth.com,1629335120,50,84-2590508,10,A,7845 Basswood Drive Apt A,,Chattanooga,TN,37416,99213,"[I73.9, I10, J44.9, F20.9, F31.9, E78.5]"


In [15]:
# dupe check
assert df.elation_bill_id.nunique() == df.shape[0]

In [16]:
# icd check
assert df.loc[df.icds.isna()].shape[0] == 0

In [17]:
# if more than 40 we need to duplicate the row and add the additional icds on the dupe row
# currently not coded up because the situation doesn't exist
assert df.icds.apply(lambda x: x if x is None else len(set(x))).max() <= 40

#### Pad dx list col to 40

In [18]:
def pad_dx_col(icds):
    if icds is None:
        return [None] * 40
    
    icds = [i.replace('.', '') for i in set(icds)]
    
    if len(icds) < 40:
        return icds + [None] * (40 - len(icds))
    return icds
    
    
df.icds = df.icds.apply(pad_dx_col)

### Format dates

In [19]:
df.date_of_service = pd.to_datetime(df.date_of_service).dt.strftime('%m/%d/%Y')
df.patient_dob = pd.to_datetime(df.patient_dob).dt.strftime('%m/%d/%Y')

### Pad CMS IDs

In [20]:
df.place_of_service = df.place_of_service.str.pad(width=2, side='left', fillchar='0')
df.medicare_specialty_code_int = df.medicare_specialty_code_int.str.pad(width=2, side='left', fillchar='0')

### Build rows

In [21]:
def build_file_row(i, row):
    return [
        'DTL', # seg type
        row.elation_bill_id, # ref #
        row.patient_last_name,
        row.patient_first_name,
        None, # MI
        row.patient_dob,
        row.member_id if row.mbi is None else None, # member id
        None, # 'Retrieval NPI (Internal Use)',
        row.gender, # 'GENDER',
        None, # 'STATE CODE',
        row.mbi, # 'MBI-  need either member ID or MBI',
        row.date_of_service, # 'FDOS',
        row.date_of_service, # 'TDOS',
        None, # 'BILL TYPE- Institutional Only',
        None, # 'NU Indicator (Internal Use)',
        None, # 'PROV ID',
        row.provider_npi, # 'NPI',
        None, # 'PROV TYPE',
        None, # 'FACILITY NM- required for Institutional',
        row.provider_last_name, # 'PROV LAST NAME- Required for Professional',
        row.provider_first_name, # 'PROV FIRST NAME- required for Professional',
        f'{row.medicare_specialty_code_int}', # 'CMS SPECIALTY- Required for Professional',
        row.tax_id, # 'TAX ID',
        row.procedure_code, # 'CPT- Professional and Hospital Outpatient only',
        None, # 'REV CODE - Required for Institutional',
        None, # 'SERVICE FDOS',
        None, # 'SERVICE TDOS',
        row.place_of_service, # 'POS- Professional only',
        '0', # 'ICD INDIC',
        row.ra_code, # 'RA Code- Required for Professional',
        None, # 'Chart Barcode (Internal Use)',
        None, # 'Chart Enc Key (Internal Use)',
        None, # 'Chart DX Key  (Internal Use)',
        None, # 'Contract ID (Tufts use only)',
        row.line1, # 'Mem Street Address ',
        row.line2, # 'Mem Address 2',
        row.city, # 'Mem City',
        row.state, # 'Mem State',
        row.postal_code, # 'Mem Zip Code',
        None, # 'CLAIMID/PCN', -- This would link it to an existing claim and make it a resubmit
    ] + row.icds

rows = []
for i, row in df.iterrows():
    rows.append(build_file_row(i, row))

### Add header and footer rows

In [22]:
n_rows, n_columns = len(rows), len(rows[0])
# 100k limit per file
assert n_rows <= 100000

training_partner_id = '00795'
file_name = f'{training_partner_id}_UHC_ASMP_DIRECT_{datetime.now().strftime("%Y%m%d%H%M%S")}.txt'

header_vals = [
    'HDR',
    '8.9',
    training_partner_id,
    file_name,
    'UHC',
    'DIRECT',
    'P',
    'Y'
]
header_vals = header_vals + [None] * (n_columns - len(header_vals))
footer_vals = [
    'TRL',
    training_partner_id,
    n_rows
]
footer_vals = footer_vals + [None] * (n_columns - len(footer_vals))

In [23]:
rows.insert(0, header_vals)
rows.append(footer_vals)

### Build File

In [24]:
dx_cols = [f'DX_{i}' for i in range(40)]
columns = [
    'SEG TYPE',
    'REF #',
    'LAST NAME',
    'FIRST NAME',
    'MI',
    'DOB',
    'MEMBER ID- need either member ID or MBI',
    'Retrieval NPI (Internal Use)',
    'GENDER',
    'STATE CODE',
    'MBI-  need either member ID or MBI',
    'FDOS',
    'TDOS',
    'BILL TYPE- Institutional Only',
    'NU Indicator (Internal Use)',
    'PROV ID',
    'NPI',
    'PROV TYPE',
    'FACILITY NM- required for Institutional',
    'PROV LAST NAME- Required for Professional',
    'PROV FIRST NAME- required for Professional',
    'CMS SPECIALTY- Required for Professional',
    'TAX ID',
    'CPT- Professional and Hospital Outpatient only',
    'REV CODE - Required for Institutional',
    'SERVICE FDOS',
    'SERVICE TDOS',
    'POS- Professional only',
    'ICD INDIC',
    'RA Code- Required for Professional',
    'Chart Barcode (Internal Use)',
    'Chart Enc Key (Internal Use)',
    'Chart DX Key  (Internal Use)',
    'Contract ID (Tufts use only)',
    'Mem Street Address ',
    'Mem Address 2',
    'Mem City',
    'Mem State',
    'Mem Zip Code',
    'CLAIMID/PCN',
] + dx_cols

In [25]:
file_df = pd.DataFrame(rows, columns=columns)

In [26]:
file_df.head(20)

Unnamed: 0,SEG TYPE,REF #,LAST NAME,FIRST NAME,MI,DOB,MEMBER ID- need either member ID or MBI,Retrieval NPI (Internal Use),GENDER,STATE CODE,MBI- need either member ID or MBI,FDOS,TDOS,BILL TYPE- Institutional Only,NU Indicator (Internal Use),PROV ID,NPI,PROV TYPE,FACILITY NM- required for Institutional,PROV LAST NAME- Required for Professional,PROV FIRST NAME- required for Professional,CMS SPECIALTY- Required for Professional,TAX ID,CPT- Professional and Hospital Outpatient only,REV CODE - Required for Institutional,SERVICE FDOS,SERVICE TDOS,POS- Professional only,ICD INDIC,RA Code- Required for Professional,Chart Barcode (Internal Use),Chart Enc Key (Internal Use),Chart DX Key (Internal Use),Contract ID (Tufts use only),Mem Street Address,Mem Address 2,Mem City,Mem State,Mem Zip Code,CLAIMID/PCN,DX_0,DX_1,DX_2,DX_3,DX_4,DX_5,DX_6,DX_7,DX_8,DX_9,DX_10,DX_11,DX_12,DX_13,DX_14,DX_15,DX_16,DX_17,DX_18,DX_19,DX_20,DX_21,DX_22,DX_23,DX_24,DX_25,DX_26,DX_27,DX_28,DX_29,DX_30,DX_31,DX_32,DX_33,DX_34,DX_35,DX_36,DX_37,DX_38,DX_39
0,HDR,8.9,00795,00795_UHC_ASMP_DIRECT_20231220105150.txt,UHC,DIRECT,P,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,DTL,634150306381980.0,Gallon,Elizabeth,,03/27/1951,,,F,,8HH0UU4DN45,07/17/2023,07/17/2023,,,,1619328531.0,,,Agiri,Kathy,50.0,84-2590508,99214.0,,,,10.0,0.0,A,,,,,7486 E Holmes Rd,,Memphis,TN,38125.0,,E1122,F02B0,I69365,R630,E559,M1990,E119,N1831,I129,E785,G309,I10,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,DTL,687062888808604.0,Schlayer,Edith,,03/08/1950,,,F,,7GJ5T84QD50,11/02/2023,11/02/2023,,,,1194221317.0,,,Flippo,Jessica,50.0,84-2590508,99213.0,,,,10.0,0.0,A,,,,,601 E BARTON RIDGE RD,Apt 102,GREENEVILLE,TN,37745.0,,Z6843,F0390,J449,I509,E1151,E6601,E261,F330,F419,G4730,Z79899,E1142,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,DTL,671375910961308.0,Pavkov,David,,09/29/1945,,,M,,9TN4X20XM50,10/03/2023,10/03/2023,,,,1932703766.0,,,Fernandez,Rachel,50.0,84-2590508,99214.0,,,,10.0,0.0,A,,,,,1100 RIDGECREST ST,,MORRISTOWN,TN,37814.0,,J449,F330,R130,E1151,I739,I69354,F01B4,Z6820,H9190,J9611,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,DTL,626138868940956.0,Jones,Paul,,07/20/1964,,,M,,6NH8VQ8FR50,06/28/2023,06/28/2023,,,,1629335120.0,,,Balmer,Leanne,50.0,84-2590508,99214.0,,,,10.0,0.0,A,,,,,7823 GLEASON DR,APT. 203,KNOXVILLE,TN,37919.0,,Q6589,R262,M1990,E785,F324,E1169,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,DTL,708435485786268.0,Brewster,Lillie,,09/14/1959,,,F,,6PY5X76QT52,12/18/2023,12/18/2023,,,,1629335120.0,,,Balmer,Leanne,50.0,84-2590508,99213.0,,,,10.0,0.0,A,,,,,7845 Basswood Drive Apt A,,Chattanooga,TN,37416.0,,F319,J449,I739,F209,E785,I10,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
6,DTL,632730834370716.0,Jones,Mary,,06/22/1935,,,F,,1JC1EQ6EJ07,07/13/2023,07/13/2023,,,,1619328531.0,,,Agiri,Kathy,50.0,84-2590508,99214.0,,,,10.0,0.0,A,,,,,2010 Old Knoxville Hwy,,Sevierville,TN,37876.0,,G40909,F0390,K219,E039,D692,F1320,I509,Z89411,E261,I739,F419,I110,E441,Z89419,F03B0,,,,,,,,,,,,,,,,,,,,,,,,,
7,DTL,675564182503580.0,DODSON,NADINE,,01/20/1955,,,F,,8MK2F04QD89,10/11/2023,10/11/2023,,,,1619328531.0,,,Agiri,Kathy,50.0,84-2590508,99204.0,,,,10.0,0.0,A,,,,,1700 Dave Buck Rd,Apt 50,Johnson City,TN,37601.0,,Z6841,I25119,E1122,K219,F1320,J449,E6601,M5030,G4700,Z794,E785,N1831,Z9981,I129,E1142,F411,J9611,,,,,,,,,,,,,,,,,,,,,,,
8,DTL,688916259602588.0,DODSON,NADINE,,01/20/1955,,,F,,8MK2F04QD89,11/07/2023,11/07/2023,,,,1619328531.0,,,Agiri,Kathy,50.0,84-2590508,99213.0,,,,10.0,0.0,A,,,,,1700 Dave Buck Rd,Apt 50,Johnson City,TN,37601.0,,E1142,Z794,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,DTL,624602750451868.0,Warren,April,,01/15/1980,,,F,,1FP3FW6DP91,06/26/2023,06/26/2023,,,,1932703766.0,,,Fernandez,Rachel,50.0,84-2590508,99214.0,,,,10.0,0.0,A,,,,,132 West Broadway Ave Apt 205,,Maryville,TN,37801.0,,D84821,G629,G35,G8220,Z79899,N3281,Z6820,Z9359,G8250,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [27]:
file_df.to_csv(f'../data/{file_name}', sep='|', index=False, header=False)

In [28]:
file_name

'00795_UHC_ASMP_DIRECT_20231220105150.txt'

### response

In [None]:
import re

cols = [re.sub('[^0-9a-zA-Z]+', '_', c.lower()) for c in columns+['err_id', 'error']]
df = pd.read_csv('/Users/bp/workspace/cb/data/00795_UHC_ASMP_DIRECT_20220328162146_RESUB1.txt',
                 sep='|', header=None, skipfooter=1, skiprows=1, names=cols, engine='python'
                )
df.head()


In [None]:
df.error.value_counts()

In [None]:
# HDR|8.9|00795|00795_UHC_ASMP_DIRECT_20220328162146_RESUB1.txt|UHC|DIRECT|P|Y

In [None]:
df.loc[df.error == 'err18-Blank MemberID; err40-Member not found in plan membership;']

In [1]:
df.to_csv('/Users/bp/Downloads/uhc_supp_file_errors_20230615.csv', index=False)

NameError: name 'df' is not defined