In [25]:
import os
import re
import sys
import time
import random
import warnings
import collections
from dateutil.relativedelta import relativedelta
from datetime import datetime
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns

sys.path.append('../../src')
import cb_utils

sns.set(style="darkgrid")
pd.options.display.max_columns = 500
pd.options.display.max_colwidth = None

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Script to parse supplemental file response from UHC

In [7]:
file_path = '/Users/bp/workspace/cb/data/00795_UHC_ASMP_DIRECT_20230608102118_RESUB1.txt'

### Columns from the file we built

In [8]:
dx_cols = [f'DX_{i}' for i in range(40)]
columns = [
    'SEG TYPE',
    'REF #',
    'LAST NAME',
    'FIRST NAME',
    'MI',
    'DOB',
    'MEMBER ID- need either member ID or MBI',
    'Retrieval NPI (Internal Use)',
    'GENDER',
    'STATE CODE',
    'MBI-  need either member ID or MBI',
    'FDOS',
    'TDOS',
    'BILL TYPE- Institutional Only',
    'NU Indicator (Internal Use)',
    'PROV ID',
    'NPI',
    'PROV TYPE',
    'FACILITY NM- required for Institutional',
    'PROV LAST NAME- Required for Professional',
    'PROV FIRST NAME- required for Professional',
    'CMS SPECIALTY- Required for Professional',
    'TAX ID',
    'CPT- Professional and Hospital Outpatient only',
    'REV CODE - Required for Institutional',
    'SERVICE FDOS',
    'SERVICE TDOS',
    'POS- Professional only',
    'ICD INDIC',
    'RA Code- Required for Professional',
    'Chart Barcode (Internal Use)',
    'Chart Enc Key (Internal Use)',
    'Chart DX Key  (Internal Use)',
    'Contract ID (Tufts use only)',
    'Mem Street Address ',
    'Mem Address 2',
    'Mem City',
    'Mem State',
    'Mem Zip Code',
    'CLAIMID/PCN',
] + dx_cols

### response

In [9]:
cols = [re.sub('[^0-9a-zA-Z]+', '_', c.lower()) for c in columns+['err_id', 'error']]
df = pd.read_csv(file_path, sep='|', header=None, skipfooter=1, skiprows=1, names=cols, engine='python')
df.head()

Unnamed: 0,seg_type,ref_,last_name,first_name,mi,dob,member_id_need_either_member_id_or_mbi,retrieval_npi_internal_use_,gender,state_code,mbi_need_either_member_id_or_mbi,fdos,tdos,bill_type_institutional_only,nu_indicator_internal_use_,prov_id,npi,prov_type,facility_nm_required_for_institutional,prov_last_name_required_for_professional,prov_first_name_required_for_professional,cms_specialty_required_for_professional,tax_id,cpt_professional_and_hospital_outpatient_only,rev_code_required_for_institutional,service_fdos,service_tdos,pos_professional_only,icd_indic,ra_code_required_for_professional,chart_barcode_internal_use_,chart_enc_key_internal_use_,chart_dx_key_internal_use_,contract_id_tufts_use_only_,mem_street_address_,mem_address_2,mem_city,mem_state,mem_zip_code,claimid_pcn,dx_0,dx_1,dx_2,dx_3,dx_4,dx_5,dx_6,dx_7,dx_8,dx_9,dx_10,dx_11,dx_12,dx_13,dx_14,dx_15,dx_16,dx_17,dx_18,dx_19,dx_20,dx_21,dx_22,dx_23,dx_24,dx_25,dx_26,dx_27,dx_28,dx_29,dx_30,dx_31,dx_32,dx_33,dx_34,dx_35,dx_36,dx_37,dx_38,dx_39,err_id,error
0,DTL,493387270586524,Schlayer,Edith,,03/08/1950,,,F,,7GJ5T84QD50,07/25/2022,07/25/2022,,,,1194221317,,,Flippo,Jessica,50.0,84-2590508,99212,,,,10.0,0,A,,,,,601 E BARTON RIDGE RD,Apt 102,GREENEVILLE,TN,37745.0,,G4730,F330,F419,E1151,E1142,E6601,F0390,I509,E261,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10000001,err45-Blank or Invalid POS;
1,DTL,614232323915932,Bear,Isaiah,,10/30/1995,,,M,,6H86JY1DW54,06/05/2023,06/05/2023,,,,1720518137,,,Sanchez,Susana,50.0,84-2590508,99214,,,,10.0,0,A,,,,,584 DELORES DR,,Dandridge,TN,37725.0,,R569,F72,Q8789,I10,E441,H547,Z681,G809,Z931,H9190,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10000002,err45-Blank or Invalid POS;
2,DTL,556769413890204,Hayes,Thomas,,10/25/1982,,,M,,6AH0C76FQ17,01/17/2023,01/17/2023,,,,1194221317,,,Flippo,Jessica,50.0,84-2590508,99214,,,,10.0,0,A,,,,,2242 DRY VALLEY RD,,THORN HILL,TN,37881.0,,S24153S,N319,M62838,G8222,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10000003,err45-Blank or Invalid POS;
3,DTL,559953381752988,Ray,Betty,,03/16/1937,,,F,,7QY0U85FJ05,01/25/2023,01/25/2023,,,,1740410703,,,Bolden,Lacey,50.0,84-2590508,99214,,,,10.0,0,A,,,,,2114 DUNCAN AVE,,CHATTANOOGA,TN,37404.0,,K219,I130,E1142,E6601,N1831,E785,F0390,Z7409,F1120,I509,Z6839,J449,J9611,E1122,I69354,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10000004,err45-Blank or Invalid POS;
4,DTL,491633387700380,Hammonds,Joe,,08/10/1954,,,M,,1DV6GR3MC46,07/20/2022,07/20/2022,,,,1740410703,,,Bolden,Lacey,50.0,84-2590508,99214,,,,10.0,0,A,,,,,1201 BOYNTON DR APT 805,,CHATTANOOGA,TN,37402.0,,F5104,I10,Z7901,G40919,E785,R531,E1169,I69354,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10000005,err45-Blank or Invalid POS;


In [10]:
df.error.value_counts()

err45-Blank or Invalid POS;                                                                                                        13282
err18-Blank MemberID; err40-Member not found in plan membership; err45-Blank or Invalid POS;                                         379
err24_ Invalid or Not Risk Adjustable CPT; err45-Blank or Invalid POS;                                                               139
err45-Blank or Invalid POS; Err66_ContractMismatch(MED);                                                                             130
err42-Member data mismatch (LN); err45-Blank or Invalid POS;                                                                          44
err45-Blank or Invalid POS; Err66_ContractMismatch();                                                                                  6
err22-Blank MBI; err40-Member not found in plan membership; err45-Blank or Invalid POS;                                                4
err24_ Invalid or Not Risk Adjustable CPT

In [11]:
# HDR|8.9|00795|00795_UHC_ASMP_DIRECT_20220328162146_RESUB1.txt|UHC|DIRECT|P|Y

In [12]:
df.to_csv('/Users/bp/Downloads/uhc_supp_file_errors_20230615.csv', index=False)

In [14]:
df.loc[df.error == 'err24_ Invalid or Not Risk Adjustable CPT; err45-Blank or Invalid POS;'].cpt_professional_and_hospital_outpatient_only.value_counts()

98966    117
99211     20
99422      1
99421      1
Name: cpt_professional_and_hospital_outpatient_only, dtype: int64

In [30]:
df.loc[df.error.str.contains('Diag Cd'), ['error', 'fdos'] + df.columns[-42:].tolist()].head(20)
# 'M545' # not valid for billing
# 'I714' # not valid for billing
# 'D7582' # not valid for billing
# 'F03911' # too new `23
# 'Z7985' # too new `23

Unnamed: 0,error,fdos,dx_0,dx_1,dx_2,dx_3,dx_4,dx_5,dx_6,dx_7,dx_8,dx_9,dx_10,dx_11,dx_12,dx_13,dx_14,dx_15,dx_16,dx_17,dx_18,dx_19,dx_20,dx_21,dx_22,dx_23,dx_24,dx_25,dx_26,dx_27,dx_28,dx_29,dx_30,dx_31,dx_32,dx_33,dx_34,dx_35,dx_36,dx_37,dx_38,dx_39,err_id,error.1
378,err27-Invalid Diag Cd DOS (DX 12); err45-Blank or Invalid POS;,03/25/2022,Z86718,Z8719,R569,K219,G4700,B182,I739,I10,R519,E785,F319,M545,J449,F17200,J9611,J302,M069,,,,,,,,,,,,,,,,,,,,,,,,483473-10000379,err27-Invalid Diag Cd DOS (DX 12); err45-Blank or Invalid POS;
1367,err27-Invalid Diag Cd DOS (DX 09); err45-Blank or Invalid POS;,06/29/2022,K219,Z780,G8929,J45909,I10,Z7189,E7800,E039,M545,Z9989,I7300,Z1211,Z6827,J302,M069,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10001368,err27-Invalid Diag Cd DOS (DX 09); err45-Blank or Invalid POS;
3879,err27-Invalid Diag Cd DOS (DX 05); err45-Blank or Invalid POS;,08/24/2022,I10,E785,K7460,F0391,F03911,F17200,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10003880,err27-Invalid Diag Cd DOS (DX 05); err45-Blank or Invalid POS;
4063,err27-Invalid Diag Cd DOS (DX 14); err45-Blank or Invalid POS;,09/16/2022,M109,I4820,K219,E1140,E1151,I495,Z6843,I5032,E6601,I110,E039,D6869,J449,Z7985,J9611,Z9981,E261,,,,,,,,,,,,,,,,,,,,,,,,483473-10004064,err27-Invalid Diag Cd DOS (DX 14); err45-Blank or Invalid POS;
4988,err18-Blank MemberID; err27-Invalid Diag Cd DOS (DX 02); err40-Member not found in plan membership; err45-Blank or Invalid POS;,12/02/2022,K219,I714,F0150,I209,B182,I110,I7102,E785,I509,G40909,I252,I69351,,,,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10004989,err18-Blank MemberID; err27-Invalid Diag Cd DOS (DX 02); err40-Member not found in plan membership; err45-Blank or Invalid POS;
5227,err27-Invalid Diag Cd DOS (DX 17); err45-Blank or Invalid POS;,01/13/2023,D84821,N1832,E1151,F330,Z6843,E6601,F1320,N052,Z79899,G40909,M159,Z89511,W19XXXA,G4700,E1122,N2581,D7582,R87810,,,,,,,,,,,,,,,,,,,,,,,483473-10005228,err27-Invalid Diag Cd DOS (DX 17); err45-Blank or Invalid POS;
5236,err27-Invalid Diag Cd DOS (DX 15); err45-Blank or Invalid POS;,09/19/2022,M109,K219,E1140,K766,D696,F339,Z794,K7469,F4310,E261,R030,Z7722,G40909,J449,Z7985,G4700,F603,E11319,,,,,,,,,,,,,,,,,,,,,,,483473-10005237,err27-Invalid Diag Cd DOS (DX 15); err45-Blank or Invalid POS;


In [33]:
df.loc[df.error.str.contains('MemberID')]

Unnamed: 0,seg_type,ref_,last_name,first_name,mi,dob,member_id_need_either_member_id_or_mbi,retrieval_npi_internal_use_,gender,state_code,mbi_need_either_member_id_or_mbi,fdos,tdos,bill_type_institutional_only,nu_indicator_internal_use_,prov_id,npi,prov_type,facility_nm_required_for_institutional,prov_last_name_required_for_professional,prov_first_name_required_for_professional,cms_specialty_required_for_professional,tax_id,cpt_professional_and_hospital_outpatient_only,rev_code_required_for_institutional,service_fdos,service_tdos,pos_professional_only,icd_indic,ra_code_required_for_professional,chart_barcode_internal_use_,chart_enc_key_internal_use_,chart_dx_key_internal_use_,contract_id_tufts_use_only_,mem_street_address_,mem_address_2,mem_city,mem_state,mem_zip_code,claimid_pcn,dx_0,dx_1,dx_2,dx_3,dx_4,dx_5,dx_6,dx_7,dx_8,dx_9,dx_10,dx_11,dx_12,dx_13,dx_14,dx_15,dx_16,dx_17,dx_18,dx_19,dx_20,dx_21,dx_22,dx_23,dx_24,dx_25,dx_26,dx_27,dx_28,dx_29,dx_30,dx_31,dx_32,dx_33,dx_34,dx_35,dx_36,dx_37,dx_38,dx_39,err_id,error
139,DTL,432289519042716,Raymer,Daisy,,03/28/1929,,,F,,6JY3DN2XN80,03/31/2022,03/31/2022,,,,1811345341,,,Freeman,Megan,50.0,84-2590508,99213,,,,11.0,0,A,,,,,1685 Bunker Hill Rd,,Cookeville,TN,38506.0,,R627,F0390,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10000140,err18-Blank MemberID; err40-Member not found in plan membership; err45-Blank or Invalid POS;
484,DTL,502836801765532,Friday,Airfearer,,09/04/1954,,,F,,1EC3N32FD72,08/19/2022,08/19/2022,,,,1740410703,,,Bolden,Lacey,50.0,84-2590508,99214,,,,10.0,0,A,,,,,108 N Auburndale St Apt 409,,Memphis,TN,38104.0,,R569,S062X5S,F0390,I69951,N3281,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10000485,err18-Blank MemberID; err40-Member not found in plan membership; err45-Blank or Invalid POS;
485,DTL,512259942252700,Friday,Airfearer,,09/04/1954,,,F,,1EC3N32FD72,09/15/2022,09/15/2022,,,,1740410703,,,Bolden,Lacey,50.0,84-2590508,99213,,,,10.0,0,A,,,,,108 N Auburndale St Apt 409,,Memphis,TN,38104.0,,L089,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10000486,err18-Blank MemberID; err40-Member not found in plan membership; err45-Blank or Invalid POS;
486,DTL,581423447670940,Friday,Airfearer,,09/04/1954,,,F,,1EC3N32FD72,03/22/2023,03/22/2023,,,,1811345341,,,Freeman,Megan,50.0,84-2590508,99214,,,,10.0,0,A,,,,,108 N Auburndale St Apt 409,,Memphis,TN,38104.0,,R569,S062X5S,F0390,I69951,N3281,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10000487,err18-Blank MemberID; err40-Member not found in plan membership; err45-Blank or Invalid POS;
517,DTL,614794085728412,Mason,Alberta,,08/17/1927,,,F,,1CA2Q38GP29,06/06/2023,06/06/2023,,,,1841796091,,,Holland,Jamie,50.0,84-2590508,99203,,,,10.0,0,A,,,,,5185 Highway 57 Apt 24d,,Rossville,TN,38066.0,,K219,I152,J449,Z6824,E1159,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10000518,err18-Blank MemberID; err40-Member not found in plan membership; err45-Blank or Invalid POS;
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13988,DTL,608853291892892,Bair,Michael,,03/01/1989,,,M,,2XW1JP0EJ93,05/24/2023,05/24/2023,,,,1588195986,,,Dziama,Alexandra,50.0,84-2590508,99203,,,,10.0,0,A,,,,,58b South Ave,,Attleboro,MA,2703.0,,Z6828,I429,I4891,F339,G1111,E785,E039,G7100,L98429,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10013989,err18-Blank MemberID; err40-Member not found in plan membership; err45-Blank or Invalid POS;
13989,DTL,608828592095388,Soares,Amy,,12/16/1981,,,F,,3EX5ME7XY75,05/24/2023,05/24/2023,,,,1588195986,,,Dziama,Alexandra,50.0,84-2590508,99203,,,,10.0,0,A,,,,,350 Bakerville Rd,,South Dartmouth,MA,2748.0,,Z6838,I10,E6601,E785,G43909,G35,M069,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10013990,err18-Blank MemberID; err40-Member not found in plan membership; err45-Blank or Invalid POS;
13991,DTL,607035621441692,Thomas,Fredrick,,02/27/1969,,,M,,3XN2JX7QR82,05/19/2023,05/19/2023,,,,1619328531,,,Agiri,Kathy,50.0,84-2590508,99203,,,,10.0,0,A,,,,,735 Shawmut Ave,Apt 718,Roxbury,MA,2119.0,,Z952,E261,I509,Z933,C189,B20,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10013992,err18-Blank MemberID; err40-Member not found in plan membership; err45-Blank or Invalid POS;
13992,DTL,606311445889180,Panorelli,Victoria,,10/20/1963,,,F,,5KC9W01TA39,05/18/2023,05/18/2023,,,,1619328531,,,Agiri,Kathy,50.0,84-2590508,99203,,,,10.0,0,A,,,,,240 River St,,Haverhill,MA,1832.0,,E6601,E785,F319,I509,E1169,Z6842,J449,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,483473-10013993,err18-Blank MemberID; err40-Member not found in plan membership; err45-Blank or Invalid POS;


In [34]:
conn = cb_utils.get_engine(source="analytics")
df.to_sql("uhc_supp_file_errors_20230615", conn, schema='junk', index=False, method='multi', chunksize=1000)

13995