In [98]:
import pandas as pd
import numpy as np
import polars as pl
import pyarrow

In [99]:
def payer_category(x):
    x = x.lower()
    if 'min' in x:
        return 'min'
    elif 'max' in x:
        return'max'
    elif 'cash_price' in x:
        return 'cash'
    elif 'gross' in x:
        return 'gross'

In [100]:
df = pd.read_csv('58-1645396_BMH_Restorative_Care_STANDARDCHARGES.csv', delimiter="|", encoding='ansi')

In [101]:
date = df['FDATE'][0]
df = df.drop(['FDATE', 'FAC'], axis=1)

In [102]:
df.rename(columns=lambda x: x.strip().strip(), inplace=True)

In [103]:
df.rename(columns={
    'Code_Type': 'line_type',
    'Code': 'code',
    'Procedure': 'local_code',
    'Procedure_Description': 'description',
    'NDC': 'ndc',
    'Rev_Code':'rev_code',
    'Plan': 'plan'
}, inplace=True)

In [104]:
df.loc[df['CODE_DESC'].str.contains('MS-LTC'), 'code'] = df['CODE_DESC']

df.loc[df['code'].str.contains('MS-LTC'), 'ms_drg'] = df['code'].str.replace('MS-LTC', '')
df.loc[df['CODE_DESC'] == 'HCPCS', 'hcpcs_cpt'] = df['code'].str.replace('HCPCS ', '')
df.loc[df['CODE_DESC'] == 'CPT®', 'hcpcs_cpt'] = df['code'].str.replace('CPT® ', '')

In [105]:
df['rev_code'] = df['rev_code'].str.split(' - ').str[0]

In [106]:
# Bring added columns to the beginning to make the next step easier
cols = df.columns.tolist()
cols = cols[-2:] + cols[:-2]
df = df[cols]

df = df.drop('CODE_DESC', axis=1)

In [107]:
df

Unnamed: 0,ms_drg,hcpcs_cpt,line_type,code,local_code,description,ndc,rev_code,plan,IP_Expected_Reimbursement,IP_Expected_Reimbursement_MIN,IP_Expected_Reimbursement_MAX,IP_cash_price,Gross_ip_chg,OP_Expected_Reimbursement,OP_Expected_Reimbursement_MIN,OP_Expected_Reimbursement_MAX,OP_cash_price,Gross_op_chg
0,166,,MS-LTC-DRG,MS-LTC166,166,Other respiratory system O.R. procedures with MCC,,,1100313001-BLUE CROSS NETWORK S,52950.00,52950.00,135291.32,103723.34,450971.05,,,,,
1,166,,MS-LTC-DRG,MS-LTC166,166,Other respiratory system O.R. procedures with MCC,,,1100313002-BLUE CROSS BAPTIST EMPLOYEE,52950.00,52950.00,135291.32,103723.34,450971.05,,,,,
2,166,,MS-LTC-DRG,MS-LTC166,166,Other respiratory system O.R. procedures with MCC,,,1100315001-BLUE CROSS ARKANSAS EXCHANGE,52950.00,52950.00,135291.32,103723.34,450971.05,,,,,
3,166,,MS-LTC-DRG,MS-LTC166,166,Other respiratory system O.R. procedures with MCC,,,1100316001-BLUE CROSS TENNESSEE NETWORK P,52950.00,52950.00,135291.32,103723.34,450971.05,,,,,
4,166,,MS-LTC-DRG,MS-LTC166,166,Other respiratory system O.R. procedures with MCC,,,1100330002-BLUE CROSS MISSISSIPPI PPO,52950.00,52950.00,135291.32,103723.34,450971.05,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75520,,Q9967,ERX,HCPCS Q9967,10322,IOHEXOL 300 MG IODINE/ML INTRAVENOUS SOLUTION,00407-1413-58,0636,1100316001-BLUE CROSS TENNESSEE NETWORK P,Not reimbursed separately,Not reimbursed separately,Not reimbursed separately,66.34,288.45,,,,,
75521,,Q9967,ERX,HCPCS Q9967,10322,IOHEXOL 300 MG IODINE/ML INTRAVENOUS SOLUTION,00407-1413-58,0636,1100315001-BLUE CROSS ARKANSAS EXCHANGE,Not reimbursed separately,Not reimbursed separately,Not reimbursed separately,66.34,288.45,,,,,
75522,,Q9967,ERX,HCPCS Q9967,10322,IOHEXOL 300 MG IODINE/ML INTRAVENOUS SOLUTION,00407-1413-58,0636,1100313002-BLUE CROSS BAPTIST EMPLOYEE,Not reimbursed separately,Not reimbursed separately,Not reimbursed separately,66.34,288.45,,,,,
75523,,Q9967,ERX,HCPCS Q9967,10322,IOHEXOL 300 MG IODINE/ML INTRAVENOUS SOLUTION,00407-1413-58,0636,1100313001-BLUE CROSS NETWORK S,Not reimbursed separately,Not reimbursed separately,Not reimbursed separately,66.34,288.45,,,,,


In [108]:
cols = df.columns.tolist()

id_vars = cols[:9]
value_vars = ['IP_Expected_Reimbursement', 'OP_Expected_Reimbursement']

df_plan = pd.melt(df, id_vars=id_vars, value_vars=value_vars, var_name='payer', value_name='standard_charge')

In [109]:
# Set column `setting` to either `inpatient` if col `payer` starts with  'IP' or `outpatient` if col `payer` starts with  'OP'
df_plan['setting'] = np.where(df_plan['payer'].str.startswith('IP'), 'inpatient', np.where(df_plan['payer'].str.startswith('OP'), 'outpatient', 1))
df_plan['payer_category'] = 'payer'

In [110]:
df_plan

Unnamed: 0,ms_drg,hcpcs_cpt,line_type,code,local_code,description,ndc,rev_code,plan,payer,standard_charge,setting,payer_category
0,166,,MS-LTC-DRG,MS-LTC166,166,Other respiratory system O.R. procedures with MCC,,,1100313001-BLUE CROSS NETWORK S,IP_Expected_Reimbursement,52950.00,inpatient,payer
1,166,,MS-LTC-DRG,MS-LTC166,166,Other respiratory system O.R. procedures with MCC,,,1100313002-BLUE CROSS BAPTIST EMPLOYEE,IP_Expected_Reimbursement,52950.00,inpatient,payer
2,166,,MS-LTC-DRG,MS-LTC166,166,Other respiratory system O.R. procedures with MCC,,,1100315001-BLUE CROSS ARKANSAS EXCHANGE,IP_Expected_Reimbursement,52950.00,inpatient,payer
3,166,,MS-LTC-DRG,MS-LTC166,166,Other respiratory system O.R. procedures with MCC,,,1100316001-BLUE CROSS TENNESSEE NETWORK P,IP_Expected_Reimbursement,52950.00,inpatient,payer
4,166,,MS-LTC-DRG,MS-LTC166,166,Other respiratory system O.R. procedures with MCC,,,1100330002-BLUE CROSS MISSISSIPPI PPO,IP_Expected_Reimbursement,52950.00,inpatient,payer
...,...,...,...,...,...,...,...,...,...,...,...,...,...
151045,,Q9967,ERX,HCPCS Q9967,10322,IOHEXOL 300 MG IODINE/ML INTRAVENOUS SOLUTION,00407-1413-58,0636,1100316001-BLUE CROSS TENNESSEE NETWORK P,OP_Expected_Reimbursement,,outpatient,payer
151046,,Q9967,ERX,HCPCS Q9967,10322,IOHEXOL 300 MG IODINE/ML INTRAVENOUS SOLUTION,00407-1413-58,0636,1100315001-BLUE CROSS ARKANSAS EXCHANGE,OP_Expected_Reimbursement,,outpatient,payer
151047,,Q9967,ERX,HCPCS Q9967,10322,IOHEXOL 300 MG IODINE/ML INTRAVENOUS SOLUTION,00407-1413-58,0636,1100313002-BLUE CROSS BAPTIST EMPLOYEE,OP_Expected_Reimbursement,,outpatient,payer
151048,,Q9967,ERX,HCPCS Q9967,10322,IOHEXOL 300 MG IODINE/ML INTRAVENOUS SOLUTION,00407-1413-58,0636,1100313001-BLUE CROSS NETWORK S,OP_Expected_Reimbursement,,outpatient,payer


In [111]:
# Move on to creating df with only rates and no plans
df = df.drop(['plan', 'IP_Expected_Reimbursement', 'OP_Expected_Reimbursement'], axis=1)

In [112]:
cols = df.columns.tolist()
id_vars = cols[:8]
value_vars = cols[8:]

df = pd.melt(df, id_vars=id_vars, value_vars=value_vars, var_name='payer', value_name='standard_charge')

In [113]:
df['setting'] = np.where(df['payer'].str.startswith('IP'), 'inpatient', np.where(df['payer'].str.startswith('OP'), 'outpatient', 1))
df['setting'] = np.where(df['payer'].str.contains('_ip_'), 'inpatient', np.where(df['payer'].str.contains('_op_'), 'outpatient', df['setting']))
df['payer_category'] = df['payer'].apply(payer_category)
df['plan'] = ''

In [114]:
df_concat = pd.concat([df, df_plan])

In [115]:
df_concat.dropna(subset=['standard_charge'], inplace=True)

In [116]:
df = df_concat
df = df[df['standard_charge'] != 'Not reimbursed separately']


In [117]:
df.loc[df['hcpcs_cpt'].str.len() > 5, 'hcpcs_cpt'] = pd.NA

In [118]:
df['hospital_id'] = '442010'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['hospital_id'] = '442010'


In [119]:
df['hcpcs_cpt'].fillna('""', inplace=True)
df['code'].fillna('""', inplace=True)
df['rev_code'].fillna('""', inplace=True)
df['ndc'].fillna('""', inplace=True)
df['plan'].fillna('""', inplace=True)
df['ms_drg'].fillna('""', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['hcpcs_cpt'].fillna('""', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['code'].fillna('""', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['rev_code'].fillna('""', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ndc'].fillna('""', inplace=True)
A value 

In [122]:
df['standard_charge'] = df['standard_charge'].str.replace(',', '')
df.dropna(subset=['standard_charge'], inplace=True)

df['payer'] = df['payer'].str.strip()
df['plan'] = df['plan'].str.strip()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['standard_charge'] = df['standard_charge'].str.replace(',', '')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.dropna(subset=['standard_charge'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['payer'] = df['payer'].str.strip()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer

TypeError: 'Index' object is not callable

In [None]:
df.drop_duplicates(subset=['ms_drg', 'hcpcs_cpt', 'code', 'local_code', 'ndc', 'rev_code', 'payer', 'setting', 'payer_category', 'plan'], inplace=True)

In [123]:
df.columns

Index(['ms_drg', 'hcpcs_cpt', 'line_type', 'code', 'local_code', 'description',
       'ndc', 'rev_code', 'payer', 'standard_charge', 'setting',
       'payer_category', 'plan', 'hospital_id'],
      dtype='object')

In [121]:
df.to_csv('data.csv', index=False)