In [1]:
import psycopg2
import pandas as pd
import numpy as np
import regex as re


In [2]:
p_23 = 'data/prescriptions_23.csv'
df_22 = 'data/h243.xlsx'
c_23 = 'data/conditions_23.csv'
o_23 = 'data/outpatient_23.csv'

## Load Data

In [3]:
def load_prescription_file(file,year):
    RXXP = 'RXXP'+year+'X'
    RXSF = 'RXSF'+year+'X'
    return pd.read_csv(file,usecols=['DUPERSID','RXRECIDX','LINKIDX','TC1S1_1','DIABEQUIP',RXXP,RXSF])


In [4]:
def load_conditions_file(file,year):
    WT = 'PERWT'+year+'F'
    return pd.read_csv(file,usecols = ['DUPERSID','CONDIDX','ICD10CDX','HHCOND','IPCOND','OPCOND','OBCOND','ERCOND','RXCOND',WT])

In [5]:
def load_outpatient_file(file,year):
    tot_exp = 'OPXP'+year+'X'
    oop_doc = 'OPDSF'+year+'X'
    oop_fac = 'OPFSF'+year+'X'
    mdcare_doc = 'OPDMR'+year+'X'
    mdcaid_doc = 'OPDMD'+year+'X'
    priv_doc = 'OPDPV'+year+'X'
    mdcare_fac = 'OPFMR'+year+'X'
    mdcaid_fac = 'OPFMD'+year+'X'
    priv_fac = 'OPFPV'+year+'X'
    WT = 'PERWT'+year+'F'
    return pd.read_csv(file,usecols=['DUPERSID','EVNTIDX',tot_exp,oop_doc,oop_fac,mdcare_doc,mdcare_fac,mdcaid_doc,mdcaid_fac,priv_doc,priv_fac,WT])


In [6]:
def rename_cols(df):
    old_cols = df.columns
    new_cols = [re.sub('[0-9]','',i) for i in old_cols]
    return df.rename(columns=dict(zip(old_cols,new_cols)))

In [None]:
#remove years from column names 
cond_23 =  rename_cols(load_conditions_file(c_23,'23'))
out_23 = rename_cols(load_outpatient_file(o_23,'23'))
prescription_23 =  rename_cols(load_prescription_file(p_23,'23'))

In [None]:
#find condition associated with event
out_23['CONDIDX'] = out_23['EVNTIDX'].astype(str).str[:13]
out_23


Unnamed: 0,DUPERSID,EVNTIDX,OPXP23X,OPFSF23X,OPFMR23X,OPFMD23X,OPFPV23X,OPDSF23X,OPDMR23X,OPDMD23X,OPDPV23X,PERWT23F,CONDIDX
0,2790002101,2790002101001401,187.42,60.0,0.0,0.00,127.42,0.0,0.00,0.0,0.00,11664.426820,2790002101001
1,2790011102,2790011102006201,253.10,132.0,0.0,0.00,0.00,0.0,121.10,0.0,0.00,10754.599230,2790011102006
2,2790011102,2790011102006301,512.32,132.0,0.0,0.00,0.00,0.0,210.86,0.0,169.46,10754.599230,2790011102006
3,2790011102,2790011102006401,132.00,132.0,0.0,0.00,0.00,0.0,0.00,0.0,0.00,10754.599230,2790011102006
4,2790011102,2790011102006501,132.00,132.0,0.0,0.00,0.00,0.0,0.00,0.0,0.00,10754.599230,2790011102006
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20715,2819784105,2819784105001501,53.09,0.0,0.0,53.09,0.00,0.0,0.00,0.0,0.00,4684.900838,2819784105001
20716,2819784105,2819784105001601,53.09,0.0,0.0,53.09,0.00,0.0,0.00,0.0,0.00,4684.900838,2819784105001
20717,2819784105,2819784105001701,53.09,0.0,0.0,53.09,0.00,0.0,0.00,0.0,0.00,4684.900838,2819784105001
20718,2819784105,2819784105001801,73.21,0.0,0.0,73.21,0.00,0.0,0.00,0.0,0.00,4684.900838,2819784105001


In [None]:
#Condition index and person weight for diabetes 
conditions_diabetes_23 = cond_23[cond_23['ICD10CDX']=='E11'][['CONDIDX','PERWT23F']]
conditions_diabetes_23['CONDIDX'] = conditions_diabetes_23['CONDIDX'].astype(str)


In [None]:
#outptient event associated with diabetes
#test?
outpatient_diabetes_23 = pd.merge(left=conditions_diabetes_23,right=out_23,on='CONDIDX')
outpatient_diabetes_23[['DUPERSID','EVNTIDX']] = outpatient_diabetes_23[['DUPERSID','EVNTIDX']].astype(str)
outpatient_diabetes_23

Unnamed: 0,CONDIDX,PERWT23F_x,DUPERSID,EVNTIDX,OPXP23X,OPFSF23X,OPFMR23X,OPFMD23X,OPFPV23X,OPDSF23X,OPDMR23X,OPDMD23X,OPDPV23X,PERWT23F_y
0,2790002101001,11664.42682,2790002101,2790002101001401,187.42,60.0,0.00,0.0,127.42,0.0,0.00,0.0,0.00,11664.42682
1,2790192101003,18545.85882,2790192101,2790192101003001,541.73,0.0,432.00,0.0,109.73,0.0,0.00,0.0,0.00,18545.85882
2,2790425101003,21489.40535,2790425101,2790425101003601,408.52,0.0,206.80,0.0,52.76,0.0,119.15,0.0,29.81,21489.40535
3,2790425101003,21489.40535,2790425101,2790425101003801,208.90,0.0,78.93,0.0,20.13,0.0,87.92,0.0,21.92,21489.40535
4,2790425101003,21489.40535,2790425101,2790425101003901,259.56,0.0,206.80,0.0,52.76,0.0,0.00,0.0,0.00,21489.40535
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
542,2819746101003,15185.16367,2819746101,2819746101003401,714.96,0.0,569.81,0.0,145.15,0.0,0.00,0.0,0.00,15185.16367
543,2819746101003,15185.16367,2819746101,2819746101003501,746.68,0.0,595.24,0.0,151.44,0.0,0.00,0.0,0.00,15185.16367
544,2819746101003,15185.16367,2819746101,2819746101003601,100.61,0.0,100.61,0.0,0.00,0.0,0.00,0.0,0.00,15185.16367
545,2819746101003,15185.16367,2819746101,2819746101003701,43.28,0.0,33.29,0.0,9.99,0.0,0.00,0.0,0.00,15185.16367


In [None]:
#sum of expenditures by person 
sum_outpatient_payments_23 = outpatient_diabetes_23.groupby('DUPERSID').agg({'OPXP23X':'sum','OPFSF23X':'sum','OPFMR23X':'sum','OPFMD23X':'sum','OPFPV23X':'sum','OPDSF23X':'sum','OPDMR23X':'sum','OPDMD23X':'sum','OPDPV23X':'sum'})

In [None]:
#associated person weight for person 
sum_outpatient_payments_23 = pd.merge(left = sum_outpatient_payments_23, right= outpatient_diabetes_23[['DUPERSID','PERWT23F_x']], on='DUPERSID')

In [None]:
#weighted expenditures 
payment_columns = ['OPXP23X','OPFSF23X','OPFMR23X','OPFMD23X','OPFPV23X','OPDSF23X','OPDMR23X','OPDMD23X','OPDPV23X']
sum_outpatient_payments_23[payment_columns] = sum_outpatient_payments_23[payment_columns].multiply(sum_outpatient_payments_23['PERWT23F_x'],axis='index')


Unnamed: 0,DUPERSID,OPXP23X,OPFSF23X,OPFMR23X,OPFMD23X,OPFPV23X,OPDSF23X,OPDMR23X,OPDMD23X,OPDPV23X,PERWT23F_x
0,2790002101,2.186147e+06,699865.6092,0.000000e+00,0.0,1.486281e+06,0.0,0.000000e+00,0.0,0.000000e+00,11664.42682
1,2790192101,1.004685e+07,0.0000,8.011811e+06,0.0,2.035037e+06,0.0,0.000000e+00,0.0,0.000000e+00,18545.85882
2,2790425101,1.884578e+07,0.0000,1.058418e+07,0.0,2.700144e+06,0.0,4.449811e+06,0.0,1.111647e+06,21489.40535
3,2790425101,1.884578e+07,0.0000,1.058418e+07,0.0,2.700144e+06,0.0,4.449811e+06,0.0,1.111647e+06,21489.40535
4,2790425101,1.884578e+07,0.0000,1.058418e+07,0.0,2.700144e+06,0.0,4.449811e+06,0.0,1.111647e+06,21489.40535
...,...,...,...,...,...,...,...,...,...,...,...
542,2819746101,4.643972e+07,0.0000,3.730570e+07,0.0,9.134028e+06,0.0,0.000000e+00,0.0,0.000000e+00,15185.16367
543,2819746101,4.643972e+07,0.0000,3.730570e+07,0.0,9.134028e+06,0.0,0.000000e+00,0.0,0.000000e+00,15185.16367
544,2819746101,4.643972e+07,0.0000,3.730570e+07,0.0,9.134028e+06,0.0,0.000000e+00,0.0,0.000000e+00,15185.16367
545,2819746101,4.643972e+07,0.0000,3.730570e+07,0.0,9.134028e+06,0.0,0.000000e+00,0.0,0.000000e+00,15185.16367


In [127]:
outpatient_cost_per_person_23 = sum_outpatient_payments_23.drop('PERWT23F_x',axis=1)

In [128]:
outpatient_cost_per_person_23

Unnamed: 0,DUPERSID,OPXP23X,OPFSF23X,OPFMR23X,OPFMD23X,OPFPV23X,OPDSF23X,OPDMR23X,OPDMD23X,OPDPV23X
0,2790002101,2.186147e+06,699865.6092,0.000000e+00,0.0,1.486281e+06,0.0,0.000000e+00,0.0,0.000000e+00
1,2790192101,1.004685e+07,0.0000,8.011811e+06,0.0,2.035037e+06,0.0,0.000000e+00,0.0,0.000000e+00
2,2790425101,1.884578e+07,0.0000,1.058418e+07,0.0,2.700144e+06,0.0,4.449811e+06,0.0,1.111647e+06
3,2790425101,1.884578e+07,0.0000,1.058418e+07,0.0,2.700144e+06,0.0,4.449811e+06,0.0,1.111647e+06
4,2790425101,1.884578e+07,0.0000,1.058418e+07,0.0,2.700144e+06,0.0,4.449811e+06,0.0,1.111647e+06
...,...,...,...,...,...,...,...,...,...,...
542,2819746101,4.643972e+07,0.0000,3.730570e+07,0.0,9.134028e+06,0.0,0.000000e+00,0.0,0.000000e+00
543,2819746101,4.643972e+07,0.0000,3.730570e+07,0.0,9.134028e+06,0.0,0.000000e+00,0.0,0.000000e+00
544,2819746101,4.643972e+07,0.0000,3.730570e+07,0.0,9.134028e+06,0.0,0.000000e+00,0.0,0.000000e+00
545,2819746101,4.643972e+07,0.0000,3.730570e+07,0.0,9.134028e+06,0.0,0.000000e+00,0.0,0.000000e+00
