# Calculating estimate of excess services in dollars
This step follows Step_A. It loads the stored metric results from different harmonization/mapping algorithms and imputes associated costs.

In [None]:
# set up environment
from cw_package.setup_cw_env import *
from pylab import *
from cw_package import prDF
import pickle
import re
jacks_verification

In [None]:
# Tag shared by the exported input files read below and the output files
# written at the end of this notebook.
coded_date = '2017_07_12allfeed_woI10'

# Per mdsave.com for zipcode 37212, low-ball estimate on 11/11/2016
costof_vitd_test = 40

## Description of functions:

theoretical_costs() 
    - this function extrapolates the costs of the tests/services for the numerator and denominator of the metric examined.
    
     Inputs:
        Ref cost table (RVU calculated)
        Drug ref cost table (Medicaid data)
        Vit D serum testing cost (set at top of this file)
        Metric data (dictionary of dictionaries of dataframes, keyed by metric and then by term)
        Metric identifier (string)
        Term ('numerator'/'denominator')
        
    Use:
        This function is employed by the perform_dollar_est() function.
        
perform_dollar_est()
    - This function calculates the theoretical costs for the metrics; intended for single step comprehensive calculation
    
    Input:
        key_list (list of string identifiers for individual CW_metric)
        dollar_ref (RVU ref table)
        drugdollar_ref (Medicaid drug ref table)
        cost of vitD (set at start of this notebook)
        phi_DDs (list of (GEM name, dictionary of dictionaries) tuples, one tuple per GEM)
        
stitch_metricterms()
    - function to put all dictionary data for single GEM and single term in one dataframe
    
compare_outputs_au()
    - function to compare output with the annotated-gold-standard 
    - This IS NOT ACTIVE - early function drafted, but replaced by separate ipython notebook.
    

In [None]:
def theoretical_costs(ref_costs, drugref_costs,costof_vitd1, cw_DDs, metric, term):
    """Attach a per-row dollar estimate to the records of one metric term.

    Parameters
    ----------
    ref_costs : DataFrame
        Procedure cost reference; must provide ``HCPCS`` and ``dollar_reimb``.
    drugref_costs : DataFrame
        Drug cost reference; must provide ``old_names_tokenized``,
        ``Drug Codes``, ``NDC`` and ``Median_NADAC_PerUnit``.
    costof_vitd1 : number
        Flat cost written to ``dollar_reimb`` for every ``CW_vitd`` row.
    cw_DDs : dict
        ``cw_DDs[metric][term]`` must be a DataFrame containing the record
        columns selected below.
    metric : str
        Metric identifier, e.g. ``'CW_vitd'``.
    term : str
        Term to cost, e.g. ``'numerator'`` or ``'denominator'``.

    Returns
    -------
    DataFrame
        The selected record columns left-merged with the matching cost
        reference, with the estimate in ``dollar_reimb`` and ``term`` in a
        ``Term`` column. Rows whose ``TEST_CODE`` has no match keep NaN in the
        merged columns.

    Raises
    ------
    KeyError
        If the metric or term is absent from ``cw_DDs``, or a required column
        is missing.
    """
    record_columns = ['MRN', 'ENC_ID', 'TEST_CODE', 'TEST_DATE', 'ICD9_subcode',
                      'TEST_DATE_month', 'CLAIM_DATE_month']
    records = cw_DDs['{}'.format(metric)]['{}'.format(term)].loc[:, record_columns]

    if metric in ('CW_psyc', 'CW_narc'):
        # Drug metrics are priced from the drug table rather than the RVU table.
        priced = records.merge(
            drugref_costs[['old_names_tokenized', 'Drug Codes', 'NDC', 'Median_NADAC_PerUnit']],
            left_on='TEST_CODE', right_on='Drug Codes', how='left')
        priced = priced.rename(columns={'Median_NADAC_PerUnit': 'dollar_reimb'})
    else:
        priced = records.merge(ref_costs[['HCPCS', 'dollar_reimb']],
                               left_on='TEST_CODE', right_on='HCPCS', how='left')
        if metric == 'CW_vitd':
            # None of the vitd codes have RVUs associated with them, so the
            # flat cost becomes the estimate and the merged value is kept
            # alongside it as RVU_dollars.
            priced['vitdcost'] = costof_vitd1
            priced = priced.rename(columns={'dollar_reimb': 'RVU_dollars',
                                            'vitdcost': 'dollar_reimb'})

    priced['Term'] = term
    return priced

def perform_dollar_est(key_list, dollar_ref,drugdollar_ref,costof_vitd, phi_DDs):
    """Cost the numerator and denominator of every metric for every GEM.

    Parameters
    ----------
    key_list : list of str
        Metric identifiers to cost.
    dollar_ref : DataFrame
        Procedure cost reference, passed through to ``theoretical_costs``.
    drugdollar_ref : DataFrame
        Drug cost reference, passed through to ``theoretical_costs``.
    costof_vitd : number
        Flat vitamin D test cost, passed through to ``theoretical_costs``.
    phi_DDs : iterable
        One entry per GEM; element ``[0]`` is used as the output key and
        element ``[1]`` is the metric dictionary handed to
        ``theoretical_costs``.

    Returns
    -------
    dict
        ``{gem_key: {'numerator': {metric: DataFrame},
        'denominator': {metric: DataFrame}}}``. A metric/term that cannot be
        costed is left out of its inner dictionary.
    """
    output = {}
    for gem in phi_DDs:
        gem_key, gem_results = gem[0], gem[1]
        costed = {'numerator': {}, 'denominator': {}}
        for metric in key_list:
            # Each term is attempted on its own so that a failing numerator
            # does not prevent the denominator from being costed.
            for term in ('numerator', 'denominator'):
                try:
                    costed[term][metric] = theoretical_costs(
                        dollar_ref, drugdollar_ref, costof_vitd, gem_results, metric, term)
                except Exception as err:
                    # Best effort: skip what cannot be costed, but say so
                    # instead of dropping it silently.
                    print('skipped {} {} for {}: {!r}'.format(metric, term, gem_key, err))
        output[gem_key] = costed
    return output


def stitch_metricterms(dict_dataframes, term_nd):
    """Stack one term of every metric for a single GEM into one dataframe.

    Parameters
    ----------
    dict_dataframes : dict
        ``dict_dataframes[metric][term_nd]`` must be a DataFrame for each
        metric listed below; ``CW_feed`` may be absent or unusable.
    term_nd : str
        Term to stack, e.g. ``'numerator'`` or ``'denominator'``.

    Returns
    -------
    DataFrame
        The stacked rows restricted to the kept columns, with up to two
        leading zeros removed from each ``MRN`` string.
    """
    metric_order = ['CW_cerv', 'CW_card', 'CW_vitd', 'CW_bph', 'CW_lbp', 'CW_feed',
                    'CW_psyc', 'CW_dexa', 'CW_narc', 'CW_nonpreop', 'CW_catpreop']
    kept_columns = ['ENC_ID', 'MRN', 'TEST_CODE', 'TEST_DATE_a', 'TEST_DATE_month',
                    'CLAIM_DATE_month', 'CLAIM_DATE', 'ADM_DT_a', 'AgeAtTest']
    try:
        stitched = pd.concat([dict_dataframes[m][term_nd] for m in metric_order])
    except Exception:
        # Retry without CW_feed; any other missing metric raises again here.
        stitched = pd.concat([dict_dataframes[m][term_nd]
                              for m in metric_order if m != 'CW_feed'])
    # Copy so the MRN assignment below does not write into a slice.
    stitched = stitched[kept_columns].copy()
    stitched['MRN'] = stitched['MRN'].apply(lambda mrn: re.sub(r'^0{1,2}', '', mrn))
    return stitched


In [None]:
# Identifiers of the eleven CW metrics handled in this notebook.
elevenkeys = [
    'CW_cerv',
    'CW_card',
    'CW_vitd',
    'CW_bph',
    'CW_lbp',
    'CW_feed',
    'CW_psyc',
    'CW_dexa',
    'CW_narc',
    'CW_nonpreop',
    'CW_catpreop',
]

# The two terms stored for every metric.
terms = ['numerator', 'denominator']

Import Reference Data

In [None]:
# Load the RVU reference table; the first row of the file holds the column names.
CMMS_dollars = pd.read_csv(
    './ref/Cost_analy_ref/RVU16A_lastcolumndollars_Jack_use.csv',
    header=0,
)

CMMS_dollars = CMMS_dollars.loc[:, [
    'HCPCS', 'MOD', 'DESCRIPTION', 'WORK RVU', 'FACILITY PE RVU', 'MP RVU',
    'FACILITY TOTAL', 'CONV FACTOR', 'Calc_reimb_GPCIof1',
]]

# MOD column (NaN: bucketed costs, 26: professional component, TC: technical component, 53: aborted/noncompleted CPT)
# Only rows with no modifier are kept.
CMMS_dollars = CMMS_dollars.loc[CMMS_dollars['MOD'].isnull()]

# Positions 0, 2, 6, 8 of the selection above:
# HCPCS, DESCRIPTION, FACILITY TOTAL, Calc_reimb_GPCIof1.
CMMS_dollars = CMMS_dollars.iloc[:, [0, 2, 6, 8]].rename(
    columns={'Calc_reimb_GPCIof1': 'dollar_reimb'}, inplace=False)

In [None]:
# Drug cost reference table. An earlier version of this table was
# './ref/Cost_analy_ref/single_drugcodescost_table.csv'.
Medicaiddrug_dollars = pd.read_csv('./ref/Cost_analy_ref/single_drugcodescost_table_2017_07_16.csv')
Medicaiddrug_dollars = Medicaiddrug_dollars[['old_names_tokenized', 'Drug Codes', 'NDC', 'Median_Per_Unit_Cost', 'Pricing_Unit',
                                             'Explanation_Code', 'As of Date']]
# rename() returns a new frame here: `inplace` must be a real bool, and an
# in-place rename would write into the column slice taken above.
Medicaiddrug_dollars = Medicaiddrug_dollars.rename(columns={'Median_Per_Unit_Cost': 'Median_NADAC_PerUnit'})

### Importing pickled results, by GEM, for all CW_metrics
These pickled results were created by Step_A.ipynb
   
   
>the "p-" prefix stands for "pickled"

In [None]:
# One pickled result set per GEM, all read from ./exported/<coded_date>/.
p_10to10 = pd.read_pickle(
    './exported/{0}/pickled_f_10to10_{0}.p'.format(coded_date))
pclaimto9_reimb = pd.read_pickle(
    './exported/{0}/pickled_f_claimto9_reimb_{0}.p'.format(coded_date))
pclaimto9_bestMap10 = pd.read_pickle(
    './exported/{0}/pickled_f_claimto9_bestMap10_{0}.p'.format(coded_date))
prefto10_bestMap9 = pd.read_pickle(
    './exported/{0}/pickled_f_refto10_bestMap9_{0}.p'.format(coded_date))
prefto10_gems = pd.read_pickle(
    './exported/{0}/pickled_f_refto10_gems_{0}.p'.format(coded_date))

In [None]:
# Tallied ratios, read from the same export folder as the GEM pickles.
finaltally = pd.read_pickle(
    './exported/{0}/pickled_tallied_ratios_{0}.p'.format(coded_date))

#### Code that re-counts total cases in numerator and denominator from monthly data 

In [None]:
# Disabled: skipped while no monthly grouping is active in the code.
# The statements are kept as an inert string literal, so nothing below runs.
# They would re-count numerator and denominator cases per month from the
# CSVs exported for the claimto9_bestMap10 GEM.
"""

num_claimto9_bestMap10 = pd.read_csv('./exported/'+coded_date+'/claimto9_bestMap10numerator'+coded_date+'.csv')
num_claimto9_bestMap10.rename(columns={'Unnamed: 0':'Term'},inplace='True')
num_claimto9_bestMap10['Term']='numerator'
den_claimto9_bestMap10 = pd.read_csv('./exported/'+coded_date+'/claimto9_bestMap10denominator'+coded_date+'.csv')
den_claimto9_bestMap10.rename(columns={'Unnamed: 0':'Term'},inplace='True')
den_claimto9_bestMap10['Term']='denominator'

# in retrospect on Jan 19 '17, unclear why variables' prefixes are mon_ and smon_
mon_claimto9_bestMap10 = pd.concat([num_claimto9_bestMap10, den_claimto9_bestMap10])
smon_claimto9_bestMap = mon_claimto9_bestMap10.groupby(['Metric','Term','TEST_DATE_month'])['MRN'].count()
smon_claimto9_bestMap.unstack('TEST_DATE_month')
"""

In [None]:
# Pretty-print the tallied ratios loaded from the pickle for visual inspection.
from pprint import pprint
pprint(finaltally)

#### Creating the double dictionary tuples, for each GEM, of numerators and denominators
    And then placing them in single list.

In [None]:
# One (GEM name, metric dictionary) tuple per GEM, gathered in a single list.
compiled_DDs = [
    ('ten_to_10', p_10to10),
    ('claimto9_bestMap10', pclaimto9_bestMap10),
    ('claimto9_reimb', pclaimto9_reimb),
    ('refto10_gems', prefto10_gems),
    ('refto10_bestMap9', prefto10_bestMap9),
]

# Individual names for the same tuple objects, in list order.
(dd_p10to10,
 dd_pclaimto9_bestMap10,
 dd_pclaimto9_reimb,
 dd_prefto10_gems,
 dd_prefto10_bestMap9) = compiled_DDs

This step strips away the CW_feed that are "na" b/c no mapping existed for the procedure codes.
(This was irrelevant in final version)

In [None]:
# Drop CW_feed from any GEM whose 'denominator_l' entry is the placeholder 'na'.
for x in compiled_DDs:
    try:
        if x[1]['CW_feed']['denominator_l']=='na':
            del x[1]['CW_feed']
            print('deletion committed')
    except Exception:
        # A missing key, or a comparison that does not yield a single
        # True/False, means there is nothing to delete for this GEM.
        print('nothing')
        

In [None]:
# Testing cell: demonstrates the calculation of total VitD dollars at stake,
# by denominator, for the claimto9_bestMap10 GEM (stored in dd_pclaimto9_bestMap10[1]).
# Both lines read the same GEM and the same unit cost so they can be compared.
theoretical_costs(CMMS_dollars, Medicaiddrug_dollars, costof_vitd_test, dd_pclaimto9_bestMap10[1], 'CW_vitd', 'denominator').head(3)
len(dd_pclaimto9_bestMap10[1]['CW_vitd']['denominator']) * costof_vitd_test

### Applying the dollar estimation for all GEMs, metrics and terms --> stored in "single_costdictionary"
then manipulated through the omega dictionary, to the alpha dictionary, then concatenated into the output dataframe.
<br>    
The "output" dataframe is used further down to export it in a spreadsheet for easy visualization and storage.

In [None]:
# Cost every metric and term for every GEM in a single pass.
single_costdictionary = perform_dollar_est(
    key_list=elevenkeys,
    dollar_ref=CMMS_dollars,
    drugdollar_ref=Medicaiddrug_dollars,
    costof_vitd=costof_vitd_test,
    phi_DDs=compiled_DDs,
)


In [None]:
# omega: per GEM and per term, the summed dollar_reimb of every metric,
#        indexed by (metric, Term).
# alpha: per GEM, numerator and denominator totals stacked into one frame
#        whose single column is named after the GEM.
# output: the alpha frames side by side, one column per GEM.
omega = {}
for gem_key, by_term in single_costdictionary.items():
    omega[gem_key] = {
        term_key: pd.concat(
            {metric: frame.groupby('Term')[['dollar_reimb']].sum()
             for metric, frame in by_metric.items()},
            axis=0)
        for term_key, by_metric in by_term.items()
    }

alpha = {}
for gem_key, totals in omega.items():
    stacked = pd.concat([totals['numerator'], totals['denominator']], axis=0)
    alpha[gem_key] = stacked.rename(columns={'dollar_reimb': '{}'.format(gem_key)})

output = pd.concat(list(alpha.values()), axis=1, join='outer')


#### This creates summary table from pickled final tally that was imported far above

In [None]:
# One row per (GEM, metric) holding the first two entries of its tally.
# The columns are named here at construction, so no later rename is needed.
summ_table = pd.DataFrame(
    [[gem_key, metric, counts[0], counts[1]]
     for gem_key, by_metric in finaltally.items()
     for metric, counts in by_metric.items()],
    columns=['GEM type', 'Metric', 'Numerator', 'Denominator'])
# Reshape to one row per (Metric, Numerator/Denominator) and one column per GEM.
summ_table = summ_table.pivot(index='Metric', columns='GEM type')
summ_table = summ_table.stack(0)

# Final outputs

In [None]:
# Show the current working directory; the export cell changes directory
# using a path relative to it.
os.getcwd()

In [None]:
# Move into the charts folder, then write both tables into its <coded_date>
# subfolder. The directory change persists for every later relative path.
os.chdir('./exported/Final Charts')
output.fillna(0).to_csv(
    '{0}/Final_composite_cost_estimate_{0}.csv'.format(coded_date))
summ_table.to_csv(
    '{0}/Final_composite_counts_{0}.csv'.format(coded_date))


### For inspecting the summary table's composite counts 
Differs in narc and psyc counts

In [None]:
# colin: per GEM and per term, the number of MRN entries of every metric,
#        indexed by (metric, Term).
# riverpo: per GEM, numerator and denominator counts stacked into one frame
#          whose single column is named after the GEM.
# thenile: the riverpo frames side by side, one column per GEM.
colin = {}
for gem_key, by_term in single_costdictionary.items():
    colin[gem_key] = {
        term_key: pd.concat(
            {metric: frame.groupby('Term')[['MRN']].count()
             for metric, frame in by_metric.items()},
            axis=0)
        for term_key, by_metric in by_term.items()
    }

riverpo = {}
for gem_key, counts in colin.items():
    stacked = pd.concat([counts['numerator'], counts['denominator']], axis=0)
    riverpo[gem_key] = stacked.rename(columns={'MRN': '{}'.format(gem_key)})

thenile = pd.concat(list(riverpo.values()), axis=1, join='outer')


In [None]:
# Write the per-GEM row counts next to the other final tables; the path is
# relative to the current working directory.
thenile.fillna(0).to_csv(
    '{0}/forcheckingthe_summtablefigures_{0}.csv'.format(coded_date))
print('processing complete')