## Consolidating Annotations
Numerator v. Not Numerator

This notebook preprocesses the manually reviewed annotations, which were prepared over a series of successive reviews. The formatting changed slightly between reviews and needs to be standardized before the annotations can be compared. The notebook exports the preprocessed annotations in three files, grouped by annotation set and by whether the metric denominators are defined by the service provided (e.g. cataract surgery) or by diagnosis (e.g. low back pain).

In [None]:
from cw_package.setup_cw_env import *
from pylab import *
from cw_package import prDF
import pickle
# NOTE(review): this is a bare name, not an import. It only resolves if one of the star
# imports above happens to export it (cannot be confirmed from this notebook); otherwise
# this line raises NameError on a fresh kernel -- confirm whether it is still needed.
jacks_verification

In [None]:
from au_package import assess_agreement, prep_Au_standard, standardize_flags, prep_Auseibert_standard
from au_package import stitch_metricterms, flag_checker

In [None]:
# ---- Run configuration ------------------------------------------------------
# Tag used in the output folder name and in every exported file name.
coded_date = '2017_07_12allfeedwith_I10'

# NOTE(review): not referenced by any other cell visible in this notebook.
earliest_date = '2016_04_01'

# Months under analysis, written as 'YYYY_M' (month is not zero-padded).
time_period = [
    '2016_4', '2016_5', '2016_6',
    '2016_7', '2016_8', '2016_9',
]

# Metric codes; each one is also used as the worksheet name when the
# annotation workbooks are read.
elevenkeys = [
    'CW_cerv', 'CW_card', 'CW_vitd', 'CW_bph', 'CW_lbp', 'CW_feed',
    'CW_psyc', 'CW_dexa', 'CW_narc', 'CW_nonpreop', 'CW_catpreop',
]

# Flag-standardization mode, validated by flag_checker() in the next cell.
stdflag_string = 'as_annotated'

In [None]:
# simple rule to ensure parameter for standardize_flags() will be valid
# NOTE(review): flag_checker comes from au_package and its body is not visible here;
# presumably it raises or complains on an unrecognised flag string -- confirm.
flag_checker(stdflag_string)

In [None]:
# Make sure the dated output folder exists before any pickle is written into it.
# os.makedirs also creates the parent 'preprocessed/' folder when it is missing;
# plain os.mkdir could not, and the old bare `except: pass` hid that failure until
# a later open() call could not find the directory.
# An already-existing folder is still tolerated; any other failure (e.g. permissions)
# now surfaces here instead of being silently ignored.
preprocessed_dir = 'preprocessed/' + coded_date
if not os.path.isdir(preprocessed_dir):
    os.makedirs(preprocessed_dir)

In [None]:
# Import the original annotation spreadsheets, that have been manually "reculled" by removing duplicate/inappropriate records.
#     Two independent frames are kept per sheet (_r and _all) b/c the annotations will be
#     standardized in different ways by the 'prep_Au_standard()' method.
#     Each sheet is parsed once and deep-copied instead of being read from disk twice.
numerator_xlsx = '../selfreview/Annotations_for_use/FINAL/old_Numerator_reviewedJuly.xlsx'
denominator_xlsx = '../selfreview/Annotations_for_use/FINAL/old_Denominator_reviewedJuly.xlsx'

Au_num_r = {}
Au_den_r = {}
Au_num_all = {}
Au_den_all = {}
for x in elevenkeys:
    print(x)
    try:
        # NOTE(review): `sheetname` is the keyword used by older pandas releases; newer
        # releases spell it `sheet_name` -- confirm against the installed version.
        num_sheet = pd.read_excel(numerator_xlsx, header=0, sheetname=x)
        den_sheet = pd.read_excel(denominator_xlsx, header=0, sheetname=x)
    except Exception as err:
        # A metric whose sheet cannot be read is skipped (presumably not every metric has a
        # sheet in these workbooks), but the reason is now reported instead of being hidden.
        # Catching Exception rather than using a bare except lets Ctrl-C interrupt the loop.
        print('exception passed for {}'.format(x))
        print('    reason: {!r}'.format(err))
        continue
    # Store only after both reads succeeded, so a metric never ends up with a numerator
    # frame but no denominator frame.
    Au_num_r[x] = num_sheet
    Au_den_r[x] = den_sheet
    Au_num_all[x] = num_sheet.copy()
    Au_den_all[x] = den_sheet.copy()



## Standardize All Annotations

#### Standardize the Original Annotations
`_r` suffix indicates restriction to `time_period` <br>
`_all` suffix indicates NO restriction to `time_period`

In [None]:
# Subset of metric codes that is passed to prep_Au_standard() for the original workbooks.
elevenkeys_trunc = [
    'CW_cerv', 'CW_card', 'CW_vitd',
    'CW_dexa', 'CW_nonpreop', 'CW_catpreop',
]

# NOTE: both calls rebind their own input names, so re-running this cell without first
# re-running the import cell feeds already-processed data back into prep_Au_standard().

# _r set: restricted to time_period
Au_num_r, Au_den_r = prep_Au_standard(
    elevenkeys_trunc, Au_num_r, Au_den_r, time_period,
    restrict_to_timeperiod=True)

# _all set: no restriction to time_period
Au_num_all, Au_den_all = prep_Au_standard(
    elevenkeys_trunc, Au_num_all, Au_den_all, time_period,
    restrict_to_timeperiod=False)


In [None]:
def _original_bundle(metric):
    """Group one metric's restricted numerator and denominator annotations in a dict."""
    return {'Metric': metric,
            'Numerator': Au_num_r[metric],
            'Denominator': Au_den_r[metric]}


# Specifically tailored to original annotations' contents.
# CW_feed, CW_psyc, CW_narc and CW_bph are deliberately not bundled here
# (their assignments were commented out in the original cell).
Au_cerv_r = _original_bundle('CW_cerv')
Au_card_r = _original_bundle('CW_card')
Au_nonpreop_r = _original_bundle('CW_nonpreop')
Au_catpreop_r = _original_bundle('CW_catpreop')
Au_dexa_r = _original_bundle('CW_dexa')
Au_vitd_r = _original_bundle('CW_vitd')

In [None]:
# Restricted original-annotation bundles, one dict per metric.
# The CW_bph / CW_feed / CW_psyc / CW_narc bundles are left out of this list.
treasurechest_r = [
    Au_cerv_r,
    Au_card_r,
    Au_nonpreop_r,
    Au_catpreop_r,
    Au_dexa_r,
    Au_vitd_r,
]

# Second name bound to the very same list object (an alias, not a copy).
archive_tc_r = treasurechest_r

#### Take a moment to pickle the prepped - but not standardized - original Gold Standards

In [None]:
# Pickle the restricted original-annotation bundles.
# The file is opened in binary mode because pickle writes bytes (text mode 'w' raises
# TypeError on Python 3), and inside `with` so the handle is closed even if dump() fails.
with open('preprocessed/'+coded_date+'/pickled_treasurechest_r_'+coded_date+'.p', 'wb') as f:
    pickle.dump(treasurechest_r, f)

In [None]:
# Disabled cell: everything below is one triple-quoted string literal, so none of it executes.
# As a consequence treasurechest_all and archive_tc_all are never defined by this notebook.
"""
#########################################################################3
## Don't need these, which include annotations from outside analyzed period
Au_cerv_all =     {'Metric':'CW_cerv','Numerator':Au_num_all['CW_cerv'],'Denominator': Au_den_all['CW_cerv']}
Au_card_all=      {'Metric':'CW_card','Numerator':Au_num_all['CW_card'],'Denominator': Au_den_all['CW_card']}
Au_nonpreop_all = {'Metric':'CW_nonpreop','Numerator':Au_num_all['CW_nonpreop'],'Denominator':  Au_den_all['CW_nonpreop']}
Au_catpreop_all = {'Metric':'CW_catpreop','Numerator':Au_num_all['CW_catpreop'],'Denominator': Au_den_all['CW_catpreop']}
Au_dexa_all =     {'Metric':'CW_dexa','Numerator':Au_num_all['CW_dexa'], 'Denominator': Au_den_all['CW_dexa']}
Au_vitd_all =     {'Metric':'CW_vitd','Numerator':Au_num_all['CW_vitd'], 'Denominator':  Au_den_all['CW_vitd']}
#Au_feed_all =     {'Metric':'CW_feed','Numerator':Au_num_all['CW_feed'], 'Denominator': Au_den_all['CW_feed']}
#Au_psyc_all =     {'Metric':'CW_psyc','Numerator':Au_num_all['CW_psyc'],'Denominator': Au_den_all['CW_psyc']}
#Au_narc_all =     {'Metric':'CW_narc','Numerator':Au_num_all['CW_narc'],'Denominator': Au_den_all['CW_narc']}
#Au_bph_all =      {'Metric':'CW_bph','Numerator':Au_num_all['CW_bph'],'Denominator': Au_den_all['CW_bph']}


treasurechest_all = [Au_cerv_all, Au_card_all, 
                     Au_nonpreop_all, Au_catpreop_all,
                     #Au_bph_all, Au_feed_all, Au_psyc_all, Au_narc_all, 
                     Au_dexa_all, Au_vitd_all]

archive_tc_all=treasurechest_all



f= open('preprocessed/'+coded_date+'/pickled_treasurechest_all_'+coded_date+'.p','w')
pickle.dump(treasurechest_all, f)
f.close()
"""

## Standardize later (Seibert) annotations

In [None]:
# Load the later annotation sets, performed by Seibert.
# This cell reads the first Seibert workbook; Au_seibert_2 is created empty here and is
# filled by the following cell.
Au_seibert_1 = {}
Au_seibert_2 = {}
first_seibert_xlsx = '../selfreview/Annotations_for_use/FINAL/new_FirstSeibert_feb28_reviewedJuly.xlsx'
for x in elevenkeys:
    try:
        Au_seibert_1[x] = pd.read_excel(first_seibert_xlsx, header=0, sheetname=x)
        # Month label 'YYYY_M' (month not zero-padded), the same format as the entries
        # of time_period.  The lambda parameter no longer shadows the loop variable.
        Au_seibert_1[x]['TEST_DATE_month'] = pd.to_datetime(Au_seibert_1[x]['TEST_DATE_a']).apply(
            lambda stamp: str(stamp.year) + '_' + str(stamp.month))
        print('imported {}'.format(x))
    except Exception as err:
        # Failed metrics are still skipped, but the cause is reported, and Ctrl-C is no
        # longer swallowed the way it was by the bare `except:`.
        print('exception passed for {}'.format(x))
        print('    reason: {!r}'.format(err))

In [None]:
# Read the second Seibert workbook into Au_seibert_2, one worksheet per metric code.
print('\n second set of annotations by Seibert \n')
second_seibert_xlsx = '../selfreview/Annotations_for_use/FINAL/new_SecondSeibert_may14_reviewedJuly.xlsx'
for x in elevenkeys:
    try:
        Au_seibert_2[x] = pd.read_excel(second_seibert_xlsx, header=0, sheetname=x)
        # Month label 'YYYY_M' (month not zero-padded), the same format as the entries
        # of time_period.  The lambda parameter no longer shadows the loop variable.
        Au_seibert_2[x]['TEST_DATE_month'] = pd.to_datetime(Au_seibert_2[x]['TEST_DATE_a']).apply(
            lambda stamp: str(stamp.year) + '_' + str(stamp.month))
        print('imported {}'.format(x))
    except Exception as err:
        # Failed metrics are still skipped, but the cause is reported, and Ctrl-C is no
        # longer swallowed the way it was by the bare `except:`.
        print('exception passed for {}'.format(x))
        print('    reason: {!r}'.format(err))

In [None]:
# Run both Seibert annotation sets through prep_Auseibert_standard(), each restricted
# to time_period.
Au_s1_standardized = prep_Auseibert_standard(
    elevenkeys, Au_seibert_1, time_period,
    restrict_to_timeperiod=True)

print('\n\npart 2\n_________________\n')

Au_s2_standardized = prep_Auseibert_standard(
    elevenkeys, Au_seibert_2, time_period,
    restrict_to_timeperiod=True)

In [None]:
def _annotated_bundle(metric, standardized):
    """Pair a metric code with its entry from a standardized Seibert annotation dict."""
    return {'Metric': metric, 'Annotated': standardized[metric]}


# Bundles drawn from the first Seibert annotation set
Au_s1_cerv_r = _annotated_bundle('CW_cerv', Au_s1_standardized)
Au_s1_card_r = _annotated_bundle('CW_card', Au_s1_standardized)
Au_s1_vitd_r = _annotated_bundle('CW_vitd', Au_s1_standardized)
Au_s1_catpreop_r = _annotated_bundle('CW_catpreop', Au_s1_standardized)
Au_s1_dexa_r = _annotated_bundle('CW_dexa', Au_s1_standardized)
Au_s1_nonpreop_r = _annotated_bundle('CW_nonpreop', Au_s1_standardized)

# Bundles drawn from the second Seibert annotation set
Au_s2_bph_r = _annotated_bundle('CW_bph', Au_s2_standardized)
Au_s2_lbp_r = _annotated_bundle('CW_lbp', Au_s2_standardized)
Au_s2_feed_r = _annotated_bundle('CW_feed', Au_s2_standardized)
Au_s2_psyc_r = _annotated_bundle('CW_psyc', Au_s2_standardized)
Au_s2_narc_r = _annotated_bundle('CW_narc', Au_s2_standardized)


In [None]:
# First Seibert annotation set: six metric bundles.
treasurechest_s1_r = [
    Au_s1_cerv_r, Au_s1_card_r, Au_s1_catpreop_r,
    Au_s1_vitd_r, Au_s1_dexa_r, Au_s1_nonpreop_r,
]

# Second Seibert annotation set: five metric bundles.
treasurechest_s2_r = [
    Au_s2_bph_r, Au_s2_lbp_r, Au_s2_feed_r,
    Au_s2_psyc_r, Au_s2_narc_r,
]

In [None]:

# Pickle both Seibert treasure chests into the dated output folder.
# Files are opened in binary mode because pickle writes bytes (text mode 'w' raises
# TypeError on Python 3), and inside `with` so each handle is closed even if dump() fails.
with open('preprocessed/'+coded_date+'/pickled_treasurechest_s1_r_'+coded_date+'.p', 'wb') as f:
    pickle.dump(treasurechest_s1_r, f)

with open('preprocessed/'+coded_date+'/pickled_treasurechest_s2_r_'+coded_date+'.p', 'wb') as f:
    pickle.dump(treasurechest_s2_r, f)

## *******Prepping for this notebook is complete********
What follows is just for visualization

In [None]:
# Disabled cell: everything below is one triple-quoted string literal, so none of it executes.
# If re-enabled, it would add 'linked' and 'linked_a' entries to each dict in treasurechest_r.
"""# Create unified dataframe with all rows on single structure
for x in treasurechest_r:
    print(x['Metric'])
    x['linked']=pd.concat([standardize_flags(x['Numerator'],stdflag_string),standardize_flags(x['Denominator'],stdflag_string)],axis=0)
    # following line makes sure that incident_service counted once if it was in both numerator and denominator
    x['linked_a']=x['linked'].groupby(['MRN','TEST_CODE','Term_assessed','TEST_DATE_month','Gold_Standard']).count()
    x['linked_a']=x['linked_a'].reset_index()
    x['linked_a']['Metric']=x['Metric']"""

In [None]:
# Disabled cell: everything below is one triple-quoted string literal, so none of it executes.
# It iterates treasurechest_all, which is itself only assigned inside another disabled
# (string-literal) cell of this notebook, so both would have to be re-enabled together.
"""for x in treasurechest_all:
    print(x['Metric'])
    x['linked']=pd.concat([standardize_flags(x['Numerator'],'as_annotated'),standardize_flags(x['Denominator'],'as_annotated')],axis=0)
    # following line makes sure that incident_service counted once if it was in both numerator and denominator
    x['linked_a']=x['linked'].groupby(['MRN','TEST_CODE','Term_assessed','TEST_DATE_month','Gold_Standard']).count()
    x['linked_a']=x['linked_a'].reset_index()
    x['linked_a']['Metric']=x['Metric']
    """

## Counts of available annotations

In [None]:
# Disabled cell: everything below is one triple-quoted string literal, so none of it executes.
# It reads x['linked_a'], which is only created inside another disabled cell.
"""annotation_tally_r={}
for x in treasurechest_r:
    try:
        placeholder= x['linked_a'].groupby('Term_assessed')['Metric'].count()
        placeholder_d = placeholder.to_dict()
        annotation_tally_r[x['Metric']]=placeholder_d
    except:
        print('passing')
        pass"""

In [None]:
# Disabled cell: everything below is one triple-quoted string literal, so none of it executes.
# It reads treasurechest_all and x['linked_a'], both of which are only created inside
# other disabled cells.
"""annotation_tally_all={}
for x in treasurechest_all:
    try:
        placeholder= x['linked_a'].groupby('Term_assessed')['Metric'].count()
        placeholder_d = placeholder.to_dict()
        annotation_tally_all[x['Metric']]=placeholder_d
    except:
        print('passing')
        pass"""

In [None]:
# Disabled cell: everything below is one triple-quoted string literal, so none of it executes.
# It depends on annotation_tally_r and annotation_tally_all, which are only built inside
# other disabled cells.
"""r_tally=pd.DataFrame.from_dict(annotation_tally_r, orient='index')
r_tally.rename(columns={'Numer':'Numerators w/in time period','Not_Numer':'Not Numerators w/in time period'},inplace=True)
all_tally=pd.DataFrame.from_dict(annotation_tally_all, orient='index')
all_tally.rename(columns={'Numer':'Numerators all dates','Not_Numer':'Not Numerators all dates'},inplace=True)

together_tally = pd.concat([r_tally, all_tally], axis=1)"""

## Export the tallies of annotations

In [None]:
# Disabled cell: everything below is one triple-quoted string literal, so none of it executes.
# It depends on together_tally from another disabled cell; no visible cell creates the
# './exported/Final Charts/<coded_date>/' directory it would write into.
"""export=together_tally.fillna('')[['Numerators w/in time period','Numerators all dates','Not Numerators w/in time period','Not Numerators all dates']]
export.to_csv('./exported/Final Charts/'+coded_date+'/tally_of_availableannotations_'+coded_date+'_data.csv')"""