In [None]:
import IPython.display
from collections import defaultdict
import textwrap
import tools_util as tu
import pyrheautils
import os.path
import yaml
from typebase import CareTier, PatientOverallHealth, DiagClassA
import phacsl.utils.formats.csv_tools as csv_tools


## Some Utility Routines ##

In [None]:
def addTransfersToTable(srcFN, srcSchema, tbl=None):
    """Merge the transfer counts stored in srcFN into tbl and return tbl.

    srcFN is passed through pyrheautils.pathTranslate and loaded with
    pyrheautils.importConstants using srcSchema.  The loaded data is treated as
    {srcName: {destName: count}}; every count is stored as a float.  When tbl is
    None a fresh dict is created, otherwise the given dict is updated in place.

    Raises RuntimeError if the same (src, dest) pair is met twice within this call.
    NOTE(review): the pairs come from nested dicts, so that cannot happen within one
    file; an entry already present in tbl from an earlier file is silently replaced.
    """
    merged = {} if tbl is None else tbl
    seenPairs = set()
    loaded = pyrheautils.importConstants(pyrheautils.pathTranslate(srcFN), srcSchema)
    for src, destCounts in loaded.items():
        row = merged.setdefault(src, {})
        for dest, count in destCounts.items():
            pair = (src, dest)
            if pair in seenPairs:
                raise RuntimeError('Duplicate weight table entries for %s -> %s' %
                                   (src, dest))
            seenPairs.add(pair)
            row[dest] = float(count)
    return merged



In [None]:
def recsToDict(recL, lblKey):
    """Index a list of record dicts by their label column.

    For each record the value under lblKey becomes the outer key.  The inner dict
    keeps only the columns whose name starts with 'To_' (other than lblKey itself),
    with that prefix stripped.  A later record with the same label replaces an
    earlier one.
    """
    prefix = 'To_'
    byLabel = {}
    for record in recL:
        byLabel[record[lblKey]] = {
            column[len(prefix):]: cell
            for column, cell in record.items()
            if column != lblKey and column.startswith(prefix)
        }
    return byLabel

In [None]:
def _renameObsoleteDst(dst, rec, obsoleteNameD, fnForMsg):
    """Return the current name for dst, or None if the entry should be skipped.

    dst is skipped when it is obsolete and rec already holds an entry under the
    replacement name.
    """
    if dst not in obsoleteNameD:
        return dst
    newName = obsoleteNameD[dst]
    if newName in rec:
        print('Dst entries for both %s and %s in %s' % (dst, newName, fnForMsg))
        return None
    print('replacing dst %s with new name %s' % (dst, newName))
    return newName


def cleanTransferTbl(tbl, fnForMsg, obsoleteNameD, facD=None, excludedFacD=None):
    """Split a {src: {dst: count}} transfer table by whether each endpoint is excluded.

    Parameters:
        tbl: transfer table to clean; it is not modified.
        fnForMsg: label used in printed messages and assertion text.
        obsoleteNameD: maps obsolete facility names to their replacements.  An
            obsolete name is replaced unless the replacement is already present
            alongside it, in which case the obsolete entry is skipped with a message.
        facD: names of known facilities; defaults to the notebook-level facDict.
        excludedFacD: names of excluded facilities; defaults to the notebook-level
            excludedFacDict.

    Returns (cleanedTbl, inToOutD, outToInD):
        cleanedTbl: entries whose source is not excluded and whose destination is in
            facD.
        inToOutD: per source, the summed counts sent to excluded destinations.
        outToInD: per destination in facD, the summed counts arriving from excluded
            sources.

    Raises AssertionError for a non-excluded source that is neither in facD nor
    'COMMUNITY', and for a destination of such a source that is in neither dict.
    """
    if facD is None:
        facD = facDict
    if excludedFacD is None:
        excludedFacD = excludedFacDict
    inToOutD = defaultdict(lambda: 0)
    outToInD = defaultdict(lambda: 0)
    cleanedTbl = {}
    for src, rec in tbl.items():
        if src in obsoleteNameD:
            newSrc = obsoleteNameD[src]
            if newSrc in tbl:
                print('Src entries for both %s and %s in %s' % (src, newSrc, fnForMsg))
                continue
            print('replacing src %s with new name %s' % (src, newSrc))
            src = newSrc

        srcIsExcluded = src in excludedFacD
        if not srcIsExcluded:
            assert src in facD or src == 'COMMUNITY', 'unknown src %s in %s' % (src, fnForMsg)
            cleanedTbl[src] = {}

        for dst, ct in rec.items():
            dst = _renameObsoleteDst(dst, rec, obsoleteNameD, fnForMsg)
            if dst is None:
                continue
            if srcIsExcluded:
                # Traffic from an excluded source is only tallied when it lands on a
                # known facility; anything else is dropped.
                if dst in facD:
                    outToInD[dst] += ct
            elif dst in excludedFacD:
                inToOutD[src] += ct
            else:
                assert dst in facD, 'unknown dst %s in %s' % (dst, fnForMsg)
                cleanedTbl[src][dst] = ct
    return cleanedTbl, inToOutD, outToInD


In [None]:
def tablesMatch(tbl1, tbl2):
    """Return True when both tables are None or both hold equal key/value pairs.

    Exactly one None gives False.  Non-None tables are compared through plain-dict
    copies of their items.
    """
    if tbl1 is None or tbl2 is None:
        return tbl1 is None and tbl2 is None
    return dict(tbl1.items()) == dict(tbl2.items())

def tableNonNeg(tbl):
    """Return True when no value in any row of the two-level table is below zero."""
    rowResults = []
    for row in tbl.values():
        # Lists rather than generators: every value is compared, as in a full scan.
        rowResults.append(all([val >= 0 for val in row.values()]))
    return all(rowResults)

def writeYamlComment(f, comment=None):
    """Write comment to f as '# '-prefixed lines, wrapped by textwrap.wrap.

    Nothing is written when comment is None.
    """
    if comment is None:
        return
    for wrapped in textwrap.wrap(comment):
        f.write('# %s\n' % wrapped)

def checkAndMaybeRewrite(rawPath, valD, comment=None, createOnlyThese=None):
    assert tableNonNeg(valD), 'Some elements of the given new value table are negative'
    fn = pyrheautils.pathTranslate(rawPath)
    if os.path.exists(fn):
        with open(fn, 'rU') as f:
            oldJSON = yaml.load(f)
        if tablesMatch(oldJSON, valD):
            print 'Old and new versions of %s match' % rawPath
        else:
            if createOnlyThese is None or rawPath in createOnlyThese:
                print 'Old and new versions of %s differ' % rawPath
                reply = raw_input('overwrite? [yN]')
                if len(reply) == 1 and reply in 'yY':
                    print 'overwriting old version of %s' % rawPath
                    with open(fn, 'w') as f:
                        writeYamlComment(f, comment)
                        yaml.safe_dump(valD, f, default_flow_style=True, indent=4,
                                       encoding='utf-8', width=130, explicit_start=True)
                else:
                    print 'old version of %s was NOT overwritten' % rawPath
            else:
                print 'did not create %s because it was not on the list to be created' % rawPath
    else:
        if createOnlyThese is None or rawPath in createOnlyThese:
            print 'Writing %s because there is no old version' % rawPath
            with open(fn, 'w') as f:
                writeYamlComment(f, comment)
                yaml.safe_dump(valD, f, default_flow_style=True, indent=4,
                                encoding='utf-8', width=130, explicit_start=True)
        else:
            print '%s did not exist and was not created' % rawPath

In [None]:
def sumOfTableValues(tbl):
    """Return the grand total of all values in a two-level {row: {col: value}} table."""
    grandTotal = 0
    for row in tbl.values():
        grandTotal += sum(row.values())
    return grandTotal

## The following block loads the model ##

In [None]:
homeDir = '/home/welling/git/pyRHEA_github/src/sim'
inputDict = tu.readModelInputs(os.path.join(homeDir, 'week_run_OC.yaml'))
pyrheautils.prepPathTranslations(inputDict, homeDir=homeDir)
print inputDict['facilityDirs']
for dn in inputDict['facilityDirs']: print pyrheautils.pathTranslate(dn)
facDict = tu.getFacDict(inputDict)

In [None]:
# Facility records from the 'facilityfacts_excluded' directory.  cleanTransferTbl reads
# this global to recognise transfer endpoints that are excluded from the model.
excludedFacDict = tu.parseFacilityData(pyrheautils.pathTranslate('$(MODELDIR)/facilityfacts_excluded'))

In [None]:
# Facility abbreviations renamed between RHEA 1.0 and RHEA 2.0, keyed old -> new.
obsoleteNameD = {
    'FDCT': 'FAIR',
    'CCHS': 'SCNH',
    'SJMC': 'SJNH',
    'CMCS': 'CPNH',
}

## Load the YAML constants for the transfer pattern implementations ##

In [None]:
# Each transfer-pattern implementation has a constants file and a schema name; both are
# handed to pyrheautils.importConstants.  They are grouped per implementation here.

# Indirect transfers
indirectConstantsFN = pyrheautils.pathTranslate('$(MODELDIR)/constants/indirecttransferdestination_constants.yaml')
indirectSchemaFN = 'indirecttransferdestination_constants_schema.yaml'
indirectJSON = pyrheautils.importConstants(indirectConstantsFN, indirectSchemaFN)

# Direct transfers
directConstantsFN = pyrheautils.pathTranslate('$(MODELDIR)/constants/transferbydrawwithreplacement_constants.yaml')
directSchemaFN = 'transferbydrawwithreplacement_constants_schema.yaml'
directJSON = pyrheautils.importConstants(directConstantsFN, directSchemaFN)

# Category-level direct transfers; these reuse the direct-transfer schema name.
categoryDirectConstantsFN = pyrheautils.pathTranslate('$(MODELDIR)/constants/categorydrawwithreplacement_constants.yaml')
categoryDirectSchemaFN = directSchemaFN
categoryDirectJSON = pyrheautils.importConstants(categoryDirectConstantsFN, categoryDirectSchemaFN)

# Community
communityConstantsFN = pyrheautils.pathTranslate('$(MODELDIR)/constants/community_constants.yaml')
communitySchemaFN = 'community_constants_schema.yaml'
communityConstantsJSON = pyrheautils.importConstants(communityConstantsFN, communitySchemaFN)


## Load the yaml tabular transfer data ##

and remember their filenames

In [None]:
allNeededYamlTables = []

indirectTbl = {}
print 'loading indirect transfers'
for fn in indirectJSON['transferFilePaths']:
    indirectTbl = addTransfersToTable(fn, indirectJSON['transferFileSchema'], indirectTbl)
    allNeededYamlTables.append(fn)

directTbl = {}
print 'loading direct transfers'
for fn in directJSON['transferFilePaths']:
    directTbl = addTransfersToTable(fn, indirectJSON['transferFileSchema'], directTbl)
    allNeededYamlTables.append(fn)

categoryDirectTbl = {}
print 'loading category direct transfers'
for fn in categoryDirectJSON['transferFilePaths']:
    categoryDirectTbl = addTransfersToTable(fn, indirectJSON['transferFileSchema'], categoryDirectTbl)
    allNeededYamlTables.append(fn)

bypassIndirectTbl = {}
print 'loading bypass indirect transfers'
for fn in indirectJSON['bypassTransferFilePaths']:
    bypassIndirectTbl = addTransfersToTable(fn, indirectJSON['transferFileSchema'], bypassIndirectTbl)
    allNeededYamlTables.append(fn)

# For the marginalized tables we only need to know filenames so the correct files can get created
print 'noting some needed marginalized files'
for fn in (communityConstantsJSON['srcToCategoryMapFilePaths']
           + communityConstantsJSON['bypassCategoryMapFilePaths']):
    allNeededYamlTables.append(fn)

allNeededYamlTables = list(set(allNeededYamlTables))  # get rid of any duplicates
allNeededYamlTables.sort()

print 'Basic needed yaml tables:'
for fn in allNeededYamlTables:
    print '   %s' % fn

## This block generates a new YAML table giving transfer counts for the 'missing' patients ##

The steps are:

* load the new and old versions of the CSV files
* check for any extraneous changes in the transfer table data
* check the new transfer table data against the current YAML version of that data
* put the new data on 'missing' patients in a new YAML file

In [None]:
with open(pyrheautils.pathTranslate('$(MODELDIR)/OC_Direct_Transfer_Matrices_for_RHEA_2.0_-_Adult_Only_-_09-15-2017_FINAL_HOSP-NH.csv')) as f:
    oldKeys, oldRecs = csv_tools.parseCSV(f)
print oldKeys
with open(pyrheautils.pathTranslate('$(MODELDIR)/OC_Direct_Transfer_Matrices_for_RHEA_2.0_-_Adult_Only_-_06-29-2018_FINAL__missing_admit_update_HOSP-NH.csv')) as f:
    newKeys, newRecs = csv_tools.parseCSV(f)
print newKeys

In [None]:
# Collapse every row whose label starts with 'MISSING' into one synthetic
# 'MISSING_SRC' row; all other rows pass through unchanged, and the synthetic row is
# appended last.
lblKey = '"Hospital-NH Transfers 2 Years of Data Averaged (2013-2014)"'
missingRec = {lblKey: 'MISSING_SRC'}
trimmedNewRecs = []
for rec in newRecs:
    rowLbl = rec[lblKey]
    if not rowLbl.startswith('MISSING'):
        trimmedNewRecs.append(rec)
        continue
    for key, val in rec.items():
        if key == lblKey:
            continue
        # NOTE(review): '+=' sums the cells only if parseCSV returns numbers; string
        # cells would be concatenated instead -- confirm against csv_tools.
        if key in missingRec:
            missingRec[key] += val
        else:
            missingRec[key] = val
trimmedNewRecs.append(missingRec)

In [None]:
# The new and old CSV files use different header text for their row-label column, so
# each set of records is indexed with its own lblKey.  recsToDict keeps only the 'To_*'
# columns of each row.
lblKey = '"Hospital-NH Transfers 2 Years of Data Averaged (2013-2014)"'
newD = recsToDict(trimmedNewRecs, lblKey)
print newD.keys()

lblKey = '"Transfers 2 Years of Data Averaged (2013-2014)"'
oldD = recsToDict(oldRecs, lblKey)
print oldD.keys()

This next cell checks the new direct transfer csv against the old (pre-missing-data) version, and checks the new version against the corresponding YAML data.

In [None]:
# Pass 1: every row of the old CSV must be present, unchanged, in the new CSV.
for key, rec in oldD.items():
    assert key in newD, 'Missing key %s' % key
    assert rec == newD[key], 'rec mismatch (%s vs %s)' % (rec, newD[key])
    #print '%s OK' % key
# Pass 2: apart from the synthetic MISSING_SRC row, the new CSV must not contain rows
# the old one lacks, and wherever a (src, dst) pair also exists in the YAML-derived
# directTbl the two counts must be equal.
for key, rec in newD.items():
    if key == 'MISSING_SRC':
        continue
    assert key in oldD, 'Missing key %s' % key
    assert rec == oldD[key], 'rec mismatch (%s vs %s)' % (rec, oldD[key])
    if key in directTbl:
        # commonDst is only consumed by the commented-out debug print below.
        commonDst = []
        for dst, ct in rec.items():
            if dst in directTbl[key]:
                assert ct == directTbl[key][dst], 'Vals for %s %s do not match: %s vs %s' % (key, dst, ct, directTbl[key][dst])
                commonDst.append(dst)
        #print '%s common destinations: %s' % (key, commonDst)
    # The trailing comma keeps all the 'OK' markers on one output line.
    print '%s OK; ' % key,

In [None]:
missingD = {}
for dst, vStr in newD['MISSING_SRC'].items():
    try:
        v = float(vStr)
        missingD[dst] = v
    except ValueError:
        print 'excluding %s %s' % (dst, vStr)

In [None]:
# Total of all numeric MISSING_SRC counts kept in missingD.
# NOTE(review): the name implies a per-year figure -- confirm the CSV's time basis.
totMissingPerYear = sum(missingD.values())
print 'total missing: ', totMissingPerYear

## Scan the existing yaml files without remembering them ##

In [None]:
for fn in (indirectJSON['transferFilePaths'] + directJSON['transferFilePaths']
           + categoryDirectJSON['transferFilePaths'] + ['$(MODELDIR)/com_to_fac_missing_patients_indirect.yaml']):
    print '----------------'
    tbl = addTransfersToTable(fn, indirectJSON['transferFileSchema'])
    tbl, inToOutD, outToInD = cleanTransferTbl(tbl, fn, obsoleteNameD)
    cleanTot = sumOfTableValues(tbl)
    inToOutTot = sum(inToOutD.values())
    outToInTot = sum(outToInD.values())
    print '%s: %s valid, %s in-to-out, %s out-to-in' % (fn, cleanTot, inToOutTot, outToInTot)
    print 'inToOutD: %s' % {a: b for a, b in inToOutD.items()}
    print 'outToInD: %s' % {a: b for a, b in outToInD.items()}

## Traffic To and From Excluded Sites ##

If an excluded site appears as a source in direct_transfer_counts, any patients flowing from that site to an included site (out-to-in) appear at the destination site from outside the system and thus should be 'missing source' patients.  That is, functionally they add to com_to_fac_missing_patients_indirect.yaml

If an excluded site appears as a destination in direct_transfer_counts, those patients are in-to-out and should appear as additional flow back to the community.  Ideally they would be removed from facDict[loc][totalTransfersOut] so that the go-home fraction calculated for the facility would increase appropriately.

If an excluded site appears as a source in hosp_indirect_transfer_counts or nh_readmit_transfer_counts, the out-to-in patient is still transferred from community to some in-simulation facility but the source location for that patient becomes unknown.  This would essentially be an addition to com_to_fac_marginalized_indirect.yaml .

If an excluded site appears as a destination in hosp_indirect_transfer_counts or nh_readmit_transfer_counts, the patient would appear not to have returned to the medical system from the community.  Thus these patients would subtract from com_to_fac_marginalized_indirect.yaml .

## Rationale for Normalization ##

For the indirect transfer tables, normalization is performed independently for each pair of source and destination tier, across all destinations matching that tier.  So as long as the source or tier is unique to a given input file, that input file can have any normalization.

We are concerned with adding the 'MISSING_SRC' data, which is replicated identically across all sources.  So for NHs the normalization must match other NH sources, and for non-NHs it must match hosp_indirect_transfer_counts.

| src tier  | dest tier  | count  | fname  | notes |
|---|---|---|---|---|
| non-NH  | non-NH  | 56863  | hosp_indirect_transfer_counts  | real counts over a year |
| any  | NH  | 7965.01 | com_to_fac_missing_patients_indirect  | each src and tier is a real count over a year |
| NH  | non-NH  | 76 x 1.0  | nh_readmit_fake_transfer_counts  | do we know the size of this channel? |

The only channel that delivers patients to the NH tier is com_to_fac_missing_patients_indirect, so that channel can have its own normalization.  We do not know the total counts/year in the nh_readmit_fake_transfer_counts channel, but because its combination of src and dest tier is unique it will be independently normalized.

## Load and remember some principal files ##

In [None]:
# For each of the three principal transfer files: load the raw table, then split it
# with cleanTransferTbl into the cleaned table plus the in-to-out and out-to-in tallies.
# All three are read with indirectJSON['transferFileSchema'].
# NOTE(review): fn is already translated here and addTransfersToTable calls
# pathTranslate on it again -- presumably a no-op for a translated path; confirm.
fn = pyrheautils.pathTranslate('$(MODELDIR)/hosp_indirect_transfer_counts.yaml')
rawHospIndirectTransfers = addTransfersToTable(fn, indirectJSON['transferFileSchema'])
cleanHospIndirectTransfers, hospIndirectInToOutD, hospIndirectOutToInD = cleanTransferTbl(rawHospIndirectTransfers,
                                                                                          fn, obsoleteNameD)
fn = pyrheautils.pathTranslate('$(MODELDIR)/nh_readmit_transfer_counts.yaml')
rawNHReadmitTransfers = addTransfersToTable(fn, indirectJSON['transferFileSchema'])
cleanNHReadmitTransfers, nhReadmitInToOutD, nhReadmitOutToInD = cleanTransferTbl(rawNHReadmitTransfers,
                                                                                 fn, obsoleteNameD)
fn = pyrheautils.pathTranslate('$(MODELDIR)/direct_transfer_counts.yaml')
rawDirectTransfers = addTransfersToTable(fn, indirectJSON['transferFileSchema'])
cleanDirectTransfers, directInToOutD, directOutToInD = cleanTransferTbl(rawDirectTransfers,
                                                                        fn, obsoleteNameD)


## Put the missing data in the same format ##

Note that we are regenerating the missing data from the newD CSV records rather than using any existing yaml file.

In [None]:

cleanMissingTransfers, missingInToOutD, missingOutToInD = cleanTransferTbl({'COMMUNITY': missingD},
                                                                          'noRealFile', obsoleteNameD)
print 'missingInToOut: %s' % {k:v for k,v in missingInToOutD.values()}
print 'missingOutToIn: %s' % {k:v for k,v in missingOutToInD.values()}
print cleanMissingTransfers.keys()

## Tables We Have, And What To Do With Them ##

| src tier  | dest tier  | via home? | name | notes |
|-----------|------------|-----------|------|-------|
| any | any  |  no  | cleanDirectTransfers | |
| non-NH | non-NH | yes | cleanHospIndirectTransfers | |
| | |  | cleanNHReadmitTransfers | empty |
| NH | non-NH | yes | nh_readmit_fake_transfer_counts | made from hosp_indirect etc below |
| None | any | yes | com_to_fac_marginalized_indirect | made from hosp_indirect etc below |
| NH | non-NH | yes | nh_to_cat_fake_indirect | made from cleanHospIndirectTransfers |

* directInToOutD : This would be discharges to community.  If we regenerate direct_transfer_counts from cleanDirectTransfers we could then regenerate the facility files with updated discharge info.  facility_data_manipulator.ipynb can reconstruct those, but it depends on all the .yaml files this notebook is producing, including the marginalized versions.

* directOutToInD : This would be flow from community to facilities.  It would increase the admission counts of the destination facilities, and we would have to increase the community get-sick rate to compensate.

* cleanMissingTransfers : This would be flow from community to facilities.  Can we just merge it with directOutToInD?

* cleanHospIndirectTransfers + cleanNHReadmitTransfers :

* hospIndirectOutToInD : empty. If there were any, they would look like admissions with history None.

* hospIndirectInToOutD : entries look like people who do not return to the health care system

* cleanNHReadmitTransfers : empty

* nhReadmitOutToInD: empty

* nhReadmitInToOutD: empty

After we have these, we have to marginalize destinations to facility categories to provide input for community getStatusChangeTree.
 


## Check main yaml files against internal representation and optionally rewrite them ##

This step will finally create the com_to_fac_missing_patients_indirect table if it does not already exist

In [None]:

# Compare each cleaned in-memory table with its yaml file; checkAndMaybeRewrite writes
# a missing file and asks before overwriting one that differs, restricted to the files
# listed in allNeededYamlTables.
checkAndMaybeRewrite('$(MODELDIR)/direct_transfer_counts.yaml', cleanDirectTransfers,
                    createOnlyThese=allNeededYamlTables)
checkAndMaybeRewrite('$(MODELDIR)/hosp_indirect_transfer_counts.yaml', cleanHospIndirectTransfers,
                    createOnlyThese=allNeededYamlTables)
checkAndMaybeRewrite('$(MODELDIR)/nh_readmit_transfer_counts.yaml', cleanNHReadmitTransfers,
                    createOnlyThese=allNeededYamlTables)
# The two comment fragments are adjacent string literals, so the separating space must
# be part of one of them.
checkAndMaybeRewrite('$(MODELDIR)/com_to_fac_missing_patients_indirect.yaml', cleanMissingTransfers,
                    comment=("This file was generated from the MISSING entries of the"
                            " direct transfer table"),
                    createOnlyThese=allNeededYamlTables)

In [None]:
# Irrespective of their history, some fraction of newly sick COM patients need to follow missingD.
# Respecting their history, some need to follow hosp_indirect_transfer_counts and nh_readmit_transfer_counts.
#
# Every non-COMMUNITY facility gets its own copy of the COMMUNITY row of the cleaned
# missing-patient table.
cleanMissing = {}
for fac in facDict:
    if facDict[fac]['category'] == 'COMMUNITY':
        continue
    cleanMissing[fac] = cleanMissingTransfers['COMMUNITY'].copy()
checkAndMaybeRewrite('$(MODELDIR)/missing_indirect_transfer_counts.yaml', cleanMissing,
                    comment=("This file was generated by taking the 'missing' arrival count for"
                            " all facilities as incoming transfers and replicating the resulting"
                            " transfers across all facilities"),
                    createOnlyThese=allNeededYamlTables)


In [None]:
# We need to provide a table of indirect transfer destination categories originating from NHs,
# but that data is not included in the available tables.  A fair solution is probably to use
# the indirect transfers from hospitals averaged over hospitals, and apply it to all NHs.
catCtD = defaultdict(lambda: 0)
allSrcTbl = defaultdict(lambda: 0)
srcCatCtD = defaultdict(lambda: defaultdict(lambda: 0))
for srcName, rec in cleanHospIndirectTransfers.items():
    for dstName, ct in rec.items():
        dstCategory = facDict[dstName]['category']
        catCtD[dstCategory] += float(ct)
        allSrcTbl[dstName] += float(ct)
        srcCatCtD[srcName][dstCategory] += float(ct)
tot = sum(catCtD.values())
print tot
netRec = {cat: float(ct)/float(tot) for cat, ct in catCtD.items()}
print netRec
fakeD = {}
for abbrev in facDict:
    if facDict[abbrev]['category'] == 'NURSINGHOME':
        fakeD[abbrev] = netRec.copy()
checkAndMaybeRewrite('$(MODELDIR)/nh_to_cat_fake_indirect.yaml', fakeD,
                    comment=("This table was generated by finding the overall fraction of"
                            " indirect transfers originating from any HOSP or LTAC to"
                            " facilities of the given category and replicating that ratio"
                            " for all NURSINGHOMES."),
                    createOnlyThese=allNeededYamlTables)


In [None]:
# We need a table of missing transfer information marginalized across destination categories
# to serve as the upstream end for the com_to_fac_missing_patients_indirect table.
#
# Step 1: per source, add up the counts by destination category.
tmpD = defaultdict(lambda: defaultdict(lambda: 0))
for srcName, rec in cleanMissingTransfers.items():
    for dstName, ct in rec.items():
        dstCategory = facDict[dstName]['category']
        tmpD[srcName][dstCategory] += float(ct)
        #print '%s %s' % (srcName, dstCategory)
# Step 2: turn each source's category counts into fractions of that source's total.
comCatMissingD = {}
for srcName, rec in tmpD.items():
    tot = sum(rec.values())
    comCatMissingD[srcName] = {dstCat: float(ct)/float(tot) for dstCat, ct in rec.items()}
checkAndMaybeRewrite('$(MODELDIR)/com_to_cat_missing_patients_indirect.yaml', comCatMissingD,
                    comment=("This table was generated by summing com_to_fac_missing_patients_indirect"
                            " over destination category"),
                    createOnlyThese=allNeededYamlTables)


In [None]:
# We also need a table mapping from NHs to destination HOSPs to substitute for the missing information
# on indirect transfers originating from NHs.  This is essentially the second half of the routing
# provided by nh_to_cat_fake_indirect.yaml created above.
#
# NOTE(review): allSrcTbl is accumulated from cleanHospIndirectTransfers only, although
# the file comment below also names nh_readmit_transfer_counts.yaml.
totCt = sum(allSrcTbl.values())
normAllSrcTbl = {}
for dst, ct in allSrcTbl.items():
    normAllSrcTbl[dst] = float(ct)/float(totCt)
fakeD = {}
for abbrev in facDict:
    if facDict[abbrev]['category'] != 'NURSINGHOME':
        continue
    # Each nursing home gets an independent plain-dict copy of the normalized map.
    fakeD[abbrev] = {key:val for key,val in normAllSrcTbl.items()}
checkAndMaybeRewrite('$(MODELDIR)/nh_readmit_fake_transfer_counts.yaml', fakeD,
                    comment=("This was generated by summing hosp_indirect_transfer_counts.yaml"
                            " and nh_readmit_transfer_counts.yaml over source locations and"
                            " normalizing, then replicating the resulting map for all NHs"),
                    createOnlyThese=allNeededYamlTables)


In [None]:
# Normalize each source's per-category indirect-transfer counts so that they sum to 1.
normSrcCatCtD = {}
for src, catD in srcCatCtD.items():
    tot = sum(catD.values())
    normCatCtD = {}
    for cat, ct in catD.items():
        normCatCtD[cat] = ct/tot
    normSrcCatCtD[src] = normCatCtD
checkAndMaybeRewrite('$(MODELDIR)/fac_to_cat_marginalized_indirect.yaml', normSrcCatCtD,
                    comment=("This was generated by summing hosp_indirect_transfer_counts.yaml"
                            " and nh_readmit_transfer_counts.yaml across destination facility"
                            " by category and then normalizing the resulting counts by source"
                            " facility"),
                    createOnlyThese=allNeededYamlTables)

In [None]:
# Spot check of one facility: raw per-category counts, then the normalized fractions.
# NOTE(review): srcCatCtD is a defaultdict, so indexing it inserts an empty row when
# 'SJUD' is absent; the second line raises KeyError in that case.
print srcCatCtD['SJUD']
print normSrcCatCtD['SJUD']


In [None]:
# Destination totals over all sources, filed under a single 'COMMUNITY' source.  The
# inner comprehensions copy the data into plain dicts (allSrcTbl is a defaultdict).
comToFacIndirectD = {'COMMUNITY': {key:val for key,val in allSrcTbl.items()}}
checkAndMaybeRewrite('$(MODELDIR)/com_to_fac_indirect.yaml', comToFacIndirectD,
                    comment=("This was generated by summing hosp_indirect_transfer_counts.yaml"
                            " and nh_readmit_transfer_counts.yaml over source locations"),
                    createOnlyThese=allNeededYamlTables)
# The same table with the destination counts normalized.
comToFacMarginalizedIndirectD = {'COMMUNITY': {key:val for key,val in normAllSrcTbl.items()}}
checkAndMaybeRewrite('$(MODELDIR)/com_to_fac_marginalized_indirect.yaml', comToFacMarginalizedIndirectD,
                    comment=("This was generated by summing hosp_indirect_transfer_counts.yaml"
                            " and nh_readmit_transfer_counts.yaml over source locations and"
                            " normalizing"),
                    createOnlyThese=allNeededYamlTables)
oD = comToFacMarginalizedIndirectD  # keep the scratch name bound to the normalized table
#kL = allSrcTbl.keys()[:]
#kL.sort()
#for k in kL:
    #print '%s: %s' % (k, allSrcTbl[k])

In [None]:
goHomeRec = {}
for fac in [f for f in facDict if facDict[f]['category'] != 'COMMUNITY']:
    nDischarged = facDict[fac]['totalDischarges']['value']
    nTransOut = sum(srcCatCtD[fac].values())
    goHomeRec[fac] = nDischarged - nTransOut
goHomeD= {'COMMUNITY': goHomeRec}
cleanGoHomeD, goHomeInToOutD, goHomeOutToInD = cleanTransferTbl(goHomeD, 'NoRealFile', obsoleteNameD)
print 'goHomeInToOutD: %s' % {k: v for k, v in goHomeInToOutD.items()}
print 'goHomeOutToInD: %s' % {k: v for k, v in goHomeOutToInD.items()}
checkAndMaybeRewrite('$(MODELDIR)/com_to_fac_from_discharge_rate.yaml', cleanGoHomeD,
                    comment=("The flow given here for each facility is the total discharge"
                            " rate for the facility minus total direct transfers out of the"
                            " facility"),
                    createOnlyThese=allNeededYamlTables)

In [None]:
# Grand totals of the main tables, for comparison by eye.
print 'sum of cleanGoHomeD: ', sumOfTableValues(cleanGoHomeD)
print 'sum of cleanHospIndirectTransfers: ', sumOfTableValues(cleanHospIndirectTransfers)
print 'sum of cleanNHReadmitTransfers: ', sumOfTableValues(cleanNHReadmitTransfers)
print 'sum of cleanMissingTransfers: ', sumOfTableValues(cleanMissingTransfers)
# Add up the mean population of every COMMUNITY entry in facDict.
totComPop = 0
for fac, rec in facDict.items():
    if rec['category'] == 'COMMUNITY':
        totComPop += rec['meanPop']['value']
print 'total community population: ', totComPop
# NOTE(review): 1.925e-4 looks like a per-person daily rate scaled to a year by the
# factor 365 -- its origin is not shown here; confirm and name the constant.
oldRate = 365*1.925e-4*totComPop
print 'old rate: ', oldRate
print sumOfTableValues(cleanGoHomeD) - oldRate
# Break the go-home flow down by the category of each facility.
d = defaultdict(lambda: 0)
for src, ct in cleanGoHomeD['COMMUNITY'].items():
    if src in facDict:
        d[facDict[src]['category']] += ct
d = {k:v for k,v in d.iteritems()}
print 'cleanGoHomeD categories: ', d


In [None]:
# Per-facility summary numbers consumed by printTbl:
#   deltaD : discharges minus admissions
#   aveD   : mean of discharges and admissions
#   itD    : indirect transfers out, from cleanHospIndirectTransfers (0.0 if absent)
#   dtD    : direct transfers out, summed from the facility's totalTransfersOut records
deltaD = {}
aveD = {}
itD = {}
dtD = {}
# Spot check of one facility; raises KeyError if 'SJUD' has no indirect-transfer row.
print sum(cleanHospIndirectTransfers['SJUD'].values())
for fac, rec in facDict.items():
    if facDict[fac]['category'] != 'COMMUNITY':
        nDischarged = facDict[fac]['totalDischarges']['value']
        nDirectTrans = sum([r['count']['value'] for r in facDict[fac]['totalTransfersOut']])
        nIndirectTrans = (sum(cleanHospIndirectTransfers[fac].values())
                          if fac in cleanHospIndirectTransfers else 0.0)
        nAdmitted = facDict[fac]['totalAdmissions']['value']
        deltaD[fac] = nDischarged - nAdmitted
        aveD[fac] = 0.5*(nDischarged + nAdmitted)
        itD[fac] = nIndirectTrans
        dtD[fac] = nDirectTrans

def printTbl(facL = None):
    if facL is None:
        facL = [fac for fac in facDict if facDict[fac]['category'] != 'COMMUNITY']
        facL.sort()
    for fac in facL:
        dv = deltaD[fac] if fac in deltaD else 0.0
        av = aveD[fac] if fac in aveD else 0.0
        itv = itD[fac] if fac in itD else 0.0 
        dtv = dtD[fac] if fac in dtD else 0.0
        mv = cleanMissingTransfers['COMMUNITY'][fac] if fac in cleanMissingTransfers['COMMUNITY'] else 0.0
        print '%s (%s) %s %s %s %s %s' % (fac, facDict[fac]['category'], dv, av, itv, dtv, mv)
#printTbl()
# Spot checks for two facilities, followed by PACI's individual numbers.
printTbl(['SJUD', 'PACI'])
print cleanMissingTransfers['COMMUNITY']['PACI']
print facDict['PACI']['totalTransfersIn']['value']
print facDict['PACI']['totalDischarges']['value']
print facDict['PACI']['totalDischarges']['value'] - facDict['PACI']['totalTransfersIn']['value']
# For each hospital, print the recorded totalTransfersIn (v5) next to the inbound total
# recomputed from cleanDirectTransfers (v6).
# NOTE(review): tot and v1-v4 are only referenced by the commented-out prints below;
# the v4 line raises KeyError for a hospital with no row in cleanDirectTransfers.
tot = 0.0
for fac in facDict:
    if facDict[fac]['category'] == 'HOSPITAL':
        v1 = cleanMissingTransfers['COMMUNITY'][fac] if fac in cleanMissingTransfers['COMMUNITY'] else 0.0
        #v2 = facDict[fac]['totalDischarges']['value'] - (facDict[fac]['totalTransfersIn']['value'] + allSrcTbl[fac])
        v2 = facDict[fac]['totalDischarges']['value']
        v3 = sum(r['count']['value'] for r in facDict[fac]['totalTransfersOut'])
        v4 = sum(cleanDirectTransfers[fac].values())
        v5 = facDict[fac]['totalTransfersIn']['value']
        v6 = 0.0
        for src in cleanDirectTransfers:
            if fac in cleanDirectTransfers[src]:
                v6 += cleanDirectTransfers[src][fac]
        #print fac, v1, v2, v3, v4, sum(srcCatCtD[fac].values())
        print fac, v5, v6


cleanGoHomeD is (all discharges - HOSP_indirect), so it represents counts that are not expected to return.

Balanced numbers: everyone gets (totalDischarges - totalTransfersIn) regardless of source

Obeying indirect transfer matrix: everyone gets sum(cleanIndirect[?][dst=fac]) + cleanGoHomeD[fac]

actualMissing = (balanced numbers) - (obeying indirect transfer matrix)

I can use 
```
sum(cleanDirectTransfers[fac].values())
```
in lieu of 
```
sum(r['count']['value'] for r in facDict[fac]['totalTransfersOut'])
```
to get the total number of direct transfers out

I can use
```
        for src in cleanDirectTransfers:
            if fac in cleanDirectTransfers[src]:
                v6 += cleanDirectTransfers[src][fac]
```
in lieu of
```
    facDict[fac]['totalTransfersIn']['value']
```

In [None]:
# For each non-COMMUNITY facility, the flow that must arrive from the community so that
# arrivals balance discharges: total discharges minus the direct transfers arriving
# from all sources in cleanDirectTransfers.
balancedFromCommunityD = {}
for fac in facDict:
    if facDict[fac]['category'] == 'COMMUNITY':
        continue
    nOut = facDict[fac]['totalDischarges']['value']
    nInByDirectTransfer = 0.0
    for src in cleanDirectTransfers:
        if fac in cleanDirectTransfers[src]:
            nInByDirectTransfer += cleanDirectTransfers[src][fac]
    nFromCommunity = nOut - nInByDirectTransfer
    #print fac, nInByDirectTransfer, nOut, nFromCommunity
    balancedFromCommunityD[fac] = nFromCommunity
# The two comment fragments are adjacent string literals, so the separating space must
# be part of one of them.
checkAndMaybeRewrite('$(MODELDIR)/com_to_fac_to_balance_discharges.yaml',
                     {'COMMUNITY': balancedFromCommunityD},
                    comment=("The flow given here is the discharge rate minus the"
                            " total direct transfers arriving at the facility"),
                    createOnlyThese=allNeededYamlTables)

In [None]:
# Add the per-facility balancing flows up by facility category.
catTotals = defaultdict(lambda: 0.0)
for fac, ct in balancedFromCommunityD.items():
    category = facDict[fac]['category']
    catTotals[category] += ct
# Copy into a plain dict before handing the table on.
balancedFromCommunityToCatD = {k: v for k, v in catTotals.items()}
#print balancedFromCommunityToCatD
checkAndMaybeRewrite('$(MODELDIR)/com_to_cat_to_balance_discharges.yaml',
                    {'COMMUNITY': balancedFromCommunityToCatD},
                    comment=('The flow given here is com_to_fac_to_balance_discharges.yaml'
                            ' marginalized over destination categories'),
                    createOnlyThese=allNeededYamlTables)

In [None]:
indirectFromCommunityD = {}
for fac in facDict:
    if facDict[fac]['category'] == 'COMMUNITY':
        continue
    nFromIndirect = 0.0
    for src in cleanHospIndirectTransfers:
        if fac in cleanHospIndirectTransfers[src]:
            nFromIndirect += cleanHospIndirectTransfers[src][fac]
    indirectFromCommunityD[fac] = nFromIndirect
    #if facDict[fac]['category'] == 'NURSINGHOME':
    #    print fac, nFromIndirect, balancedFromCommunityD[fac], (cleanMissingTransfers['COMMUNITY'][fac] 
    #                                                     if fac in cleanMissingTransfers['COMMUNITY'] else 0.0)
realDeficitD = {}
for fac in balancedFromCommunityD:
    nFromCommunity = balancedFromCommunityD[fac]
    nFromIndirect = indirectFromCommunityD[fac] if fac in indirectFromCommunityD else 0.0
    realDeficitD[fac] = nFromCommunity - nFromIndirect
    #if facDict[fac]['category'] != 'NURSINGHOME':
    #    print (fac, facDict[fac]['totalAdmissions']['value'], facDict[fac]['totalDischarges']['value'],
    #          realDeficitD[fac], 
    #           (cleanMissingTransfers['COMMUNITY'][fac] if fac in cleanMissingTransfers['COMMUNITY'] else 0.0))
fac = 'HOAG'
nInByDirectTransfer = 0.0
for src in cleanDirectTransfers:
    if fac in cleanDirectTransfers[src]:
        nInByDirectTransfer += cleanDirectTransfers[src][fac]
nOut = facDict[fac]['totalDischarges']['value']
nFromIndirect = indirectFromCommunityD[fac] if fac in indirectFromCommunityD else 0.0
realDeficit = realDeficitD[fac]
print fac, nOut, nInByDirectTransfer, balancedFromCommunityD[fac], nFromIndirect, realDeficit
# realDeficit here is the number of thaws which go to fac *beyond* those necessary to cover indirect transfers
# Total thaws going to this fac should be nFromIndirect + realDeficit == balancedFromCommunity[fac]
# Total thaws should equal sum(balancedFromCommunity.values())
# Total thaws excluding 'corrected missing' should be sum(admissions - directTransfersIn)
#                                                   = sum(admissions) - sum(cleanDirectTransfers)
totPerYear = sum(balancedFromCommunityD.values())
totRate = totPerYear/(365.0 * totComPop)
print totPerYear, totRate
totAdmissions = 0.0
for fac in facDict:
    if facDict[fac]['category'] != 'COMMUNITY':
        totAdmissions += facDict[fac]['totalAdmissions']['value']
nonBypassPerYear = totAdmissions - sumOfTableValues(cleanDirectTransfers)
print nonBypassPerYear
bypassFrac = (totPerYear - nonBypassPerYear)/totPerYear
print 'total thaws per year: ', totPerYear
print 'base thaw rate is: ', totRate
print 'bypass fraction is: ', bypassFrac

# total thaws going to fac should be balancedFromCommunityD[fac]
# total thaws which actually go there are totPerYear * fraction to the given fac
# the best estimate of fraction to the given fac is probably from com_to_fac_marginalized_indirect
trueMissingD = {}
for fac in facDict:
    if facDict[fac]['category'] != 'COMMUNITY':
        nPerYear = (totPerYear * comToFacMarginalizedIndirectD['COMMUNITY'][fac]
                    if fac in comToFacMarginalizedIndirectD['COMMUNITY'] else 0.0)
        trueMissingD[fac] = (balancedFromCommunityD[fac] - nPerYear)
if tableNonNeg({'COMMUNITY': trueMissingD}):
    checkAndMaybeRewrite('$(MODELDIR)/com_to_fac_true_missing.yaml',
                        {'COMMUNITY': trueMissingD},
                        comment=("This table gives an estimate of the number of discharges not"
                                " accounted for by direct transfers, indirect transfers, and"
                                " admissions from the community which are neither direct nor indirect"),
                        createOnlyThese=allNeededYamlTables)
else:
    print 'This idea fails because discharges may be less than admissions due to sampling error'
print comToFacMarginalizedIndirectD['COMMUNITY']['HOAG']
print (totPerYear * comToFacMarginalizedIndirectD['COMMUNITY']['HOAG'])
print balancedFromCommunityD['HOAG']

In [None]:
# Roll the per-facility entries of cleanGoHomeD['COMMUNITY'] up into one total per
# destination category, then write the result as a COMMUNITY -> category table.
toCatD = defaultdict(int)
for fac, ct in cleanGoHomeD['COMMUNITY'].items():
    toCatD[facDict[fac]['category']] += ct
comToCatD = {'COMMUNITY': dict(toCatD)}
checkAndMaybeRewrite(
    '$(MODELDIR)/com_to_cat_from_discharge_rate.yaml',
    comToCatD,
    comment=("These values were generated by summing com_to_fac_from_discharge_rate.yaml"
             " by destination category"),
    createOnlyThese=allNeededYamlTables)


In [None]:
print cleanHospIndirectTransfers.keys()
print cleanNHReadmitTransfers.keys()
print cleanMissing.keys()
print [facDict[fac]['category'] for fac in cleanMissing.keys()]