
## 4. <a id='4_cell'></a>Create MeasureReport (MR) using CSV, Pandas and the PyFHIR models....

This uses Pandas to easily load patient data from a CSV file and create a MeasureReport (MR). The MR is then converted back to CSV to demonstrate the round trip.

- I'm using:
  - the [CDC/NHSN csv](https://www.cdc.gov/nhsn/pdfs/covid19/import-covid19-data-508.pdf) format with sample files `pyFHIR_models/c19_data/source/c19_data.csv` and `pyFHIR_models/c19_data/source/c19_data_min.csv` containing data for Hospital X over a period of several days
  - the fema format csv based on the template referenced [here](https://www.cms.gov/files/document/32920-hospital-letter-vice-president-pence.pdf), the csv format is described in `pyFHIR_models/c19_data/source/Template_for_Daily_Hospital_COVID19_Reporting (1).xlsx` with sample file `pyFHIR_models/c19_data/source/fema_data.csv` 
- convert csv to a Pandas dataframe
- create MR from each row in dataframe
    -  based upon this [Measure definition](file:///Users/ehaas/Documents/FHIR/IG-template4/docs/Measure-cdc-covid.html)
    -  using static metadata for now
    -  
- Validate MR
- Bundle the MRs, Save and Post to a reference server
- Convert bundle back to the CDC/NHSN csv format
- Save CSV file

In [117]:
cd ~/pyFHIR_models # This is to make sure in right directory for Binder

[Errno 2] No such file or directory: '/Users/ehaas/pyFHIR_models # This is to make sure in right directory for Binder'
/Users/ehaas/Documents/Python/MyBinder/pyFHIR_models


### import all the modules you need

In [118]:
from fhir_model_generator.model import measurereport, bundle, location, fhirdate, fhirreference, identifier, contactpoint, address
from json import dumps
from pandas import *
from pathlib import Path
from requests import get, post
from datetime import datetime, date, timedelta
import uuid
from IPython.display import display as Display, HTML, Markdown

### Validate, Save, Bundle, POST Definitions (functions)
- used by the main process

In [119]:
# Input and output directories, joined onto the current working directory
# by the cells that read and write files.
in_path = 'c19_data/source'
out_path = 'c19_data/MR_bundle'
# Alternative CDC input files:
#   'c19_data.csv'      max file with all fields filled out
#   'c19_data_min.csv'  min file with only required fields filled out

# One mapping per measure: the bundle name, the source CSV, the canonical
# measure URL, the scoring type, and the group names (CSV column names).
cdc = {
    'name': 'cdc-c19-cohort',
    'in_file': 'c19_data.csv',
    'measure': "http://cdcmeasures.example.org/cdc-covid",
    'scoring': "cohort",
    'groups': [
        "numVent",
        "numVentUse",
        "numTotBeds",
        "numbeds",
        "numBedsOcc",
        "numICUBeds",
        "numICUBedsOcc",
        "numC19HospPats",
        "numC19MechVentPats",
        "numC19HOPats",
        "numC19OverflowPats",
        "numC19OFMechVentPats",
        "numC19Died",
    ],
}

# For proportion scoring, 'pops' is index-aligned with 'groups': each tuple
# is (initial-population / denominator column, numerator column).
fema_proportion = {
    'name': 'fema-c19-proportion',
    'in_file': 'fema_data.csv',
    'measure': "http://build.fhir.org/ig/AudaciousInquiry/saner-ig/fema-proportion",
    'scoring': "proportion",
    'groups': [
        "newPercentPositive",
        "cumulativePercentPositive",
    ],
    'pops': [
        ("newTestsResulted", "newPositiveC19Tests"),
        ("cumulativeTestsPerformed", "cumulativePositiveC19Tests"),
    ],
}

# NOTE(review): 'fema-c1-cohort' looks like a typo for 'fema-c19-cohort'
# (compare the other names); it ends up in the bundle id and the saved
# file names, so confirm before changing it.
fema_cohort = {
    'name': 'fema-c1-cohort',
    'in_file': 'fema_data.csv',
    'measure': "http://build.fhir.org/ig/AudaciousInquiry/saner-ig/fema-cohort",
    'scoring': "cohort",
    'groups': [
        "newDiagnosticTests",
        "cumulativeDiagnosticTests",
        "cumulativeSpecimensRejected",
    ],
}

mr_data_mapping = [cdc, fema_cohort, fema_proportion]

# ---------------------------------------------------------------
# Select the measure to process by its index in mr_data_mapping.
mr_data = mr_data_mapping[1]
# ---------------------------------------------------------------

# Reference FHIR server (alternative: 'http://hapi.fhir.org/baseR4').
ref_server = 'http://test.fhir.org/r4'
headers = {
    'Accept': 'application/fhir+json',
    'Content-Type': 'application/fhir+json',
}
# Query parameters sent with every request (none at the moment).
params = {}

def isNaN(num):
    """Return the result of comparing *num* with itself (``num == num``).

    NOTE: despite the name, the result is truthy when *num* is NOT NaN.
    A float NaN compares unequal to itself and yields ``False``; ordinary
    values (including ``None``) yield ``True``. Callers in this notebook
    rely on this, so the behavior must not be inverted.
    """
    equals_itself = num == num
    return equals_itself

def validate_me(pyfhir_res):
    """POST a resource to the reference server's ``$validate`` operation.

    The resource is serialized with ``as_json()`` and sent to
    ``{ref_server}/{resource_type}/$validate``. The HTTP status code and
    the narrative (``text.div``) of the response are displayed as HTML.

    Args:
        pyfhir_res: a model instance exposing ``resource_type`` and
            ``as_json()``.
    """
    print(f'validating to {ref_server}...')
    r = post(
        f'{ref_server}/{pyfhir_res.resource_type}/$validate',
        params=params, headers=headers, data=dumps(pyfhir_res.as_json()),
    )
    try:
        narrative = r.json()["text"]["div"]
    except (KeyError, TypeError, ValueError):
        # The body was not JSON, or the returned resource has no narrative.
        # Still show the status code instead of crashing the cell.
        narrative = '<em>No narrative returned by server</em>'
    display(HTML(
        '<h1>Validation output</h1>'
        f'<h3>Status Code = {r.status_code}</h3>'
        f'{narrative}'
        ))
    
   
def bundle_me(pyfhir_res, fhir_bundle=None):
    """Wrap a resource in a POST bundle entry and add it to a bundle.

    The resource gets a fresh UUID as its id, and the matching
    ``urn:uuid:`` value becomes the entry's fullUrl. When no bundle is
    passed in, a new transaction bundle is created; its id combines the
    selected measure name with a UTC timestamp, and its timestamp is
    taken from the resource's ``date``.

    Args:
        pyfhir_res: resource to add; its ``id`` is overwritten.
        fhir_bundle: existing bundle to extend, or None to create one.

    Returns:
        The bundle with the new entry appended.
    """
    stamp = datetime.utcnow().strftime('%Y%m%d%H%M%S%f')
    urn = uuid.uuid1().urn
    pyfhir_res.id = urn[9:]  # drop the 9-character 'urn:uuid:' prefix

    entry = bundle.BundleEntry()
    entry.fullUrl = urn
    entry.resource = pyfhir_res
    request = bundle.BundleEntryRequest()
    entry.request = request
    request.method = 'POST'
    request.url = pyfhir_res.resource_type

    if not fhir_bundle:
        # First resource: create the transaction bundle that holds all entries.
        fhir_bundle = bundle.Bundle({
            'id': f'{mr_data["name"]}-{stamp}',
            'type': 'transaction',
            'timestamp': pyfhir_res.date.as_json(),
            'entry': [],
        })
    fhir_bundle.entry.append(entry)
    return fhir_bundle
    
def save_me(pyfhir_res):
    """Write a resource as indented JSON under ``out_path``.

    The file is named ``{resource_type}-{id in lower case}.json`` and is
    placed in ``out_path`` relative to the current working directory.

    Args:
        pyfhir_res: a model instance exposing ``resource_type``, ``id``
            and ``as_json()``.
    """
    print('...........saving to file............')
    path = Path.cwd() / out_path / f'{pyfhir_res.resource_type}-{pyfhir_res.id.lower()}.json'
    # Create the output directory on first use so the write cannot fail
    # just because the directory is missing.
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(dumps(pyfhir_res.as_json(), indent=4))
 

def post_me(pyfhir_res):
    """POST a resource to the base URL of the reference server.

    Displays the HTTP status code and response headers as HTML. The
    narrative (``text.div``) of the response is included when present;
    when the response JSON has no such key it is left out.

    Args:
        pyfhir_res: a model instance exposing ``as_json()``.
    """
    print(f'posting to {ref_server} as transaction bundle ...')
    response = post(
        f'{ref_server}',
        params=params, headers=headers, data=dumps(pyfhir_res.as_json()),
    )
    summary = (
        '<h1>Post Response</h1>'
        f'<h3>Status Code = {response.status_code}</h3><br />'
        f'<pre>Response Headers: {dumps(dict(response.headers), indent=4)}</pre>'
    )
    try:
        narrative = f'<em>Resource Narrative</em>: {response.json()["text"]["div"]}'
    except KeyError:
        # Response carries no narrative: show status and headers only.
        narrative = ''
    separator = '===============================================<br /><br /><br />'
    display(HTML(summary + narrative + separator))



### Get local file and Read into Pandas DataFrame

- convert date string to date object

In [120]:
# Load the input CSV selected by mr_data from the source directory.
path = Path.cwd() / in_path / mr_data["in_file"]
df = read_csv(path)
# Parse the date strings into pandas timestamps. The values are kept as
# timestamps because the main loop calls .date() on each row's value.
# (The former bare `df['collectiondate'].dt.date` expression was removed:
# its result was discarded, so it had no effect.)
df['collectiondate'] = to_datetime(df['collectiondate'])
df

Unnamed: 0,collectiondate,State,County,newDiagnosticTests,cumulativeDiagnosticTests,newTestsResulted,cumulativeSpecimensRejected,cumulativeTestsPerformed,newPositiveC19Tests,cumulativePositiveC19Tests,newPercentPositive,cumulativePercentPositive
0,2020-04-04,CA,Marin,100,1000,80,45,800,8,80,0.1,0.1
1,2020-04-05,CA,Marin,100,1100,90,55,890,9,89,0.1,0.1
2,2020-04-06,CA,Marin,100,1200,100,65,990,9,98,0.09,0.09899
3,2020-04-07,CA,Marin,100,1300,100,75,1090,11,109,0.11,0.1
4,2020-04-08,CA,Marin,100,1400,90,85,1180,10,119,0.111111,0.100847
5,2020-04-09,CA,Marin,100,1500,80,95,1260,10,129,0.125,0.102381
6,2020-04-10,CA,Marin,100,1600,70,105,1330,8,137,0.114286,0.103008


### Create the MR Resource

 - start with fixed meta data for MR as dict


#### Variables

In [121]:

# State shared with the main loop.
measureScore = None
my_bundle = None

# Current UTC time as a FHIR date-time, used as the report date.
f_now = fhirdate.FHIRDate(f'{datetime.utcnow().isoformat()}Z')
f_now = f_now.as_json()

NSHN_system = 'urn:oid:2.16.840.1.113883.6.277' # OID for NHSN
NSHN_OID = '2.16.840.1.113883.6.277.123'  # Fake OID
pop_code_system = "http://terminology.hl7.org/CodeSystem/measure-population"

# Physical type for the contained Location: a jurisdiction (US county).
jdn = dict(
    system ='http://terminology.hl7.org/CodeSystem/location-physical-type',
    code = 'jdn',
    display = 'Jurisdiction' ,
    )
ptype = dict(
    coding = [jdn],
    text = "US County",
            )


mr_reporter_identifier = dict(
    system = NSHN_system,
    value = NSHN_OID
    )

# Fixed: this previously referenced the undefined name `mr_loc_identifier`,
# which raises NameError; the identifier defined above is the intended one.
mr_reporter = dict(
  identifier = mr_reporter_identifier,
  display = "Acme General Hospital",
    )


# Static metadata shared by every MeasureReport; the main loop adds the
# per-row period and groups.
mr_meta = dict(
            status = 'complete',
            type = 'individual',
            measure = mr_data["measure"],
            subject = mr_reporter, # initially same as reporter
            date = f_now,
            reporter = mr_reporter,  # initially the same as subject
        )

#### Functions for defining subelements

In [121]:
def get_loc(state, county):
    """Build the Location resource describing a county within a state.

    The location always has the id ``loc`` and uses the module-level
    ``ptype`` as its physical type.

    Args:
        state: state value, stored as ``address.state``.
        county: county value, stored as ``address.district``.

    Returns:
        A ``location.Location`` instance.
    """
    county_address = {
        'type': "physical",
        'text': f'{county}, {state}',
        'district': county,
        'state': state,
    }
    return location.Location({
        'id': "loc",
        'address': county_address,
        'physicalType': ptype,
    })

#a = get_loc("CA", "Marin")
#print(a.as_json())

def get_pop(i, pop, pop_code_value, id=False):
    """Build one population entry of a MeasureReport group as a dict.

    Args:
        i: row object whose attribute named *pop* holds the count.
        pop: attribute (column) name to read from *i*.
        pop_code_value: population code, e.g. "initial-population".
        id: when truthy, the entry id is ``{pop}.{pop_code_value}``;
            otherwise the id is None.

    Returns:
        dict with ``id``, ``code`` and ``count``. ``count`` is None when
        the attribute is missing or its value is NaN (``isNaN`` is
        truthy for values that equal themselves, i.e. non-NaN values).
    """
    value = getattr(i, pop, None)
    return {
        'id': f'{pop}.{pop_code_value}' if id else None,
        'code': {
            'coding': [
                {'system': pop_code_system, 'code': pop_code_value},
            ],
        },
        'count': value if isNaN(value) else None,
    }

def get_score(i, num, denom):
    """Build the measureScore quantity (numerator / denominator) for a row.

    Args:
        i: row object whose attributes hold the counts.
        num: attribute name of the numerator count.
        denom: attribute name of the denominator count.

    Returns:
        dict with ``value`` (numerator divided by denominator) and the
        ``unit``/``system``/``code`` of the "%" unit, or None when the
        score cannot be computed: either count is missing or NaN, or the
        denominator is zero. Previously these cases raised TypeError or
        ZeroDivisionError, or produced a NaN value.
    """
    numerator = getattr(i, num, None)
    denominator = getattr(i, denom, None)
    for count in (numerator, denominator):
        # `count != count` is only true for NaN.
        if count is None or count != count:
            return None
    if denominator == 0:
        return None
    return dict(
        value = numerator / denominator,
        unit = "%",
        system = "http://unitsofmeasure.org",
        code = "%",
    )
 

#### Main

- iterate over the dataframe rows
- add in period
- groups
- populations and counts
- and measurescores
- update with contained location for county and state
- validate
- bundle
- validate
- post
- save

In [121]:
# Start from an empty bundle so that re-running this cell does not append
# the same reports to a bundle left over from a previous run.
my_bundle = None
for i in df.itertuples(index=True):
    print(f'***************** row = {i.Index} *******************')
    # Each report covers one day: start and end are both the collection date.
    collection_day = str(i.collectiondate.date())
    mr_meta.update(dict(period = dict(start = collection_day, end = collection_day)))
    mr_groups = []
    for group_index, group in enumerate(mr_data["groups"]):
        group_population = []
        # Reset per group so a score computed for another group (or left
        # over from an earlier run with a different measure) is never reused.
        measureScore = None
        if mr_data['scoring'] == "cohort":
            # Cohort groups have a single population: the column named like the group.
            group_population.append(get_pop(i, group, pop_code_value = "initial-population"))
        elif mr_data['scoring'] == "proportion":
            # 'pops' is index-aligned with 'groups': (denominator column, numerator column).
            ipop, num = mr_data["pops"][group_index][0], mr_data["pops"][group_index][1]
            group_population.append(get_pop(i, ipop, pop_code_value = "initial-population", id=True))
            group_population.append(get_pop(i, ipop, pop_code_value = "denominator", id=True))
            group_population.append(get_pop(i, num, pop_code_value = "numerator", id=True))
            measureScore = get_score(i, num, ipop)
        mr_groups.append(dict(
            code = {'text': group},
            population = group_population,
            measureScore = measureScore if measureScore else None,
            ))
    mr_meta.update(dict(group = mr_groups))
    my_mr = measurereport.MeasureReport(mr_meta, strict=False)  # initialize MR instance
    if "fema" in mr_data['name']:  # add contained resource if county and update resource
        loc = get_loc(state = i.State, county = i.County)
        my_mr.contained = [loc]
        # Point the subject at the contained Location instead of the reporter.
        my_mr.subject = fhirreference.FHIRReference(dict(
                reference = "#loc",
                display = f"{i.County}, {i.State}",
            )
        )
    #print(dumps(my_mr.as_json(),indent=4))
    validate_me(my_mr)
    my_bundle = bundle_me(my_mr,my_bundle) # as transaction

if my_bundle is None:
    # The dataframe had no rows, so there is nothing to validate, save or post.
    print('no rows in dataframe - nothing to bundle')
else:
    print(dumps(my_bundle.as_json(),indent=4))
    validate_me(my_bundle)
    save_me(my_bundle)
    post_me(my_bundle)

### Convert back to CSV

- create new Dataframe based on the old one ( this is cheating )
- For each bundle add row to Dataframe
- save as CSV

In [124]:
import numpy as np  # `np` is used below but was never imported explicitly

# Build one row (dict) per MeasureReport in the bundle. Rows are collected
# in a list and turned into a DataFrame once, because DataFrame.append was
# removed in pandas 2.0.
rows = []
for entry in my_bundle.entry:
    new_row = {}
    mr = entry.resource
    new_row['collectiondate'] = mr.period.start.as_json()
    # add county, state if fema
    for group in mr.group:
        score = getattr(group, 'measureScore', None)
        if score is not None:
            new_row[group.code.text] = score.value  # Measure Score for proportions...
        for gpop in group.population or []:
            if gpop.id:
                new_row[gpop.id] = gpop.count
            else:
                # Only populations without an id map onto the group's column.
                # Previously this assignment ran unconditionally and
                # overwrote the measure score stored above.
                new_row[group.code.text] = gpop.count
    rows.append(new_row)

# Keep the column layout of the source dataframe (this is cheating), followed
# by any extra keys that only exist in the rebuilt rows.
rows_df = DataFrame(rows)
columns = list(df.columns) + [c for c in rows_df.columns if c not in df.columns]
new_df = rows_df.reindex(columns=columns)

new_df['collectiondate'] = to_datetime(new_df['collectiondate']) # convert date to date-time
new_df['collectiondate'] = new_df['collectiondate'].dt.strftime('%m/%d/%Y')
new_df.fillna(value=np.nan, inplace=True) # convert None to NaN
new_df

Unnamed: 0,collectiondate,State,County,newDiagnosticTests,cumulativeDiagnosticTests,newTestsResulted,cumulativeSpecimensRejected,cumulativeTestsPerformed,newPositiveC19Tests,cumulativePositiveC19Tests,newPercentPositive,cumulativePercentPositive
0,04/04/2020,,,100,1000,,45,,,,,
1,04/05/2020,,,100,1100,,55,,,,,
2,04/06/2020,,,100,1200,,65,,,,,
3,04/07/2020,,,100,1300,,75,,,,,
4,04/08/2020,,,100,1400,,85,,,,,
5,04/09/2020,,,100,1500,,95,,,,,
6,04/10/2020,,,100,1600,,105,,,,,


In [125]:
# Save the rebuilt dataframe next to the bundle JSON, named after the
# bundle's resource type and lower-cased id.
csv_name = f'{my_bundle.resource_type}-{my_bundle.id.lower()}.csv'
path = Path.cwd() / out_path / csv_name
print(f'....saving {csv_name} to file ....')
new_df.to_csv(path, index=False)

....saving Bundle-fema-c1-cohort-20200406205852178124.csv to file ....
