## Flatten FHIR instances to CSV

- see this chat: https://chat.fhir.org/#narrow/stream/179166-implementers/topic/What.20is.20the.20state.20of.20FHIR-.3ECSV.20export.3F
- and this one too: https://chat.fhir.org/#narrow/stream/179218-python/topic/FHIR.20to.20Dataset

The idea is simply to flatten each FHIR resource with a flattening library (`flatten_json`) and then convert the result to CSV using pandas.

### Import the Necessary Modules

In [76]:
#from pandas import *
from json import loads
from pathlib import Path

import pandas as pd
from flatten_json import flatten_json as flt

try:
    # pandas >= 1.0 exposes json_normalize at the top level.
    from pandas import json_normalize
except ImportError:
    # Older pandas only provides it under pandas.io.json.
    from pandas.io.json import json_normalize

### Flatten a Couple of Sample Instances and Transform into a Dataframe

In [77]:
# Sample FHIR Patient resource as JSON text: one identifier, one given name,
# one contact telecom entry and a single address line.
my_patient1 ='''
{
    "resourceType": "Patient",
    "identifier": [
        {
            "system": "http://example.org",
            "value": "123"
        }
    ],
    "active": true,
    "name": [
        {
            "text": "Amy Brown",
            "family": "Brown",
            "given": [
                "Amy"
            ]
        }
    ],
    "gender": "female",
    "deceasedBoolean": false,
    "contact": [
        {
            "telecom": [
                {
                    "system": "phone",
                    "value": "5555555555"
                }
            ],
            "address": {
                "use": "home",
                "text": "100 Main St, Napa, CA 94559",
                "line": [
                    "100 Main St"
                ],
                "city": "Napa",
                "state": "CA",
                "postalCode": "94559"
            }
        }
    ]
}
'''

# Second sample FHIR Patient resource as JSON text. It repeats several
# elements (two identifiers, two given names, two contact telecom entries,
# two address lines), so flattening it yields keys the first sample lacks.
my_patient2='''
{
    "resourceType": "Patient",
    "identifier": [
        {
            "system": "http://example.org",
            "value": "234"
        },
        {
            "system": "http://example.org",
            "value": "foo"
        }
    ],
    "active": true,
    "name": [
        {
            "text": "Bert Black",
            "family": "Black",
            "given": [
                "Bert",
                "Ernie"
            ]
        }
    ],
    "gender": "male",
    "deceasedBoolean": false,
    "contact": [
        {
            "telecom": [
                {
                    "system": "phone",
                    "value": "5555555555"
                },
                {
                    "system": "email",
                    "value": "myemail@email.com"
                }
            ],
            "address": {
                "use": "home",
                "text": "100 Oak St, Apt 1A, Napa, CA 94558",
                "line": [
                    "100 Oak St",
                    "Apt 1A"
                ],
                "city": "Napa",
                "state": "CA",
                "postalCode": "94558"
            }
        }
    ]
}
'''

# Parse each Patient JSON string, flatten the nested structure into a
# single-level dict (keys such as "contact_0_address_city"), and turn each
# flattened dict into a one-row DataFrame.
flat = flt(loads(my_patient1))
df1 = json_normalize(flat)

flat = flt(loads(my_patient2))
df2 = json_normalize(flat)

# Stack the two rows into one frame. DataFrame.append was deprecated in
# pandas 1.4 and removed in 2.0, so pd.concat is used instead.
# Columns present in only one patient are filled with NaN for the other;
# sort=True orders the unioned columns alphabetically, matching the column
# order of the recorded output below.
df3 = pd.concat([df1, df2], ignore_index=True, sort=True)

df3

Unnamed: 0,active,contact_0_address_city,contact_0_address_line_0,contact_0_address_line_1,contact_0_address_postalCode,contact_0_address_state,contact_0_address_text,contact_0_address_use,contact_0_telecom_0_system,contact_0_telecom_0_value,...,gender,identifier_0_system,identifier_0_value,identifier_1_system,identifier_1_value,name_0_family,name_0_given_0,name_0_given_1,name_0_text,resourceType
0,True,Napa,100 Main St,,94559,CA,"100 Main St, Napa, CA 94559",home,phone,5555555555,...,female,http://example.org,123,,,Brown,Amy,,Amy Brown,Patient
1,True,Napa,100 Oak St,Apt 1A,94558,CA,"100 Oak St, Apt 1A, Napa, CA 94558",home,phone,5555555555,...,male,http://example.org,234,http://example.org,foo,Black,Bert,Ernie,Bert Black,Patient


### Convert the DataFrame to CSV

In [78]:
# index=False leaves the row index out of the CSV; called without a path,
# to_csv returns the CSV text as a string instead of writing a file.
my_csv = df3.to_csv(index=False)
my_csv

'active,contact_0_address_city,contact_0_address_line_0,contact_0_address_line_1,contact_0_address_postalCode,contact_0_address_state,contact_0_address_text,contact_0_address_use,contact_0_telecom_0_system,contact_0_telecom_0_value,contact_0_telecom_1_system,contact_0_telecom_1_value,deceasedBoolean,gender,identifier_0_system,identifier_0_value,identifier_1_system,identifier_1_value,name_0_family,name_0_given_0,name_0_given_1,name_0_text,resourceType\nTrue,Napa,100 Main St,,94559,CA,"100 Main St, Napa, CA 94559",home,phone,5555555555,,,False,female,http://example.org,123,,,Brown,Amy,,Amy Brown,Patient\nTrue,Napa,100 Oak St,Apt 1A,94558,CA,"100 Oak St, Apt 1A, Napa, CA 94558",home,phone,5555555555,email,myemail@email.com,False,male,http://example.org,234,http://example.org,foo,Black,Bert,Ernie,Bert Black,Patient\n'

### Save (or load to datastore...)

In [79]:
out_path = 'r4'

# Name the file after the resource type of the first row and the primary
# identifier of the first and last rows, e.g. "Patient-123-234.csv".
# Built once so the path opened and the message printed cannot drift apart.
resource_type = df3["resourceType"].iloc[0]
first_id = df3["identifier_0_value"].iloc[0]
last_id = df3["identifier_0_value"].iloc[-1]
file_name = f'{resource_type}-{first_id}-{last_id}.csv'

# Create the output directory if needed; open() would otherwise raise
# FileNotFoundError when the notebook runs from a fresh checkout.
out_dir = Path(out_path)
out_dir.mkdir(parents=True, exist_ok=True)

# encoding is explicit so the file does not depend on the platform default;
# newline='' writes the line endings already present in my_csv untranslated.
with open(out_dir / file_name, 'w', encoding='utf-8', newline='') as f:
    print(f'writing file {file_name} to {out_path}')
    f.write(my_csv)
    

writing file Patient-123-234.csv to r4
