In [1]:
# Importing essential Python libraries
import pandas as pd
import numpy as np
import json
import glob
from tqdm import tqdm
import time
import os
import gc

In [3]:
# Detect the operating system from the running interpreter instead of relying
# on a hand-edited flag, so the notebook runs unchanged on any platform.
OPERATING_SYS = 'Win' if os.name == 'nt' else 'Linux'

# File path delimiter used when building the input and output paths:
# backslash on Windows, forward slash everywhere else.
delim = '\\' if OPERATING_SYS == 'Win' else '/'

In [4]:
# Root folder containing the FHIR-formatted Synthea JSON output files.
# Set the SYNTHEA_FHIR_DIR environment variable to point the notebook at a
# different export; when it is unset the original local path is used.
input_root_folder_path = os.environ.get(
    'SYNTHEA_FHIR_DIR',
    r'C:\Users\marym\Downloads\Marquette_synthea\synthea\output\fhir',
)

In [5]:
# Destination folder for the CSV exports (a relative path).
output_folder_path = 'Desktop/output'

# Create the folder when it is not there yet and report that it was created.
output_folder_missing = not os.path.exists(output_folder_path)
if output_folder_missing:
    os.makedirs(output_folder_path)
    print("The new Output directory is created!")

In [6]:
# Recursively collect every JSON file below the input root folder.
json_pattern = delim.join((input_root_folder_path, '**', '*.json'))
files = glob.glob(json_pattern, recursive=True)

print('---Found ' + str(len(files)) + ' Json Files---')

---Found 1697 Json Files---


In [7]:
def filter_resource(data, resource_type):
    """Return the entries of a parsed bundle whose resource has the given type.

    Parameters
    ----------
    data : dict
        Parsed bundle; its 'entry' list is scanned.
    resource_type : str
        Resource type to keep (e.g. 'Patient'); surrounding whitespace is
        stripped before comparing.

    Returns
    -------
    list
        The matching entries, in their original order.
    """
    return [
        entry
        for entry in data['entry']
        if entry['resource']['resourceType'] == resource_type.strip()
    ]

## Patient

In [8]:
cols = ['id','gender','birthDate','maritalStatus','city','state','postalCode','country','deceased','deceasedDateTime']
arr = []
# (file path, error) pairs for bundles that did not yield a patient row, kept
# so failures can be inspected instead of being silently discarded.
failed_patient_files = []
start = time.time()
f_count = 0

# tqdm is used here to show a progress bar while loading files
for file in tqdm(files):
    try:
        # The context manager closes the handle even when parsing fails, and
        # the file is read explicitly as UTF-8 so the result does not depend
        # on the platform's default encoding.
        with open(file, encoding='utf-8') as f:
            data = json.load(f)

        # The first Patient entry supplies the row; a bundle without one raises
        # IndexError here and is counted as a failed file below.
        patient = filter_resource(data, 'Patient')[0]
        resource = patient['resource']
        address = resource['address'][0]
        is_deceased = 'deceasedDateTime' in resource

        ar = [
            resource['id'],
            resource['gender'],
            resource['birthDate'],
            resource['maritalStatus']['text'],
            address['city'],
            address['state'],
            address['postalCode'],
            address['country'],
            # deceased flag, followed by the timestamp when one is present
            is_deceased,
            resource['deceasedDateTime'] if is_deceased else np.nan,
        ]

        arr.append(ar)

    except Exception as e:
        # Best-effort extraction: skip the file, but remember why it failed.
        f_count += 1
        failed_patient_files.append((file, repr(e)))
        continue

end = time.time()

print(str(f_count)+' Files Failed...')
print(str(len(arr))+' Patient bundles extracted as DataFrame in '+str(end-start)+ 'Seconds')

df_patient = pd.DataFrame(arr, columns = cols)

  0%|          | 0/1697 [00:00<?, ?it/s]

100%|██████████| 1697/1697 [01:33<00:00, 18.24it/s]

2 Files Failed...
1695 Patient bundles extracted as DataFrame in 93.08039784431458Seconds





In [9]:
# Number of rows for each distinct value of the 'city' column
df_patient['city'].value_counts()

city
Marquette    1695
Name: count, dtype: int64

In [10]:
# Share of rows for each value of the 'deceased' flag
# (per-value counts divided by the total number of rows)
df_patient['deceased'].value_counts()/len(df_patient)

deceased
False    0.825959
True     0.174041
Name: count, dtype: float64

In [11]:
# Keep only the first row for each patient id and renumber the index
df_patient = df_patient.drop_duplicates(subset='id', ignore_index=True)

In [12]:
# Preview the first rows of the patient table
df_patient.head()

Unnamed: 0,id,gender,birthDate,maritalStatus,city,state,postalCode,country,deceased,deceasedDateTime
0,3473d82d-617e-6315-6f8d-ec8d03b53f1f,male,1942-06-16,Never Married,Marquette,MI,49855,US,True,1966-01-27T04:51:01-05:00
1,0ff857ca-1031-c3b4-fb54-fb4b86b8a2d2,female,1997-12-22,Never Married,Marquette,MI,49855,US,False,
2,46304e49-0ad3-0c37-b763-01f0c8d607d1,male,2006-12-05,Never Married,Marquette,MI,49855,US,False,
3,0191f978-e2d3-acf6-f570-ceaccf0d4675,female,1968-03-21,Married,Marquette,MI,49855,US,False,
4,aa01a46d-7a84-89c9-4aca-4b83e1fa0c05,male,1974-11-23,Married,Marquette,MI,49855,US,False,


In [13]:
# Write the patient table to Patient.csv inside the output folder
patient_csv_path = output_folder_path + delim + 'Patient.csv'
df_patient.to_csv(patient_csv_path)

# Drop the frame and ask the garbage collector to reclaim memory
del df_patient
gc.collect()

0

## Conditions

In [14]:
cols = ['code','codeText','patientId','encounterId','onsetDateTime','recordedDate','clinicalStatusCode']

arr = []
# (file path, error) pairs for bundles that raised during extraction, kept so
# failures can be inspected instead of being silently discarded.
failed_condition_files = []
start = time.time()
f_count = 0

for file in tqdm(files):
    try:
        # The context manager closes the handle even when parsing fails, and
        # the file is read explicitly as UTF-8 so the result does not depend
        # on the platform's default encoding.
        with open(file, encoding='utf-8') as f:
            data = json.load(f)

        conditions = filter_resource(data, 'Condition')
        for cond in conditions:
            resource = cond['resource']

            # One row per Condition entry; references are stored without the
            # 'urn:uuid:' prefix.
            ar = [
                resource['code']['coding'][0]['code'],
                resource['code']['text'],
                resource['subject']['reference'].strip().split('urn:uuid:')[1],
                resource['encounter']['reference'].strip().split('urn:uuid:')[1],
                resource['onsetDateTime'],
                resource['recordedDate'],
                resource['clinicalStatus']['coding'][0]['code'],
            ]

            arr.append(ar)
    except Exception as e:
        # Best-effort extraction: the rest of this file is skipped (rows already
        # appended from it are kept), and the reason is remembered.
        f_count += 1
        failed_condition_files.append((file, repr(e)))
        continue

end = time.time()
print(str(f_count)+' Files Failed...')
print(str(len(arr))+' Patient condition bundles extracted as DataFrame in '+str(end-start)+ 'Seconds')

df_condition = pd.DataFrame(arr, columns = cols)

100%|██████████| 1697/1697 [02:19<00:00, 12.14it/s]

0 Files Failed...
76571 Patient condition bundles extracted as DataFrame in 139.79356408119202Seconds





In [15]:
# Preview the first rows of the condition table
df_condition.head()

Unnamed: 0,code,codeText,patientId,encounterId,onsetDateTime,recordedDate,clinicalStatusCode
0,314529007,Medication review due (situation),3473d82d-617e-6315-6f8d-ec8d03b53f1f,8e688e55-b44b-a7f5-ff90-e652d6a32f8c,1952-06-24T05:21:30-04:00,1952-06-24T05:21:30-04:00,resolved
1,160968000,Risk activity involvement (finding),3473d82d-617e-6315-6f8d-ec8d03b53f1f,a2ea3c52-5e43-8c46-07b4-4ecbd88a902b,1957-07-23T06:26:59-04:00,1957-07-23T06:26:59-04:00,resolved
2,66383009,Gingivitis (disorder),3473d82d-617e-6315-6f8d-ec8d03b53f1f,a2ea3c52-5e43-8c46-07b4-4ecbd88a902b,1957-07-23T06:40:32-04:00,1957-07-23T06:40:32-04:00,resolved
3,427898007,Infection of tooth (disorder),3473d82d-617e-6315-6f8d-ec8d03b53f1f,743f3b17-c971-b6f3-8f42-f782868a6945,1958-08-12T07:02:05-04:00,1958-08-12T07:02:05-04:00,resolved
4,314529007,Medication review due (situation),3473d82d-617e-6315-6f8d-ec8d03b53f1f,a26731bc-b8b1-f798-8bfb-73a1678e02cd,1960-08-09T05:21:30-04:00,1960-08-09T05:21:30-04:00,resolved


In [16]:
# Parse both timestamp columns into timezone-aware UTC datetimes
for date_col in ['onsetDateTime', 'recordedDate']:
    df_condition[date_col] = pd.to_datetime(
        df_condition[date_col],
        format="%Y-%m-%dT%H:%M:%S%z",
        utc=True,
    )

In [None]:
# Deriving resolvedDateTime from the Conditions DataFrame

cols = ['patientId','code','encounterId','onsetDateTime','resolvedDateTime','codeText']
arr = []
episode_keys = ['patientId','encounterId','onsetDateTime']
for (patient_id, encounter_id, onset), episode in tqdm(df_condition.groupby(episode_keys)):
    # Split each (patient, encounter, onset) group again by condition code
    for (code, code_text), records in episode.groupby(['code','codeText']):
        # Records whose clinicalStatusCode is "resolved"
        resolved = records.query('clinicalStatusCode == "resolved"')

        # Use the latest recordedDate of the resolved records when there are
        # any; otherwise fall back to the latest recordedDate of the group.
        date_source = records if resolved.empty else resolved

        ar = [
            patient_id,
            code,
            encounter_id,
            onset,
            date_source['recordedDate'].max(),
            code_text,
        ]
        arr.append(ar)

df_condition_new = pd.DataFrame(arr, columns = cols)

 38%|███▊      | 24371/63369 [01:20<01:28, 438.28it/s]

In [48]:
# Convert both timestamp columns to timezone-aware UTC datetimes, then preview
for date_col in ['onsetDateTime', 'resolvedDateTime']:
    df_condition_new[date_col] = pd.to_datetime(
        df_condition_new[date_col],
        format="%Y-%m-%dT%H:%M:%S%z",
        utc=True,
    )
df_condition_new.head()

Unnamed: 0,patientId,code,encounterId,onsetDateTime,resolvedDateTime,codeText
0,002249f3-d093-01fc-fbca-e311c8924a19,160904001,0072a09e-2580-7ee4-a713-99f8823c6806,2024-05-09 08:55:34+00:00,2024-05-09 08:55:34+00:00,Part-time employment (finding)
1,002249f3-d093-01fc-fbca-e311c8924a19,741062008,0229998a-17d8-9f60-1323-a69ab137053e,2023-10-19 09:13:13+00:00,2023-10-19 09:13:13+00:00,Not in labor force (finding)
2,002249f3-d093-01fc-fbca-e311c8924a19,314529007,023059a8-14bf-2976-f46d-ebc320fa216a,2017-06-01 08:19:14+00:00,2017-06-01 08:19:14+00:00,Medication review due (situation)
3,002249f3-d093-01fc-fbca-e311c8924a19,73438004,023059a8-14bf-2976-f46d-ebc320fa216a,2017-06-01 09:09:41+00:00,2017-06-01 09:09:41+00:00,Unemployed (finding)
4,002249f3-d093-01fc-fbca-e311c8924a19,73595000,023059a8-14bf-2976-f46d-ebc320fa216a,2017-06-01 09:09:41+00:00,2017-06-01 09:09:41+00:00,Stress (finding)


In [49]:
# Show only the rows whose condition code equals "840539006"
df_condition_new.query('code == "840539006"')

Unnamed: 0,patientId,code,encounterId,onsetDateTime,resolvedDateTime,codeText
629,0199efbf-89e2-b9ee-3dc6-e408c6b45e95,840539006,3120044a-9dc1-e9fb-4b91-b906475595e8,2020-12-21 08:43:42+00:00,2020-12-21 08:43:42+00:00,Disease caused by severe acute respiratory syn...
1098,03edb551-ed0b-4f99-6f2e-faf4e27bfe8d,840539006,9c96c132-9732-36a7-94ec-bb8b444e6dd8,2020-12-12 09:18:48+00:00,2020-12-12 09:18:48+00:00,Disease caused by severe acute respiratory syn...
1208,049cc526-a1ac-2098-5a09-431ab127c0ca,840539006,899d3ac8-4a8d-f404-d30d-3d03055b97fb,2020-09-15 06:10:41+00:00,2020-09-15 06:10:41+00:00,Disease caused by severe acute respiratory syn...
2936,098391af-100f-33b0-1a8f-10a69ca6ec0d,840539006,2fe6ab9d-9e38-1743-6e2b-efa3ec765e00,2020-10-24 23:21:21+00:00,2020-10-24 23:21:21+00:00,Disease caused by severe acute respiratory syn...
3091,09e4fc1d-e604-5057-9972-61b993e16acd,840539006,b9dcdd34-a768-9714-68b2-0f01d45a90b6,2020-12-28 13:48:59+00:00,2020-12-28 13:48:59+00:00,Disease caused by severe acute respiratory syn...
...,...,...,...,...,...,...
73221,f4c8809d-271f-a8c8-080c-58e15d06f124,840539006,08b8cb35-48c7-cd2b-981e-f0dca478ff66,2020-09-19 19:10:13+00:00,2020-09-19 19:10:13+00:00,Disease caused by severe acute respiratory syn...
74163,f7083ad6-fa63-5151-5463-7ab7ed387fe4,840539006,c1babb57-1192-9e74-f913-8ab76125bfc4,2020-12-27 22:25:27+00:00,2020-12-27 22:25:27+00:00,Disease caused by severe acute respiratory syn...
74793,f88e6529-73a9-081d-812e-0dc3df30870c,840539006,afcfcf84-915d-2808-e6e8-1a108f88e94c,2020-11-16 15:48:06+00:00,2020-11-16 15:48:06+00:00,Disease caused by severe acute respiratory syn...
75840,fc7a2afc-c2ba-80ac-5544-a0ef6d534d6c,840539006,0095eb73-1a2e-fe2e-7568-0fca110a17ba,2020-12-01 13:59:52+00:00,2020-12-01 13:59:52+00:00,Disease caused by severe acute respiratory syn...


840539006 is the SNOMED CT code for COVID-19.

In [50]:
# Write the derived condition table to Condition.csv inside the output folder
condition_csv_path = output_folder_path + delim + 'Condition.csv'
df_condition_new.to_csv(condition_csv_path)

# Drop the frame and ask the garbage collector to reclaim memory
del df_condition_new
gc.collect()

37

## Encounters

In [51]:
cols = ['id','status','code','codeText','start','end','patientId','location','serviceProvider','encounterClass']

arr = []
# (file path, error) pairs for bundles that raised during extraction, kept so
# failures can be inspected instead of being silently discarded.
failed_encounter_files = []
start = time.time()
f_count = 0

for file in tqdm(files):
    try:
        # The context manager closes the handle even when parsing fails, and
        # the file is read explicitly as UTF-8 so the result does not depend
        # on the platform's default encoding.
        with open(file, encoding='utf-8') as f:
            data = json.load(f)

        encounters = filter_resource(data, 'Encounter')
        for encounter in encounters:
            resource = encounter['resource']
            encounter_type = resource['type'][0]
            period = resource['period']

            # One row per Encounter entry; the patient reference is stored
            # without the 'urn:uuid:' prefix.
            ar = [
                resource['id'],
                resource['status'],
                encounter_type['coding'][0]['code'],
                encounter_type['text'],
                period['start'],
                period['end'],
                resource['subject']['reference'].strip().split('urn:uuid:')[1],
                resource['location'][0]['location']['display'],
                resource['serviceProvider']['display'],
                resource['class']['code'],
            ]

            arr.append(ar)
    except Exception as e:
        # Best-effort extraction: the rest of this file is skipped (rows already
        # appended from it are kept), and the reason is remembered.
        f_count += 1
        failed_encounter_files.append((file, repr(e)))
        continue

end = time.time()
print(str(f_count)+' Files Failed...')
print(str(len(arr))+' Patient encounter bundles extracted as DataFrame in '+str(end-start)+ 'Seconds')

df_encounter = pd.DataFrame(arr, columns = cols)

100%|██████████| 1697/1697 [03:14<00:00,  8.71it/s]

0 Files Failed...
133465 Patient encounter bundles extracted as DataFrame in 194.81184816360474Seconds





In [52]:
# Preview the first rows of the encounter table
df_encounter.head()

Unnamed: 0,id,status,code,codeText,start,end,patientId,location,serviceProvider,encounterClass
0,8e688e55-b44b-a7f5-ff90-e652d6a32f8c,finished,410620009,Well child visit (procedure),1952-06-24T05:21:30-04:00,1952-06-24T05:36:30-04:00,3473d82d-617e-6315-6f8d-ec8d03b53f1f,"MARQUETTE INTERNAL MEDICINE ASSOCIATES, P.C.","MARQUETTE INTERNAL MEDICINE ASSOCIATES, P.C.",AMB
1,141ebb0c-3b53-212b-a913-a1ac8110ca98,finished,410620009,Well child visit (procedure),1956-07-17T05:21:30-04:00,1956-07-17T05:36:30-04:00,3473d82d-617e-6315-6f8d-ec8d03b53f1f,"MARQUETTE INTERNAL MEDICINE ASSOCIATES, P.C.","MARQUETTE INTERNAL MEDICINE ASSOCIATES, P.C.",AMB
2,a2ea3c52-5e43-8c46-07b4-4ecbd88a902b,finished,410620009,Well child visit (procedure),1957-07-23T05:21:30-04:00,1957-07-23T05:36:30-04:00,3473d82d-617e-6315-6f8d-ec8d03b53f1f,"MARQUETTE INTERNAL MEDICINE ASSOCIATES, P.C.","MARQUETTE INTERNAL MEDICINE ASSOCIATES, P.C.",AMB
3,57a99bea-72a8-f49f-fe8d-d4ffce019e24,finished,185349003,Encounter for check up (procedure),1957-08-06T05:21:30-04:00,1957-08-06T08:45:59-04:00,3473d82d-617e-6315-6f8d-ec8d03b53f1f,DLP MARQUETTE GENERAL HOSPITAL LLC,DLP MARQUETTE GENERAL HOSPITAL LLC,AMB
4,5015bdd0-c865-78f8-33ab-cc21419fc012,finished,410620009,Well child visit (procedure),1958-07-29T05:21:30-04:00,1958-07-29T05:36:30-04:00,3473d82d-617e-6315-6f8d-ec8d03b53f1f,"MARQUETTE INTERNAL MEDICINE ASSOCIATES, P.C.","MARQUETTE INTERNAL MEDICINE ASSOCIATES, P.C.",AMB


In [53]:
# Number of rows for each distinct value of the 'encounterClass' column
df_encounter['encounterClass'].value_counts()

encounterClass
AMB     124430
EMER      5232
IMP       2565
HH        1025
VR         213
Name: count, dtype: int64

In [54]:
# Write the encounter table to Encounter.csv inside the output folder
encounter_csv_path = output_folder_path + delim + 'Encounter.csv'
df_encounter.to_csv(encounter_csv_path)

# Drop the frame and ask the garbage collector to reclaim memory
del df_encounter
gc.collect()

0

## Observations

In [55]:
cols = ['id','patientId','issuedDate','effectiveDateTime','category','encounter','code','codeText','value','units','snomedCode','observationType']


def _observation_value(node):
    """Return [value, units, snomedCode, observationType] for one observation or component.

    `node` is either an Observation resource or one of its components. A
    coded value and a string value are labelled 'text'; a quantity is
    labelled 'numeric'. A node carrying none of the three handled value
    keys raises KeyError, which the caller counts as a failed file.
    """
    if 'valueCodeableConcept' in node:
        coding = node['valueCodeableConcept']['coding'][0]
        return [coding['display'], np.nan, coding['code'], 'text']
    if 'valueQuantity' in node:
        quantity = node['valueQuantity']
        return [quantity['value'], quantity['unit'], np.nan, 'numeric']
    return [node['valueString'], np.nan, np.nan, 'text']


arr = []
# (file path, error) pairs for bundles that raised during extraction, kept so
# failures can be inspected instead of being silently discarded.
failed_observation_files = []
start = time.time()
f_count = 0

for file in tqdm(files):
    try:
        # The context manager closes the handle even when parsing fails, and
        # the file is read explicitly as UTF-8 so the result does not depend
        # on the platform's default encoding.
        with open(file, encoding='utf-8') as f:
            data = json.load(f)

        observations = filter_resource(data, 'Observation')
        for observation in observations:
            resource = observation['resource']

            # Fields shared by every row produced from this observation;
            # references are stored without the 'urn:uuid:' prefix.
            base = [
                resource['id'],
                resource['subject']['reference'].strip().split('urn:uuid:')[1],
                resource['issued'],
                resource['effectiveDateTime'],
                resource['category'][0]['coding'][0]['code'],
                resource['encounter']['reference'].strip().split('urn:uuid:')[1],
            ]

            # An observation with components yields one row per component;
            # otherwise the observation itself carries the code and value.
            nodes = resource['component'] if 'component' in resource else [resource]
            for node in nodes:
                coding = node['code']['coding'][0]
                ar = base + [coding['code'], coding['display']] + _observation_value(node)
                arr.append(ar)
    except Exception as e:
        # Best-effort extraction: the rest of this file is skipped (rows already
        # appended from it are kept), and the reason is remembered.
        f_count += 1
        failed_observation_files.append((file, repr(e)))
        continue

end = time.time()
print(str(f_count)+' Files Failed...')
print(str(len(arr))+' Patient observation bundles extracted as DataFrame in '+str(end-start)+ 'Seconds')

df_observation = pd.DataFrame(arr, columns = cols)

100%|██████████| 1697/1697 [04:05<00:00,  6.91it/s]


0 Files Failed...
1762128 Patient observation bundles extracted as DataFrame in 245.68359994888306Seconds


In [56]:
# Display the observation table
df_observation

Unnamed: 0,id,patientId,issuedDate,effectiveDateTime,category,encounter,code,codeText,value,units,snomedCode,observationType
0,ded5c539-9a9a-76a4-aab8-7a031014bb7a,3473d82d-617e-6315-6f8d-ec8d03b53f1f,1956-07-17T05:21:30.014-04:00,1956-07-17T05:21:30-04:00,vital-signs,141ebb0c-3b53-212b-a913-a1ac8110ca98,8302-2,Body Height,161.6,cm,,numeric
1,be4ec649-28c0-89a9-6878-e5aacc1758cd,3473d82d-617e-6315-6f8d-ec8d03b53f1f,1956-07-17T05:21:30.014-04:00,1956-07-17T05:21:30-04:00,vital-signs,141ebb0c-3b53-212b-a913-a1ac8110ca98,72514-3,Pain severity - 0-10 verbal numeric rating [Sc...,2,{score},,numeric
2,0bde420b-fb42-2a16-fca1-b5d49a34b40a,3473d82d-617e-6315-6f8d-ec8d03b53f1f,1956-07-17T05:21:30.014-04:00,1956-07-17T05:21:30-04:00,vital-signs,141ebb0c-3b53-212b-a913-a1ac8110ca98,29463-7,Body Weight,81.3,kg,,numeric
3,b9fbd285-e7a0-3fa6-6295-c23f96333bfe,3473d82d-617e-6315-6f8d-ec8d03b53f1f,1956-07-17T05:21:30.014-04:00,1956-07-17T05:21:30-04:00,vital-signs,141ebb0c-3b53-212b-a913-a1ac8110ca98,39156-5,Body mass index (BMI) [Ratio],31.13,kg/m2,,numeric
4,813affd1-1bdd-403d-40ea-66f47913a077,3473d82d-617e-6315-6f8d-ec8d03b53f1f,1956-07-17T05:21:30.014-04:00,1956-07-17T05:21:30-04:00,vital-signs,141ebb0c-3b53-212b-a913-a1ac8110ca98,59576-9,Body mass index (BMI) [Percentile] Per age and...,98.57,%,,numeric
...,...,...,...,...,...,...,...,...,...,...,...,...
1762123,e3c45510-ba6e-53f8-95f7-c376084a3fcd,3498a089-e321-a167-7851-fc0425d975a3,2021-02-27T22:54:46.874-05:00,2021-02-27T22:54:46-05:00,survey,068b41ce-c9fe-b7ff-cfeb-9169b5dcd18d,59460-6,Fall risk total [Morse Fall Scale],17,{#},,numeric
1762124,a21a8f2c-7730-71ce-1882-183ecdca4deb,3498a089-e321-a167-7851-fc0425d975a3,2021-02-27T22:54:46.874-05:00,2021-02-27T22:54:46-05:00,survey,068b41ce-c9fe-b7ff-cfeb-9169b5dcd18d,59461-4,Fall risk level [Morse Fall Scale],Low Risk (MFS Score 0 - 24),,LA13038-7,text
1762125,3d598931-abd3-68cf-8181-d40fea68f06b,3498a089-e321-a167-7851-fc0425d975a3,2021-02-27T23:30:16.874-05:00,2021-02-27T23:30:16-05:00,survey,068b41ce-c9fe-b7ff-cfeb-9169b5dcd18d,76504-0,Total score [HARK],0,{score},,numeric
1762126,3161db52-07ef-c316-30d2-87690485e2a4,3498a089-e321-a167-7851-fc0425d975a3,2021-02-28T00:04:14.874-05:00,2021-02-28T00:04:14-05:00,survey,068b41ce-c9fe-b7ff-cfeb-9169b5dcd18d,55758-7,Patient Health Questionnaire 2 item (PHQ-2) to...,0,{score},,numeric


In [57]:
# Write the observation table to Observation.csv inside the output folder
observation_csv_path = output_folder_path + delim + 'Observation.csv'
df_observation.to_csv(observation_csv_path)

# Drop the frame and the leftover row buffers, then ask the garbage collector
# to reclaim memory
del df_observation, ar, arr
gc.collect()

0

## Care Plan

In [58]:
cols = ['id','status','patientId','start','end','category','code','codeText','intent','encounter','careTeam','activityCode','activityCodeText','activityStatus','activityLocation']

arr = []
# (file path, error) pairs for bundles that raised during extraction, kept so
# failures can be inspected instead of being silently discarded.
failed_careplan_files = []
start = time.time()
f_count = 0

for file in tqdm(files):
    try:
        # The context manager closes the handle even when parsing fails, and
        # the file is read explicitly as UTF-8 so the result does not depend
        # on the platform's default encoding.
        with open(file, encoding='utf-8') as f:
            data = json.load(f)

        cps = filter_resource(data, 'CarePlan')
        for cp in cps:
            resource = cp['resource']
            period = resource['period']
            # The second category entry supplies the 'code'/'codeText' columns.
            plan_coding = resource['category'][1]['coding'][0]

            # Fields shared by every row produced from this care plan;
            # references are stored without the 'urn:uuid:' prefix.
            base = [
                resource['id'],
                resource['status'],
                resource['subject']['reference'].strip().split('urn:uuid:')[1],
                period['start'],
                # A period without an end is recorded as NaN
                period.get('end', np.nan),
                resource['category'][0]['coding'][0]['code'],
                plan_coding['code'],
                plan_coding['display'],
                resource['intent'],
                resource['encounter']['reference'].strip().split('urn:uuid:')[1],
                resource['careTeam'][0]['reference'].strip().split('urn:uuid:')[1],
            ]

            if 'activity' in resource:
                # One row per activity of the care plan
                for activity in resource['activity']:
                    detail = activity['detail']
                    activity_coding = detail['code']['coding'][0]
                    ar = base + [
                        activity_coding['code'],
                        activity_coding['display'],
                        detail['status'],
                        detail['location']['display'],
                    ]
                    arr.append(ar)
            else:
                # No activity key: a single row with the four activity columns empty
                ar = base + [np.nan, np.nan, np.nan, np.nan]
                arr.append(ar)
    except Exception as e:
        # Best-effort extraction: the rest of this file is skipped (rows already
        # appended from it are kept), and the reason is remembered.
        f_count += 1
        failed_careplan_files.append((file, repr(e)))
        continue

end = time.time()
print(str(f_count)+' Files Failed...')
print(str(len(arr))+' Patient CarePlan bundles extracted as DataFrame in '+str(end-start)+ 'Seconds')

df_cp = pd.DataFrame(arr, columns = cols)

100%|██████████| 1697/1697 [03:11<00:00,  8.85it/s]

0 Files Failed...
15666 Patient CarePlan bundles extracted as DataFrame in 191.78041124343872Seconds





In [59]:
# Display the care plan table
df_cp

Unnamed: 0,id,status,patientId,start,end,category,code,codeText,intent,encounter,careTeam,activityCode,activityCodeText,activityStatus,activityLocation
0,7998b1e9-7d77-b6a8-8717-22717b1864fa,active,3473d82d-617e-6315-6f8d-ec8d03b53f1f,1961-08-15T05:21:30-04:00,,assess-plan,735985000,Diabetes self management plan (record artifact),order,224a5cf5-0c98-a4e2-b8fb-fb9cba917269,123b8929-1f7b-eb4f-006d-c95e7c58e086,160670007,Diabetic diet (finding),in-progress,"MARQUETTE INTERNAL MEDICINE ASSOCIATES, P.C."
1,7998b1e9-7d77-b6a8-8717-22717b1864fa,active,3473d82d-617e-6315-6f8d-ec8d03b53f1f,1961-08-15T05:21:30-04:00,,assess-plan,735985000,Diabetes self management plan (record artifact),order,224a5cf5-0c98-a4e2-b8fb-fb9cba917269,123b8929-1f7b-eb4f-006d-c95e7c58e086,229065009,Exercise therapy (regime/therapy),in-progress,"MARQUETTE INTERNAL MEDICINE ASSOCIATES, P.C."
2,9339cf07-1194-144e-dcf1-9bb77112a1f4,completed,3473d82d-617e-6315-6f8d-ec8d03b53f1f,1962-10-21T05:21:30-04:00,1962-11-11T04:51:01-05:00,assess-plan,225358003,Wound care (regime/therapy),order,76fd50b5-8cd8-d32d-148e-bbf13503539b,6b299036-c660-1f3f-a878-61dabe6d5676,385949008,Dressing change management (procedure),completed,UPPER PENINSULA HOME HEALTH & HOSPICE
3,9339cf07-1194-144e-dcf1-9bb77112a1f4,completed,3473d82d-617e-6315-6f8d-ec8d03b53f1f,1962-10-21T05:21:30-04:00,1962-11-11T04:51:01-05:00,assess-plan,225358003,Wound care (regime/therapy),order,76fd50b5-8cd8-d32d-148e-bbf13503539b,6b299036-c660-1f3f-a878-61dabe6d5676,439830001,Behavior to prevent infection (observable entity),completed,UPPER PENINSULA HOME HEALTH & HOSPICE
4,549353c6-c873-fa96-3027-6265409f57e0,completed,0ff857ca-1031-c3b4-fb54-fb4b86b8a2d2,2018-09-24T03:17:55-04:00,2018-10-15T03:17:55-04:00,assess-plan,773513001,Physiotherapy care plan (record artifact),order,ddcb2078-ada3-4a40-5e1d-1783bb031de8,3760ff3d-fb4c-8543-f798-bc42f2ef172c,229586001,"Rest, ice, compression and elevation treatment...",completed,UPPER PENINSULA HOME HEALTH & HOSPICE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15661,4cc5e5ce-7edc-bca7-cb5d-e4bbaefeac0e,active,3498a089-e321-a167-7851-fc0425d975a3,2015-07-04T22:11:49-04:00,,assess-plan,735984001,Heart failure self management plan (record art...,order,135fa77c-6e68-e447-bfe9-f0c97fbb3b35,fe1f40a8-f0f5-679f-c015-2395e0f51160,183301007,Physical exercises (regime/therapy),in-progress,FARMBROOK INTERNAL MEDICINE INC
15662,d70bc3aa-8b5e-fe74-5249-4df951b778d3,completed,3498a089-e321-a167-7851-fc0425d975a3,2018-01-04T21:11:49-05:00,2018-01-07T22:05:29-05:00,assess-plan,736353004,Inpatient care plan (record artifact),order,bc5bcef9-6537-e62d-74f8-0a02fa4dd951,67a9d020-61da-e08d-f02f-9caf41da340e,385715006,Cardiac care (regime/therapy),completed,FARMBROOK INTERNAL MEDICINE INC
15663,d70bc3aa-8b5e-fe74-5249-4df951b778d3,completed,3498a089-e321-a167-7851-fc0425d975a3,2018-01-04T21:11:49-05:00,2018-01-07T22:05:29-05:00,assess-plan,736353004,Inpatient care plan (record artifact),order,bc5bcef9-6537-e62d-74f8-0a02fa4dd951,67a9d020-61da-e08d-f02f-9caf41da340e,386619000,Low sodium diet (finding),completed,FARMBROOK INTERNAL MEDICINE INC
15664,e3f83ed8-0175-7eb7-c90d-e636db54eae4,completed,3498a089-e321-a167-7851-fc0425d975a3,2021-01-08T21:11:49-05:00,2021-01-18T22:09:49-05:00,assess-plan,736353004,Inpatient care plan (record artifact),order,eb94632a-fa1e-f286-b3ef-84f3507d778a,d5423da7-e8b3-ff59-d53c-d9b459240a70,385715006,Cardiac care (regime/therapy),completed,FARMBROOK INTERNAL MEDICINE INC


In [60]:
# Write the care plan table to CarePlan.csv inside the output folder
careplan_csv_path = output_folder_path + delim + 'CarePlan.csv'
df_cp.to_csv(careplan_csv_path)

# Drop the frame and ask the garbage collector to reclaim memory
del df_cp
gc.collect()

0