In [39]:
import pandas as pd
import glob

# Collect every JSON file under ./Datasets. glob returns paths in an arbitrary,
# filesystem-dependent order, so sort them to keep the row order of the
# combined frame reproducible across runs and machines.
file_paths = sorted(glob.glob("./Datasets/*.json"))
if not file_paths:
    # pd.concat on an empty list fails with "No objects to concatenate";
    # report the actual problem (no input files) instead.
    raise FileNotFoundError("No JSON files found matching ./Datasets/*.json")

# One DataFrame per file, stacked into a single frame with a fresh 0..n-1 index.
dfs = [pd.read_json(file_path) for file_path in file_paths]
combined_df = pd.concat(dfs, ignore_index=True)


In [40]:
# Flatten every record in the "entry" column into one row; nested keys become
# dotted column names such as "resource.resourceType".
entries = pd.json_normalize(combined_df["entry"])

In [41]:
# Split the flattened entries by resource type. Both frames keep every column
# of `entries`; only the selected rows differ.
Encounter_data = entries.loc[entries["resource.resourceType"].eq("Encounter")]
Immunization_data = entries.loc[entries["resource.resourceType"].eq("Immunization")]


In [42]:
# Inspect the available columns. Because this frame is only a row filter of
# `entries`, it still carries the columns of every resource type
# (e.g. 'resource.gender', 'resource.birthDate'), not just Immunization fields.
Immunization_data.columns

Index(['fullUrl', 'resource.id', 'resource.text.status', 'resource.text.div',
       'resource.extension', 'resource.identifier', 'resource.name',
       'resource.telecom', 'resource.gender', 'resource.birthDate',
       'resource.address', 'resource.maritalStatus.coding',
       'resource.multipleBirthBoolean', 'resource.photo',
       'resource.resourceType', 'resource.status', 'resource.class.code',
       'resource.type', 'resource.patient.reference', 'resource.period.start',
       'resource.period.end', 'resource.clinicalStatus',
       'resource.verificationStatus', 'resource.code.coding',
       'resource.subject.reference', 'resource.context.reference',
       'resource.onsetDateTime', 'resource.abatementDateTime',
       'resource.encounter.reference', 'resource.effectiveDateTime',
       'resource.valueQuantity.value', 'resource.valueQuantity.unit',
       'resource.valueQuantity.system', 'resource.valueQuantity.code',
       'resource.component', 'resource.date', 'resource

In [64]:
# Keep only what is needed to link an encounter to its patient:
# the encounter's own URL and the patient reference it carries.
encounter_ref = Encounter_data.loc[:, ['fullUrl', 'resource.patient.reference']]

In [70]:
# Attach the matching encounter (and the patient reference it carries) to each
# immunization row.
#   - join keys: the encounter's own `fullUrl` against the immunization's
#     `resource.encounter.reference`
#   - how="right": every Immunization row is kept, even without a matching encounter
# Both inputs contain `fullUrl` and `resource.patient.reference`, so pandas
# suffixes the encounter-side copies with `_x` and the immunization-side ones with `_y`.
merged_data = encounter_ref.merge(
    Immunization_data,
    how="right",
    left_on="fullUrl",
    right_on="resource.encounter.reference",
)
merged_data

Unnamed: 0,fullUrl_x,resource.patient.reference_x,fullUrl_y,resource.id,resource.text.status,resource.text.div,resource.extension,resource.identifier,resource.name,resource.telecom,...,resource.dispenseRequest.numberOfRepeatsAllowed,resource.dispenseRequest.quantity.value,resource.dispenseRequest.quantity.unit,resource.dispenseRequest.expectedSupplyDuration.value,resource.dispenseRequest.expectedSupplyDuration.unit,resource.dispenseRequest.expectedSupplyDuration.system,resource.dispenseRequest.expectedSupplyDuration.code,resource.criticality,resource.assertedDate,resource.multipleBirthInteger
0,urn:uuid:2c722ad1-5a59-4f6a-90cc-c55f86bbd9c6,urn:uuid:80bb41d3-ebb4-42e6-b087-6ee89b0463a9,,,,,,,,,...,,,,,,,,,,
1,urn:uuid:f5b01614-f569-4e9e-b482-cc0231a7c20a,urn:uuid:80bb41d3-ebb4-42e6-b087-6ee89b0463a9,,,,,,,,,...,,,,,,,,,,
2,urn:uuid:f5b01614-f569-4e9e-b482-cc0231a7c20a,urn:uuid:80bb41d3-ebb4-42e6-b087-6ee89b0463a9,,,,,,,,,...,,,,,,,,,,
3,urn:uuid:1460d856-b01a-4c3b-b8c7-07fd86cac5fe,urn:uuid:80bb41d3-ebb4-42e6-b087-6ee89b0463a9,,,,,,,,,...,,,,,,,,,,
4,urn:uuid:df35b4c3-93ee-4f2c-b85e-af2f110f6d30,urn:uuid:80bb41d3-ebb4-42e6-b087-6ee89b0463a9,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
227,urn:uuid:01e1e451-d7c5-4876-b4e7-d499eaada844,urn:uuid:aa3973d9-b64f-4a36-8cb6-f2719080b52f,,,,,,,,,...,,,,,,,,,,
228,urn:uuid:638fee81-9b8f-4896-8536-407e34db67c0,urn:uuid:aa3973d9-b64f-4a36-8cb6-f2719080b52f,,,,,,,,,...,,,,,,,,,,
229,urn:uuid:638fee81-9b8f-4896-8536-407e34db67c0,urn:uuid:aa3973d9-b64f-4a36-8cb6-f2719080b52f,,,,,,,,,...,,,,,,,,,,
230,urn:uuid:ba8b290e-98ba-4c40-90bd-a0a500b339b8,urn:uuid:aa3973d9-b64f-4a36-8cb6-f2719080b52f,,,,,,,,,...,,,,,,,,,,


In [93]:
def _first_coding_display(codings):
    """Return the ``display`` text of the first coding in a coding list.

    Returns None when the cell is not a non-empty list of dicts (for example a
    missing value), so a single malformed row cannot break the whole column.
    """
    if isinstance(codings, list) and codings and isinstance(codings[0], dict):
        return codings[0].get("display")
    return None


# Exactly one name per merged row, keeping merged_data's index.
# The previous explode() + json_normalize() approach discarded that index and
# produced one row per coding, so any row with zero, several, or missing codings
# shifted every following name onto the wrong immunization (or raised).
Immunization_name = (
    merged_data['resource.vaccineCode.coding']
    .map(_first_coding_display)
    .rename('display')  # keep the Series name the original expression produced
)


In [94]:
# Step 6: assemble the final immunization table from the merged frame.
# The 'urn:uuid:' text is removed from both reference columns so they hold bare ids.
Immunization_output = pd.DataFrame(
    data={
        'Status': merged_data['resource.status'],
        # Label kept as-is; the values are the `resource.date` timestamps.
        'Immunization_data': merged_data['resource.date'],
        "Immunization Name": Immunization_name,
        "Encounter_id": merged_data['resource.encounter.reference'].str.replace(
            'urn:uuid:', '', regex=False
        ),
        # `_x` marks the encounter-side patient reference produced by the merge.
        'patient_id': merged_data['resource.patient.reference_x'].str.replace(
            'urn:uuid:', '', regex=False
        ),
    }
)
Immunization_output

Unnamed: 0,Status,Immunization_data,Immunization Name,Encounter_id,patient_id
0,completed,2010-05-12T10:33:51-04:00,"Influenza, seasonal, injectable, preservative ...",2c722ad1-5a59-4f6a-90cc-c55f86bbd9c6,80bb41d3-ebb4-42e6-b087-6ee89b0463a9
1,completed,2011-07-02T14:56:51-04:00,"Influenza, seasonal, injectable, preservative ...",f5b01614-f569-4e9e-b482-cc0231a7c20a,80bb41d3-ebb4-42e6-b087-6ee89b0463a9
2,completed,2011-07-02T14:56:51-04:00,Td (adult) preservative free,f5b01614-f569-4e9e-b482-cc0231a7c20a,80bb41d3-ebb4-42e6-b087-6ee89b0463a9
3,completed,2012-06-03T09:20:31-04:00,"Influenza, seasonal, injectable, preservative ...",1460d856-b01a-4c3b-b8c7-07fd86cac5fe,80bb41d3-ebb4-42e6-b087-6ee89b0463a9
4,completed,2013-05-24T07:52:53-04:00,"Influenza, seasonal, injectable, preservative ...",df35b4c3-93ee-4f2c-b85e-af2f110f6d30,80bb41d3-ebb4-42e6-b087-6ee89b0463a9
...,...,...,...,...,...
227,completed,2014-09-02T13:05:08-04:00,zoster,01e1e451-d7c5-4876-b4e7-d499eaada844,aa3973d9-b64f-4a36-8cb6-f2719080b52f
228,completed,2015-05-15T04:15:28-04:00,"Influenza, seasonal, injectable, preservative ...",638fee81-9b8f-4896-8536-407e34db67c0,aa3973d9-b64f-4a36-8cb6-f2719080b52f
229,completed,2015-05-15T04:15:28-04:00,Td (adult) preservative free,638fee81-9b8f-4896-8536-407e34db67c0,aa3973d9-b64f-4a36-8cb6-f2719080b52f
230,completed,2016-02-04T22:15:59-05:00,"Influenza, seasonal, injectable, preservative ...",ba8b290e-98ba-4c40-90bd-a0a500b339b8,aa3973d9-b64f-4a36-8cb6-f2719080b52f
