# Importing libraries

In [24]:
import csv
import json
import os
from datetime import datetime

import pandas as pd

# Step 1: Read and understand the JSON format of each file


In [25]:
# Step 1: Read and understand the JSON format of each file
file_paths = (r"D:\Downloads\mimic-iv-clinical-database-demo-on-fhir-2.0\mimic-iv-clinical-database-demo-on-fhir-2.0\mimic-fhir\Patient.ndjson",
              r"D:\Downloads\mimic-iv-clinical-database-demo-on-fhir-2.0\mimic-iv-clinical-database-demo-on-fhir-2.0\mimic-fhir\Condition.ndjson",
              r"D:\Downloads\mimic-iv-clinical-database-demo-on-fhir-2.0\mimic-iv-clinical-database-demo-on-fhir-2.0\mimic-fhir\Encounter.ndjson",
              r"D:\Downloads\mimic-iv-clinical-database-demo-on-fhir-2.0\mimic-iv-clinical-database-demo-on-fhir-2.0\mimic-fhir\EncounterICU.ndjson"
            )


# Step 2: Parse the Patient NDJSON file

In [26]:
def read_ndjson(file_path):
    data = []
    with open(file_path, 'r') as file:
        for line in file:
            data.append(json.loads(line))
    return data

In [27]:
patients = {}
patient_data = read_ndjson(file_paths[0])
for patient in patient_data:
    patients[patient['id']] = []

# Step 3: Gather the conditions for each patient and index encounters by ID

In [28]:
condition_data = read_ndjson(file_paths[1])
for condition in condition_data:
    patient_id = condition['subject']['reference'].split('/')[-1]
    patients[patient_id].append(condition)

encounters = {}
encounter_data = read_ndjson(file_paths[2])
encounter_icu_data = read_ndjson(file_paths[3])

for encounter in encounter_data + encounter_icu_data:
    encounters[encounter['id']] = encounter

# Step 4: Create a CSV file

In [29]:
with open('patient_conditions.csv', 'w', newline='') as csvfile:
    fieldnames = ['pid', 'time', 'code', 'description']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    for patient_id, conditions in patients.items():
        for condition in conditions:
            encounter_id = condition.get('encounter', {}).get('reference', '').split('/')[-1]
            encounter = encounters.get(encounter_id)
            if encounter:
                start_time = encounter.get('period', {}).get('start', '')
                if start_time:
                    timestamp = datetime.strptime(start_time, '%Y-%m-%dT%H:%M:%S%z').timestamp()
                    description = condition.get('code', {}).get('coding', [{}])[0].get('display', '')
                    writer.writerow({
                        'pid': patient_id,
                        'time': int(timestamp),
                        'code': condition['code']['coding'][0]['code'],
                        'description': description
                    })


In [30]:
Final_Output = pd.read_csv("patient_conditions.csv")
Final_Output

Unnamed: 0,pid,time,code,description
0,0a8eebfd-a352-522e-89f0-1d4a13abdebc,6644651700,V462,"Other dependence on machines, supplemental oxygen"
1,0a8eebfd-a352-522e-89f0-1d4a13abdebc,6642340020,78959,Other ascites
2,0a8eebfd-a352-522e-89f0-1d4a13abdebc,6642340020,07071,Unspecified viral hepatitis C with hepatic coma
3,0a8eebfd-a352-522e-89f0-1d4a13abdebc,6642340020,5715,Cirrhosis of liver without mention of alcohol
4,0a8eebfd-a352-522e-89f0-1d4a13abdebc,6644651700,29680,"Bipolar disorder, unspecified"
...,...,...,...,...
4176,fa5fbf9c-23e3-5ef3-9cfb-24d20a950314,6563036820,2724,Other and unspecified hyperlipidemia
4177,fa5fbf9c-23e3-5ef3-9cfb-24d20a950314,6587319900,2761,Hyposmolality and/or hyponatremia
4178,fa5fbf9c-23e3-5ef3-9cfb-24d20a950314,6700350540,F419,"Anxiety disorder, unspecified"
4179,fa5fbf9c-23e3-5ef3-9cfb-24d20a950314,6613157160,2809,"Iron deficiency anemia, unspecified"


In [31]:
# Print the frame summary (row count, per-column non-null counts and dtypes)
# as a sanity check on the CSV that was just loaded.
Final_Output.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4181 entries, 0 to 4180
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   pid          4181 non-null   object
 1   time         4181 non-null   int64 
 2   code         4181 non-null   object
 3   description  4181 non-null   object
dtypes: int64(1), object(3)
memory usage: 130.8+ KB
