In [8]:
import json
import csv
from datetime import datetime

# Function to read and parse JSON files
def read_json_file(file_path):
    """Read a newline-delimited JSON (NDJSON) file into a list.

    Every non-blank line of the file is parsed as one JSON document.

    Args:
        file_path: Path of the NDJSON file to read.

    Returns:
        A list holding one parsed JSON value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's default text encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        # Blank or whitespace-only lines (e.g. an extra newline at the end of
        # the file) are skipped; json.loads would reject them.
        return [json.loads(line) for line in f if line.strip()]

# Convert timestamp to unix format
def convert_to_unix(timestamp):
    """Convert an ISO-8601 style timestamp string to whole Unix seconds.

    Accepted shapes are ``YYYY-MM-DDTHH:MM:SS<offset>`` and the same with a
    fractional-seconds part (``YYYY-MM-DDTHH:MM:SS.ffffff<offset>``). The UTC
    offset is required because it is parsed with ``%z``.

    Args:
        timestamp: The timestamp string to convert.

    Returns:
        Seconds since the Unix epoch as an ``int``; any sub-second part is
        dropped by the ``int()`` conversion.

    Raises:
        ValueError: If the string matches neither accepted shape.
    """
    # A "." only appears in these shapes as the fractional-seconds separator,
    # so its presence selects the format that includes %f.
    if "." in timestamp:
        fmt = "%Y-%m-%dT%H:%M:%S.%f%z"
    else:
        fmt = "%Y-%m-%dT%H:%M:%S%z"
    return int(datetime.strptime(timestamp, fmt).timestamp())

# Define the file paths
# NOTE: these are absolute paths inside one user's home directory; adjust them
# before running on another machine.
# Inputs: one NDJSON file per resource kind. (The directory name suggests a
# MIMIC FHIR export -- not verified here.)
patient_file_path = "/home/rajabala/Downloads/brijesh/mimic-fhir/Patient.ndjson"
condition_file_path = "/home/rajabala/Downloads/brijesh/mimic-fhir/Condition.ndjson"
encounter_file_path = "/home/rajabala/Downloads/brijesh/mimic-fhir/Encounter.ndjson"
encounter_icu_file_path = "/home/rajabala/Downloads/brijesh/mimic-fhir/EncounterICU.ndjson"
# Output: destination of the generated CSV file.
csv_file_path = "/home/rajabala/Downloads/brijesh/out/patientcondition.csv"

# Step 1: Load every NDJSON input, then group condition codes by patient
patients, conditions, encounters, encounters_icu = (
    read_json_file(source_path)
    for source_path in (
        patient_file_path,
        condition_file_path,
        encounter_file_path,
        encounter_icu_file_path,
    )
)

# Maps patient id -> list of {'code', 'description'} dicts, in file order.
patient_conditions = {}
for condition in conditions:
    # Keep only the last "/"-separated segment of the subject reference.
    patient_id = condition['subject']['reference'].rsplit('/', 1)[-1]
    # Only the first coding entry of the condition is used.
    first_coding = condition['code']['coding'][0]
    condition_data = {
        'code': first_coding['code'],
        'description': first_coding['display'],
    }
    patient_conditions.setdefault(patient_id, []).append(condition_data)

# Step 2: Estimate time for each condition
# Each condition is stamped with the start time of the first encounter found
# for its patient. "First" means first in list order (regular encounters are
# listed before ICU ones), not necessarily the chronologically earliest.
patient_condition_timestamps = []
all_encounters = encounters + encounters_icu

# Index the first encounter of every patient in a single pass, instead of
# rescanning the whole encounter list for each individual condition.
first_encounter_by_patient = {}
for encounter in all_encounters:
    encounter_patient_id = encounter['subject']['reference'].split('/')[-1]
    # setdefault keeps the earliest entry and ignores later ones.
    first_encounter_by_patient.setdefault(encounter_patient_id, encounter)

for patient_id, conditions_list in patient_conditions.items():
    first_encounter = first_encounter_by_patient.get(patient_id)
    if first_encounter is None:
        # Patients without any encounter produce no rows.
        continue
    encounter_start_time = first_encounter['period']['start']
    for condition in conditions_list:
        patient_condition_timestamps.append({
            'pid': patient_id,
            'time': encounter_start_time,
            'code': condition['code'],
            'description': condition['description']
        })

# Step 3: Write data to CSV file
# newline='' leaves line-ending handling to the csv module; the explicit
# encoding keeps non-ASCII descriptions independent of the platform default.
with open(csv_file_path, mode='w', newline='', encoding='utf-8') as csv_file:
    fieldnames = ['pid', 'time', 'code', 'description']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    writer.writeheader()
    for record in patient_condition_timestamps:
        # Records are converted in place. Skip ones that already hold an
        # integer time so that re-running this step does not try to parse
        # an already-converted value.
        if not isinstance(record['time'], int):
            record['time'] = convert_to_unix(record['time'])
        writer.writerow(record)

print("CSV file generated successfully!")

CSV file generated successfully!
