In [14]:
import json
import fhir.resources
# Show which fhir.resources version this notebook was run against.
print(fhir.resources.__version__)
# Read the NDJSON file: every line is parsed as one JSON document.
with open('test/patients.ndjson', 'r') as file:
    dataa = [json.loads(line) for line in file]

# Display the first parsed record as the cell output.
dataa[0]


6.5.0


{'resourceType': 'Patient',
 'identifier': [{'type': {'coding': [{'system': 'https://fhir.cerner.com/codeSet/4',
      'code': '10',
      'display': 'MRN',
      'userSelected': True},
     {'system': 'http://terminology.hl7.org/CodeSystem/v2-0203',
      'code': 'MR',
      'display': 'Medical record number'}],
    'text': 'MRN'},
   'system': 'urn:oid:2.16.840.1.113883.3.552',
   'value': 'pat_1'}],
 'name': [{'use': 'official', 'family': 'Bernier607', 'given': ['Edmond919']}],
 'gender': 'male',
 'birthDate': '1942-12-16',
 'address': [{'line': ['244 Borer Alley Suite 88'],
   'city': 'Newton',
   'state': 'MA',
   'postalCode': '02467',
   'country': 'US'}]}

In [15]:
import pandas as pd
from fhir.resources.patient import Patient
from fhir.resources.humanname import HumanName
from fhir.resources.address import Address
from fhir.resources.identifier import Identifier
from datetime import datetime, date
import json

# Define a function to parse and format birth dates in YYYY-MM-DD format
def parse_date(date_str):
    """Convert an ``MM/DD/YYYY`` date string to an ISO ``YYYY-MM-DD`` string.

    Args:
        date_str: The text to parse. Non-string values (for example ``None``
            or the float NaN that pandas produces for an empty cell) are
            treated as invalid input instead of raising.

    Returns:
        The date formatted as ``YYYY-MM-DD``, or ``None`` when ``date_str``
        cannot be parsed. A diagnostic message is printed in the latter case.
    """
    try:
        # Parse the date from MM/DD/YYYY format
        birth_date = datetime.strptime(date_str, "%m/%d/%Y").date()
    except (ValueError, TypeError):
        # ValueError: text that is not MM/DD/YYYY.
        # TypeError: strptime was handed something that is not a string.
        print(f"Invalid date format for {date_str}. Expected MM/DD/YYYY.")
        return None
    # Return the date in YYYY-MM-DD format as a string
    return birth_date.isoformat()

# Define functions similar to each method in the Java code
def set_identifiers(patient, row):
    """Attach one MRN identifier, valued from ``row['id']``, to ``patient``.

    The identifier type carries two codings (a Cerner code set entry and the
    HL7 v2-0203 ``MR`` code) and replaces any existing ``patient.identifier``.
    """
    cerner_coding = {
        "system": "https://fhir.cerner.com/codeSet/4",
        "code": "10",
        "display": "MRN",
        "userSelected": True
    }
    hl7_coding = {
        "system": "http://terminology.hl7.org/CodeSystem/v2-0203",
        "code": "MR",
        "display": "Medical record number"
    }
    mrn_type = {"coding": [cerner_coding, hl7_coding], "text": "MRN"}

    patient.identifier = [
        Identifier(
            type=mrn_type,
            system="urn:oid:2.16.840.1.113883.3.552",
            value=row['id']
        )
    ]

def set_names(patient, row):
    """Set ``patient.name`` to a single official name built from ``row``.

    Reads ``last_name`` and ``first_name`` from ``row``, defaulting each to an
    empty string when the key is absent.
    """
    family_name = row.get('last_name', '')
    given_names = [row.get('first_name', '')]
    patient.name = [
        HumanName(use="official", family=family_name, given=given_names)
    ]

def set_gender(patient, gender_str):
    """Set ``patient.gender`` from a free-text gender value.

    Args:
        patient: Object whose ``gender`` attribute is assigned.
        gender_str: Source value. ``"male"``/``"m"`` and ``"female"``/``"f"``
            are recognised case-insensitively, ignoring surrounding
            whitespace. Anything else, including non-string values such as
            ``None`` or NaN, maps to ``"unknown"``.
    """
    gender_map = {
        "male": "male",
        "m": "male",
        "female": "female",
        "f": "female"
    }
    # A missing cell can arrive as a non-string (None / float NaN); calling
    # .lower() on it would raise, so such values are treated as unrecognised.
    if isinstance(gender_str, str):
        lookup_key = gender_str.strip().lower()
    else:
        lookup_key = ""
    patient.gender = gender_map.get(lookup_key, "unknown")

def set_address(patient, row):
    """Set ``patient.address`` to a single address built from ``row``.

    Reads ``address``, ``city``, ``state``, ``zip_code`` and ``country`` from
    ``row`` (each defaulting to an empty string); the zip code is passed
    through ``str`` before being stored as ``postalCode``.
    """
    address_fields = {
        "line": [row.get('address', '')],
        "city": row.get('city', ''),
        "state": row.get('state', ''),
        "postalCode": str(row.get('zip_code', '')),
        "country": row.get('country', ''),
    }
    patient.address = [Address(**address_fields)]

def set_birthdate(patient, birth_date_str):
    """Set ``patient.birthDate`` when ``birth_date_str`` parses successfully.

    The value is run through ``parse_date``; if that yields a falsy result the
    patient is left untouched.
    """
    iso_birth_date = parse_date(birth_date_str)
    if not iso_birth_date:
        return
    # Stored as the ISO-format string returned by parse_date.
    patient.birthDate = iso_birth_date

# Main function to load TSV, create Patient resources, and map data to FHIR
def tsv_to_fhir_patients(tsv_file):
    """Read a tab-separated file and return one Patient dict per row.

    Args:
        tsv_file: Path (or file-like object) handed to ``pd.read_csv``.

    Returns:
        A list with the ``.dict()`` form of a ``Patient`` for each row, in
        file order.
    """
    frame = pd.read_csv(tsv_file, sep='\t', dtype={'zip_code': str})
    # NOTE(review): astype(str) renders a missing zip_code as the text 'nan';
    # confirm whether empty zip codes can occur in the input.
    frame['zip_code'] = frame['zip_code'].astype(str)

    def build_patient_dict(row):
        # Populate a fresh Patient field by field, then export it as a dict.
        patient = Patient(resourceType="Patient")
        set_identifiers(patient, row)
        set_names(patient, row)
        set_gender(patient, row.get('gender', ''))
        set_address(patient, row)
        set_birthdate(patient, row.get('birth_date', ''))
        return patient.dict()

    return [build_patient_dict(row) for _, row in frame.iterrows()]

# Function to recursively convert datetime.date to string in YYYY-MM-DD format
def convert_dates(record):
    """Replace ``date`` objects inside nested dicts/lists with ``YYYY-MM-DD`` strings.

    The structure is modified in place and also returned. Dicts and lists are
    walked recursively at any depth, so dates that sit directly inside a list
    (or inside a list of lists) are converted as well.

    Args:
        record: A dict or list to walk. Any other value is returned unchanged.

    Returns:
        The same ``record`` object that was passed in.
    """
    if isinstance(record, dict):
        entries = list(record.items())
    elif isinstance(record, list):
        # Index/value pairs let list elements be replaced in place.
        entries = list(enumerate(record))
    else:
        return record

    for key, value in entries:
        if isinstance(value, date):  # If the value is a date object
            record[key] = value.strftime('%Y-%m-%d')  # Convert it to a string
        elif isinstance(value, (dict, list)):
            convert_dates(value)  # Recurse into nested containers
    return record

# Function to save the data in NDJSON format
def save_to_ndjson(data, output_file):
    """Write ``data`` to ``output_file`` as NDJSON, one record per line.

    Each record is passed through ``convert_dates`` and then serialised with
    ``json.dumps``; a newline follows every record.
    """
    with open(output_file, 'w') as sink:
        sink.writelines(
            json.dumps(convert_dates(entry)) + '\n'
            for entry in data
        )

# Example usage: convert the sample TSV into a list of Patient dicts.
tsv_file = 'sample_health_data_v2/patients.tsv'
output_file = 'patients.ndjson'
patients = tsv_to_fhir_patients(tsv_file)

# Save to NDJSON (one JSON document per line)
save_to_ndjson(patients, output_file)

# Confirm where the output was written.
print(f"Data has been saved to {output_file} in NDJSON format.")


Data has been saved to patients.ndjson in NDJSON format.


In [16]:
# Build an empty Patient; as the cell's only expression, its repr is displayed.
Patient()

Patient(resource_type='Patient', fhir_comments=None, id=None, implicitRules=None, implicitRules__ext=None, language=None, language__ext=None, meta=None, contained=None, extension=None, modifierExtension=None, text=None, active=None, active__ext=None, address=None, birthDate=None, birthDate__ext=None, communication=None, contact=None, deceasedBoolean=None, deceasedBoolean__ext=None, deceasedDateTime=None, deceasedDateTime__ext=None, gender=None, gender__ext=None, generalPractitioner=None, identifier=None, link=None, managingOrganization=None, maritalStatus=None, multipleBirthBoolean=None, multipleBirthBoolean__ext=None, multipleBirthInteger=None, multipleBirthInteger__ext=None, name=None, photo=None, telecom=None)

In [17]:
# Read back 'patients.ndjson', parsing each line as one JSON document.
with open('patients.ndjson', 'r') as file:
    data = [json.loads(line) for line in file]

# `data` is now a list of dictionaries (one for each JSON object in the file)
data

# NOTE: the triple-quoted string below is a pasted sample record. Because it
# is the last expression in the cell, the cell displays this string rather
# than the contents of `data`.
'''
{'resourceType': 'Patient',
  'identifier': [{'type': {'coding': [{'system': 'https://fhir.cerner.com/codeSet/4',
       'code': '10',
       'display': 'MRN',
       'userSelected': True},
      {'system': 'http://terminology.hl7.org/CodeSystem/v2-0203',
       'code': 'MR',
       'display': 'Medical record number'}],
     'text': 'MRN'},
    'system': 'urn:oid:2.16.840.1.113883.3.552',
    'value': 'pat_1'}],
  'name': [{'use': 'official',
    'family': 'Bernier607',
    'given': ['Edmond919']}],
  'gender': 'male',
  'birthDate': '1942-12-16',
  'address': [{'line': ['244 Borer Alley Suite 88'],
    'city': 'Newton',
    'state': 'MA',
    'postalCode': '02467',
    'country': 'US'}]},
'''

"\n{'resourceType': 'Patient',\n  'identifier': [{'type': {'coding': [{'system': 'https://fhir.cerner.com/codeSet/4',\n       'code': '10',\n       'display': 'MRN',\n       'userSelected': True},\n      {'system': 'http://terminology.hl7.org/CodeSystem/v2-0203',\n       'code': 'MR',\n       'display': 'Medical record number'}],\n     'text': 'MRN'},\n    'system': 'urn:oid:2.16.840.1.113883.3.552',\n    'value': 'pat_1'}],\n  'name': [{'use': 'official',\n    'family': 'Bernier607',\n    'given': ['Edmond919']}],\n  'gender': 'male',\n  'birthDate': '1942-12-16',\n  'address': [{'line': ['244 Borer Alley Suite 88'],\n    'city': 'Newton',\n    'state': 'MA',\n    'postalCode': '02467',\n    'country': 'US'}]},\n"

In [18]:
# Display the records loaded earlier from 'test/patients.ndjson'.
dataa

[{'resourceType': 'Patient',
  'identifier': [{'type': {'coding': [{'system': 'https://fhir.cerner.com/codeSet/4',
       'code': '10',
       'display': 'MRN',
       'userSelected': True},
      {'system': 'http://terminology.hl7.org/CodeSystem/v2-0203',
       'code': 'MR',
       'display': 'Medical record number'}],
     'text': 'MRN'},
    'system': 'urn:oid:2.16.840.1.113883.3.552',
    'value': 'pat_1'}],
  'name': [{'use': 'official',
    'family': 'Bernier607',
    'given': ['Edmond919']}],
  'gender': 'male',
  'birthDate': '1942-12-16',
  'address': [{'line': ['244 Borer Alley Suite 88'],
    'city': 'Newton',
    'state': 'MA',
    'postalCode': '02467',
    'country': 'US'}]},
 {'resourceType': 'Patient',
  'identifier': [{'type': {'coding': [{'system': 'https://fhir.cerner.com/codeSet/4',
       'code': '10',
       'display': 'MRN',
       'userSelected': True},
      {'system': 'http://terminology.hl7.org/CodeSystem/v2-0203',
       'code': 'MR',
       'display': 'Med

In [19]:
import json

def _load_ndjson_records(path):
    """Return ``(line_number, parsed_object)`` pairs for each non-blank line of ``path``."""
    with open(path, 'r', encoding='utf-8') as handle:
        return [
            (line_number, json.loads(line))
            for line_number, line in enumerate(handle, start=1)
            if line.strip()  # skip blank lines such as a trailing empty line
        ]


def compare_ndjson_files(file1, file2):
    """Compare two NDJSON files record by record.

    Blank lines (for example a trailing empty line) are ignored, so they
    neither crash the JSON parser nor count towards the record total.

    Args:
        file1: Path of the first NDJSON file.
        file2: Path of the second NDJSON file.

    Returns:
        A ``(are_same, details)`` tuple. When the files hold a different
        number of records, ``details`` is an explanatory string. Otherwise it
        is a list with one dict per mismatching record, holding ``"line"``
        (1-based line number in ``file1``), ``"file1"`` and ``"file2"``;
        the list is empty when the files match.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records1 = _load_ndjson_records(file1)
    records2 = _load_ndjson_records(file2)

    # Check if the number of records match
    if len(records1) != len(records2):
        return False, "The NDJSON files have a different number of lines."

    # Compare record by record; parsed objects are compared, so key order and
    # whitespace differences in the raw text do not matter.
    differences = [
        {
            "line": line_number,  # Line number (1-based index)
            "file1": json_obj1,
            "file2": json_obj2
        }
        for (line_number, json_obj1), (_, json_obj2) in zip(records1, records2)
        if json_obj1 != json_obj2
    ]

    return not differences, differences

def print_differences(differences):
    """Print the differences in a readable format.

    Args:
        differences: Either a list of dicts with ``"line"``, ``"file1"`` and
            ``"file2"`` keys, or a plain string message (as produced when the
            two files hold a different number of lines). An empty value means
            the files are the same.
    """
    if not differences:
        print("The NDJSON files are the same.")
        return

    if isinstance(differences, str):
        # A string is a summary message, not a list of per-line diffs;
        # iterating over it would index its characters and raise TypeError.
        print(differences)
        return

    for diff in differences:
        print(f"Difference found in line {diff['line']}:")
        print(f"- File 1: {json.dumps(diff['file1'], indent=2)}")
        print(f"+ File 2: {json.dumps(diff['file2'], indent=2)}")
        print("-" * 40)

# Example usage: compare the generated file against the one under test/.
file1 = 'patients.ndjson'
file2 = 'test/patients.ndjson'

# `differences` is a list of per-line diffs, or a string message when the
# two files do not have the same number of lines.
are_same, differences = compare_ndjson_files(file1, file2)

if are_same:
    print("The NDJSON files are the same.")
else:
    print("The NDJSON files are different:")
    print_differences(differences)


The NDJSON files are the same.
