In [1]:
import datetime
import json
import pickle
import re
from dataclasses import dataclass

import requests
from bs4 import BeautifulSoup

# Disable SSL verification warnings with Requests.
# The requests.get() calls in this notebook pass verify=False, which would otherwise
# emit an InsecureRequestWarning for every request.
requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)

In [2]:
# Define object to store alias information
@dataclass
class CodeSystem:
    """A code system listed on terminology.hl7.org, together with its generated FSH alias.

    Only `name` is required; the remaining fields default to None and are filled
    in by later cells of this notebook.
    """
    name: str  # Link text of the code system on the index page
    uri: str = None  # Canonical URI, read from the `url` field of the code system's JSON
    hl7_webpage: str = None  # Relative href of the code system page (starts with "CodeSystem-")
    alias: str = None  # FSH alias derived from the URI, page slug or name, with a leading "$"

In [3]:
# Load and parse code systems from terminology.hl7.org
#
# NOTE(security): `verify=False` disables TLS certificate verification. It is kept
# here to preserve the notebook's existing behavior; remove it if certificate
# validation works in your environment.
response = requests.get('https://terminology.hl7.org/codesystems.html', verify=False, timeout=60)

# Fail loudly on an HTTP error instead of silently parsing an error page
response.raise_for_status()
html = response.text

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed (and to avoid bs4's "no parser specified" warning)
soup = BeautifulSoup(html, 'html.parser')

In [4]:
# Get canonical URIs for each of the terminology.hl7.org code systems
#
# This cell may take a few minutes to run as it has to scrape around 1000 web pages.
# To facilitate development, this cell will automatically cache its output into
# `code_systems.pickle` (which is gitignored). Delete this file if you want to
# refresh the code systems to match the latest version of terminology.hl7.org.
#
# NOTE(security): unpickling can execute arbitrary code. Only keep a
# `code_systems.pickle` file that was written by this notebook.

try:
    # Use a context manager so the cache file handle is always closed
    with open("code_systems.pickle", "rb") as cache_file:
        p = pickle.load(cache_file)
    print(f"Loading code systems from \"code_systems.pickle\" cache (last updated {p['pickled_at']}).")
    code_systems = p['code_systems']
except (
    OSError,                 # cache file missing or unreadable
    EOFError,                # truncated cache file
    pickle.UnpicklingError,  # corrupt cache file
    AttributeError,          # pickled class can no longer be resolved
    ImportError,
    IndexError,
    KeyError,                # cache lacks the expected keys
    TypeError,
    ValueError,
):
    # No usable cache: rebuild the list from the index page. Anchors without an
    # href are skipped rather than raising KeyError.
    code_systems = [
        CodeSystem(hl7_webpage=link['href'], name=link.text)
        for link in soup.select('ul li a')
        if link.get('href', '').startswith('CodeSystem-')
    ]

    # Get URIs: every code system page has a sibling .json resource whose `url`
    # field holds the canonical URI
    for code_system in code_systems:
        json_url = 'https://terminology.hl7.org/' + code_system.hl7_webpage.replace('.html', '.json')
        response = requests.get(json_url, verify=False, timeout=60)
        # Stop on HTTP errors instead of trying to parse an error page as JSON
        response.raise_for_status()
        code_system.uri = json.loads(response.text)['url']

    # Cache code systems for faster future runs where a refresh is not needed
    with open("code_systems.pickle", "wb") as cache_file:
        pickle.dump(
            {"pickled_at": datetime.datetime.now().isoformat(), "code_systems": code_systems},
            cache_file,
        )

Loading code systems from "code_systems.pickle" cache (last updated 2021-06-15T07:02:40.951027).


In [5]:
# Create FSH aliases for each code system
#
# The aliases are based on the slugs provided on https://terminology.hl7.org/codesystems.html, or
# a slug extracted from the canonical URI.

for code_system in code_systems:
    if code_system.uri.startswith('http://terminology.hl7.org'):
        # THO code system: the slug is whatever follows the CodeSystem base URI
        slug = code_system.uri.replace('http://terminology.hl7.org/CodeSystem/', '')
    else:
        # Any other code system: fall back to the slug in the HL7 webpage filename
        slug = code_system.hl7_webpage.replace('CodeSystem-', '').replace('.html', '')

    # Purely numeric `v2-0000`-style slugs are not descriptive, so build the alias
    # from the code system's display name instead, keeping only allowlisted characters
    if re.match(r"^v2-[0-9]+$", slug):
        slug = 'v2-' + re.sub(r'[^a-zA-Z0-9\-]', "", code_system.name)

    # Slashes become dashes, and every alias gets a leading $
    code_system.alias = "$" + slug.replace('/', '-')

In [6]:
# Helper function to generate the files containing FSH aliases
def generate_alias_file(code_systems, path, name, desc):
    """Write a file of vertically aligned FSH `Alias:` lines for the given code systems.

    Args:
        code_systems: Objects exposing `alias`, `uri` and `name` string attributes.
        path: Destination file path; the file is overwritten.
        name: First header line written to the file.
        desc: Second header line written to the file, followed by a blank line.

    Side effects:
        When several code systems share an alias, the later ones get a numeric
        suffix (2, 3, ...) and their `alias` attribute is updated in place.
        Prints a one-line summary to stdout.
    """
    code_systems = list(code_systems)

    # De-duplicate aliases. Track every alias already handed out so that a third
    # (or later) duplicate cannot collide with a suffix assigned earlier, and so
    # that a generated alias cannot clash with a naturally occurring one.
    used_aliases = set()
    next_suffix = {}
    for code_system in code_systems:
        base_alias = code_system.alias
        unique_alias = base_alias
        if unique_alias in used_aliases:
            suffix = next_suffix.get(base_alias, 2)
            unique_alias = f"{base_alias}{suffix}"
            while unique_alias in used_aliases:
                suffix += 1
                unique_alias = f"{base_alias}{suffix}"
            next_suffix[base_alias] = suffix + 1
        used_aliases.add(unique_alias)
        code_system.alias = unique_alias

    # Vertical alignment: pad the alias and URI columns to their widest entry.
    # Padding the fields directly (rather than splitting the rendered line on
    # '=' or '// ') keeps URIs and names that contain those characters intact.
    alias_width = max((len(cs.alias) for cs in code_systems), default=0)
    uri_width = max((len(cs.uri) for cs in code_systems), default=0)
    output = [
        f"Alias:   {cs.alias.ljust(alias_width)} = {cs.uri.ljust(uri_width)} // {cs.name}"
        for cs in code_systems
    ]

    # Write file. UTF-8 is set explicitly so non-ASCII code system names are
    # written the same way on every platform.
    with open(path, "w", encoding="utf-8") as text_file:
        print(name, file=text_file)
        print(desc + "\n", file=text_file)
        print("\n".join(output), file=text_file)

    print(f"{len(output)} aliases written to {path}")

## Code system aliases - V2

In [7]:
# Write the alias file for the V2 code systems (aliases starting with "$v2-")
path = "../../Examples/Aliases/HL7-code-systems-V2-aliases.fsh"
v2_code_systems = [cs for cs in code_systems if cs.alias.startswith('$v2-')]
v2_description = f"// @Description: Aliases for all V2 code systems defined at https://terminology.hl7.org/codesystems.html. Generated {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} by https://github.com/FSHSchool/FSHOnline-Examples/tree/main/Scripts/Aliases/HL7.ipynb. Leading $ signs are optional but are useful for visually distinguishing aliases from other names."
generate_alias_file(
    code_systems=v2_code_systems,
    path=path,
    name="// @Name: HL7 V2 aliases (complete)",
    desc=v2_description,
)

424 aliases written to ../../Examples/Aliases/HL7-code-systems-V2-aliases.fsh


## Code system aliases - V3

In [8]:
# Write the alias file for the V3 code systems (aliases starting with "$v3-")
path = "../../Examples/Aliases/HL7-code-systems-V3-aliases.fsh"
v3_code_systems = [cs for cs in code_systems if cs.alias.startswith('$v3-')]
v3_description = f"// @Description: Aliases for all V3 code systems defined at https://terminology.hl7.org/codesystems.html. Generated {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} by https://github.com/FSHSchool/FSHOnline-Examples/tree/main/Scripts/Aliases/HL7.ipynb. Leading $ signs are optional but are useful for visually distinguishing aliases from other names."
generate_alias_file(
    code_systems=v3_code_systems,
    path=path,
    name="// @Name: HL7 V3 aliases (complete)",
    desc=v3_description,
)

214 aliases written to ../../Examples/Aliases/HL7-code-systems-V3-aliases.fsh


## Code system aliases - all other

In [9]:
# Write the alias file for every code system that is neither V2 nor V3
path = "../../Examples/Aliases/HL7-code-systems-other-aliases.fsh"
other_code_systems = [cs for cs in code_systems if not cs.alias.startswith(('$v3-', '$v2-'))]
other_description = f"// @Description: Aliases for all code systems defined at https://terminology.hl7.org/codesystems.html that are not part of V2 or V3. Generated {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} by https://github.com/FSHSchool/FSHOnline-Examples/tree/main/Scripts/Aliases/HL7.ipynb. Leading $ signs are optional but are useful for visually distinguishing aliases from other names."
generate_alias_file(
    code_systems=other_code_systems,
    path=path,
    name="// @Name: HL7 other aliases (complete)",
    desc=other_description,
)

451 aliases written to ../../Examples/Aliases/HL7-code-systems-other-aliases.fsh
