In [10]:
import datetime
import json
import re
from dataclasses import dataclass
from typing import Optional

import requests
from bs4 import BeautifulSoup

# Silence urllib3's InsecureRequestWarning. The HTTP requests in this notebook
# pass `verify=False`, which would otherwise trigger this warning.
# NOTE(review): this only hides the warning; TLS certificates are still not verified.
requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)

In [2]:
@dataclass
class CodeSystem:
    """One code system found on the HL7 terminology index page.

    Only `name` is required at construction time. The remaining fields default
    to `None` and are filled in by later cells of this notebook.
    """

    # Link text of the code system on the index page.
    name: str
    # Canonical URL, read from the `url` key of the code system's JSON document.
    uri: Optional[str] = None
    # Relative `CodeSystem-*.html` page name taken from the index link's href.
    hl7_webpage: Optional[str] = None
    # Short FSH alias derived from the URI, page name or link text.
    alias: Optional[str] = None

In [3]:
# Download the index page that links to every code system page.
# NOTE(review): `verify=False` disables TLS certificate verification, so the
# response is not authenticated. Kept as in the original; confirm it is still needed.
# The timeout (in seconds) stops the cell from hanging forever on a stalled connection.
response = requests.get('https://terminology.hl7.org/codesystems.html', verify=False, timeout=60)
# Fail here on an HTTP error instead of silently parsing an error page.
response.raise_for_status()
html = response.text
# Name the parser explicitly: without it Beautiful Soup picks whichever parser is
# installed (and warns), so the parsed tree could differ between environments.
soup = BeautifulSoup(html, 'html.parser')

In [4]:
# This cell may take a few minutes to run as it has to scrape around 1000 web pages

# One CodeSystem per index link that points at a `CodeSystem-*` page.
# The `a[href]` selector skips anchors without an href attribute, which would
# otherwise raise KeyError on `link['href']`.
code_systems = [
    CodeSystem(hl7_webpage=link['href'], name=link.text)
    for link in soup.select('ul li a[href]')
    if link['href'].startswith('CodeSystem-')
]

# Get URIs
# Each code system page has a JSON sibling whose `url` key holds the canonical URI.
# A Session reuses the underlying connection across the many requests.
with requests.Session() as session:
    for code_system in code_systems:
        json_url = 'https://terminology.hl7.org/' + code_system.hl7_webpage.replace('.html', '.json')
        # NOTE(review): `verify=False` disables TLS certificate verification (kept as in the original).
        # The timeout (in seconds) keeps one stalled request from blocking the whole run.
        response = session.get(json_url, verify=False, timeout=60)
        # Stop with a clear HTTP error rather than a confusing JSON/KeyError later.
        response.raise_for_status()
        code_system.uri = response.json()['url']

In [5]:
# Set aliases
THO_CODE_SYSTEM_PREFIX = 'http://terminology.hl7.org/CodeSystem/'

for code_system in code_systems:
    # If this is a THO code system, use the slug from that.
    # The check uses the exact prefix that gets stripped: a URI on the same host
    # but outside `/CodeSystem/` would otherwise keep its full URL as the alias.
    if code_system.uri.startswith(THO_CODE_SYSTEM_PREFIX):
        alias = code_system.uri[len(THO_CODE_SYSTEM_PREFIX):]

    # Otherwise, use the slug from the HL7 webpage filename for the code system
    else:
        alias = code_system.hl7_webpage.replace('CodeSystem-', '').replace('.html', '')

    # There are a bunch of `v2-0000`-syntax code systems. Descriptive aliases would be better, so use the text from the code system page for these instead
    if re.match(r"^v2-[0-9]+$", alias):
        alias = 'v2-' + code_system.name

    # Convert slash to -
    code_system.alias = alias.replace('/', '-')

In [15]:
def generate_alias_file(code_systems, path):
    """Write one FSH `Alias:` line per code system to `path`, in aligned columns.

    Each line has the form `Alias: <alias> = <uri> // <name>`. The `=` signs and
    the `//` comments are padded with spaces so that they line up vertically.
    The file starts with two header comment lines (generator URL and timestamp).
    A summary line is printed to stdout once the file has been written.

    Args:
        code_systems: Iterable of objects exposing `alias`, `uri` and `name`
            attributes. May be empty, in which case only the header is written.
        path: Destination file path; an existing file is overwritten.
    """
    code_systems = list(code_systems)

    # Build every column from the fields themselves instead of formatting a line
    # and splitting it again on '=' or '// ': splitting drops text whenever a
    # URI or name contains one of those separators.
    alias_parts = [f"Alias: {c.alias} " for c in code_systems]
    equals_column = max(map(len, alias_parts), default=0)

    definitions = [
        alias_part.ljust(equals_column) + f"= {c.uri} "
        for alias_part, c in zip(alias_parts, code_systems)
    ]
    # Measure the comment column AFTER the '=' padding has been applied;
    # measuring it on the unpadded lines leaves the comments misaligned.
    comment_column = max(map(len, definitions), default=0)

    output = [
        definition.ljust(comment_column) + f"// {c.name}"
        for definition, c in zip(definitions, code_systems)
    ]

    # Explicit UTF-8 so the result does not depend on the platform's default encoding.
    with open(path, "w", encoding="utf-8") as text_file:
        print("// Automatically generated by https://github.com/FSHSchool/FSHOnline-Examples/tree/main/Examples/Aliases/HL7-code-system-script/make-aliases.ipynb", file=text_file)
        print(f"// Last updated: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", file=text_file)
        for line in output:
            print(line, file=text_file)

    print(f"{len(output)} aliases written to {path}")

## Code system aliases - V2

In [19]:
# Write only the code systems whose alias starts with `v2-`.
path = "../HL7-code-systems-V2-aliases.fsh"
generate_alias_file(
    [code_system for code_system in code_systems if code_system.alias.startswith('v2-')],
    path,
)

424 aliases written to ../HL7-code-systems-V2-aliases.fsh


## Code system aliases - V3

In [20]:
# Write only the code systems whose alias starts with `v3-`.
path = "../HL7-code-systems-V3-aliases.fsh"
generate_alias_file(
    [code_system for code_system in code_systems if code_system.alias.startswith('v3-')],
    path,
)

214 aliases written to ../HL7-code-systems-V3-aliases.fsh


## Code system aliases - all other

In [21]:
# Write every code system whose alias starts with neither `v3-` nor `v2-`.
path = "../HL7-code-systems-other-aliases.fsh"
generate_alias_file(
    [
        code_system
        for code_system in code_systems
        if not code_system.alias.startswith(('v3-', 'v2-'))
    ],
    path,
)

451 aliases written to ../HL7-code-systems-other-aliases.fsh
