# In [1]:
import bs4, requests, re
import warnings
from bs4 import XMLParsedAsHTMLWarning
warnings.filterwarnings('ignore', category=XMLParsedAsHTMLWarning)

# Root URL that every page file name below is appended to.
base_url = "http://build.fhir.org/ig/HL7/US-Core/"

# Title -> page file name, kept in this order.  The keys appear to be the
# guide's navigation-menu entries (assumption -- confirm against the site).
# Titles mapped to None have no page file; the scan loop further down skips
# any falsy value.
Pages = dict(
    [
        ("Home", "index.html"),
        ("Conformance", None),
        ("General Requirements", "general-requirements.html"),
        ("Must Support", "must-support.html"),
        ("SMART on FHIR Obligations and Capabilities", "scopes.html"),
        ("Guidance", None),
        ("USCDI", "uscdi.html"),
        ("General Guidance", "general-guidance.html"),
        ("Clinical Notes", "clinical-notes.html"),
        ("Medication List", "medication-list.html"),
        ("Basic Provenance", "basic-provenance.html"),
        (
            "Using Provenance To Target Resource Elements",
            "element-level-provenance.html",
        ),
        ("Screening and Assessments", "screening-and-assessments.html"),
        ("Changes Between Versions", "changes-between-versions.html"),
        ("Future of US Core", "future-of-US-core.html"),
        ("Writing Vital Signs", "vitals-write.html"),
        ("FHIR Artifacts", None),
        ("Profiles and Extensions", "profiles-and-extensions.html"),
        (
            "Search Parameters and Operations",
            "search-parameters-and-operations.html",
        ),
        ("Terminology", "terminology.html"),
        ("Capability Statements", "capability-statements.html"),
        ("Security", "security.html"),
        ("Examples", "examples.html"),
        ("Downloads", "downloads.html"),
        ("Change Log", "changes.html"),
    ]
)

# Canonical-URL prefix shared by every entry in Profiles.
_PROFILE_PREFIX = 'http://hl7.org/fhir/us/core/StructureDefinition/'

# Full StructureDefinition canonical URLs, built as prefix + final path
# segment.  The order of the segments below is preserved in the list.
Profiles = [
    _PROFILE_PREFIX + _slug
    for _slug in (
        'head-occipital-frontal-circumference-percentile',
        'pediatric-bmi-for-age',
        'pediatric-weight-for-height',
        'us-core-adi-documentreference',
        'us-core-allergyintolerance',
        'us-core-authentication-time',
        'us-core-average-blood-pressure',
        'us-core-birthsex',
        'us-core-blood-pressure',
        'us-core-bmi',
        'us-core-body-height',
        'us-core-body-temperature',
        'us-core-body-weight',
        'us-core-care-experience-preference',
        'us-core-careplan',
        'us-core-careteam',
        'us-core-condition-encounter-diagnosis',
        'us-core-condition-problems-health-concerns',
        'us-core-coverage',
        'us-core-diagnosticreport-lab',
        'us-core-diagnosticreport-note',
        'us-core-direct',
        'us-core-documentreference',
        'us-core-encounter',
        'us-core-ethnicity',
        'us-core-extension-questionnaire-uri',
        'us-core-genderIdentity',
        'us-core-goal',
        'us-core-head-circumference',
        'us-core-heart-rate',
        'us-core-immunization',
        'us-core-implantable-device',
        'us-core-interpreter-needed',
        'us-core-jurisdiction',
        'us-core-location',
        'us-core-medication-adherence',
        'us-core-medication',
        'us-core-medicationdispense',
        'us-core-medicationrequest',
        'us-core-observation-adi-documentation',
        'us-core-observation-clinical-result',
        'us-core-observation-lab',
        'us-core-observation-occupation',
        'us-core-observation-pregnancyintent',
        'us-core-observation-pregnancystatus',
        'us-core-observation-screening-assessment',
        'us-core-observation-sexual-orientation',
        'us-core-organization',
        'us-core-patient',
        'us-core-practitioner',
        'us-core-practitionerrole',
        'us-core-procedure',
        'us-core-provenance',
        'us-core-pulse-oximetry',
        'us-core-questionnaireresponse',
        'us-core-race',
        'us-core-relatedperson',
        'us-core-respiratory-rate',
        'us-core-servicerequest',
        'us-core-sex',
        'us-core-simple-observation',
        'us-core-smokingstatus',
        'us-core-specimen',
        'us-core-treatment-intervention-preference',
        'us-core-tribal-affiliation',
        'uscdi-requirement',
        'us-core-vital-signs',
    )
]

# Patterns to look for in the visible text of each page.  Every entry is
# handed to re.findall, so it is interpreted as a regular expression.
# Alternative pattern sets, kept here so they can be swapped in:
# matches = [r'\b(\w+)(?:\W+\1\b)+',]  # duplicate words
# matches = [r'\[.*?\]']  # bad md links
# matches = [r'\{\{.*?\}\}', r'\{%.*?%\}',]  # liquid tags, filter
matches = ['git st', 'git pu', 'git co']  # git commands


# Fetch every page listed in Pages and print the URL plus the hits for each
# pattern that occurs in the page text.
#
# To scan the profile pages instead, iterate over Profiles and build URL as:
#   f"{page.replace('http://hl7.org/fhir/us/core/StructureDefinition/', 'http://build.fhir.org/ig/HL7/US-Core/StructureDefinition-')}.html"
for page in Pages.values():
    if not page:
        # Entries mapped to None have no page to download.
        continue
    URL = f'{base_url}{page}'

    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        response = requests.get(URL, timeout=30)
        # Without this, a 404/5xx error page would be scanned as if it were
        # real content.
        response.raise_for_status()
    except requests.RequestException as err:
        # Report the failure and carry on with the remaining pages.
        print(f'skipping {URL}: {err}')
        continue

    # Name the parser explicitly: otherwise bs4 picks whichever parser is
    # installed, so the extracted text can differ between machines and a
    # GuessedAtParserWarning is emitted.
    soup = bs4.BeautifulSoup(response.text, 'html.parser')
    # Text nodes are stripped and joined with single spaces, so multi-word
    # patterns can match across element boundaries.
    mypage = soup.get_text(' ', strip=True)

    for match in matches:
        mymatch = re.findall(match, mypage)
        if mymatch:
            print(URL)
            print(mymatch)
            print('--------------------------------------')






