In [5]:
import requests
from lxml import etree
import io

def fetch_xsd_files_with_content(api_url, access_token, timeout=30):
    """Recursively collect every ``.xsd`` file below a GitHub contents API URL.

    Args:
        api_url: GitHub "contents" API URL to list.
        access_token: Token sent in the ``Authorization`` header.
        timeout: Seconds to wait for the API before giving up.

    Returns:
        A list of ``(file_name, xsd_text)`` tuples. Files whose download
        failed are left out; the list is empty when the listing itself
        could not be retrieved.
    """
    xsd_files_with_content = []

    headers = {"Authorization": f"token {access_token}"}
    try:
        response = requests.get(api_url, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as e:
        # Report and carry on, like the other fetch helpers in this file,
        # instead of letting a network failure abort the whole run.
        print(f"Error fetching directory listing from {api_url}: {e}")
        return xsd_files_with_content

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return xsd_files_with_content

    data = response.json()
    if not isinstance(data, list):
        # A URL that points at a single file yields one object rather than
        # a list of entries; normalise so the loop below handles both.
        data = [data]

    for item in data:
        if item['type'] == 'file' and item['name'].endswith('.xsd'):
            xsd_content = fetch_xsd_content(item['download_url'])
            if xsd_content:
                xsd_files_with_content.append((item['name'], xsd_content))
        elif item['type'] == 'dir':
            # Descend into sub-directories using the entry's own API URL.
            xsd_files_with_content.extend(
                fetch_xsd_files_with_content(item['url'], access_token, timeout)
            )

    return xsd_files_with_content

def fetch_xml_from_url(url, timeout=30):
    """Download an XML document and return its text.

    Args:
        url: Address of the XML file.
        timeout: Seconds to wait for the server before giving up; without
            a timeout a stalled connection would block indefinitely.

    Returns:
        The response body as ``str``, or ``None`` when the request failed
        (the error is printed).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn HTTP error statuses into exceptions
        return response.text
    except requests.exceptions.RequestException as e:
        print(f"Error fetching XML file from {url}: {e}")
        return None

def is_valid_unit_operation(xsd_content, unit_operation_xml):
    """Check whether an XML fragment validates against an XSD schema.

    Args:
        xsd_content: Text of the ``.xsd`` schema.
        unit_operation_xml: Text of the XML element to validate.

    Returns:
        ``True`` when the XML validates against the schema. ``False`` when
        it does not validate, when either input is not well-formed XML, or
        when the XSD text cannot be compiled into a schema.
    """
    try:
        # Parse from bytes: lxml refuses str input that carries an XML
        # encoding declaration.
        xsd_schema = etree.XMLSchema(etree.fromstring(xsd_content.encode()))
        unit_operation_root = etree.XML(unit_operation_xml.encode())
    except (etree.XMLSyntaxError, etree.XMLSchemaParseError):
        # XMLSchemaParseError covers well-formed XML that is not a usable
        # schema; it is not an XMLSyntaxError and used to propagate.
        return False

    return xsd_schema.validate(unit_operation_root)

def fetch_xsd_content(url, timeout=30):
    """Download an ``.xsd`` schema file and return its text.

    Args:
        url: Address of the schema file.
        timeout: Seconds to wait for the server before giving up; without
            a timeout a stalled connection would block indefinitely.

    Returns:
        The response body as ``str``, or ``None`` when the request failed
        (the error is printed).
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn HTTP error statuses into exceptions
        return response.text
    except requests.exceptions.RequestException as e:
        print(f"Error fetching .xsd file from {url}: {e}")
        return None

# GitHub repository URL and other variables
repository_url = 'https://github.com/Gressling/S88'
api_url = 'https://api.github.com/repos/Gressling/S88/contents/'
access_token = 'YOUR_GIT_HUB_ACESS_TOKEN'
xsd_url = "https://raw.githubusercontent.com/Gressling/S88/main/Sequences/Sequences.xsd"

# Fetch all .xsd files and XML content
all_xsd_files_with_content = fetch_xsd_files_with_content(api_url, access_token)
url_to_xml = "https://raw.githubusercontent.com/Gressling/S88/main/Sequences/Example/3UnitOperations.xml"
xml_content = fetch_xml_from_url(url_to_xml)
xsd_content = fetch_xsd_content(xsd_url)

if xml_content is None:
    # Without the Sequence document nothing can be checked at all.
    print("The Sequence and the UnitOperations are false.")
else:
    all_unit_operations_valid = True
    for _event, element in etree.iterparse(io.BytesIO(xml_content.encode()), events=('end',), tag='UnitOperation'):
        # Name the unit operation after its first child; fall back to the
        # element's own tag when it has no children.
        tag_after_unit_operation = element[0].tag if len(element) else element.tag

        # Serialise once per element instead of once per candidate schema.
        unit_operation_xml = etree.tostring(element).decode()

        # A unit operation is accepted as soon as any repository schema
        # validates it. The loop variable is deliberately not called
        # xsd_content, so the Sequence schema fetched above is preserved.
        unit_operation_valid = any(
            is_valid_unit_operation(candidate_xsd, unit_operation_xml)
            for _xsd_filename, candidate_xsd in all_xsd_files_with_content
        )
        if not unit_operation_valid:
            all_unit_operations_valid = False
            print(f"Unit Operation <{tag_after_unit_operation}> is not valid.")
        element.clear()  # Clear the element from memory after processing

    # NOTE(review): the Sequence document is never validated against
    # Sequences.xsd; "Sequence" status only reflects whether that schema
    # could be downloaded. Confirm whether a real validation is wanted.
    sequence_schema_available = bool(xsd_content)

    if sequence_schema_available and all_unit_operations_valid:
        print("The Sequence is valid with its Unit Operations.")
    elif sequence_schema_available:
        print("The Sequence is true but the UnitOperations are not valid.")
    elif all_unit_operations_valid:
        print("The Sequence is false but the UnitOperations are true.")
    else:
        print("The Sequence and the UnitOperations are false.")


The Sequence is valid with its Unit Operations.
