In [52]:
import requests
from lxml import etree

def load_xml_from_url(xml_url):
  response = requests.get(xml_url)
  xml_doc = etree.fromstring(response.content)
  return xml_doc

def load_xsd_from_url(xsd_url):
  response = requests.get(xsd_url)
  xsd_doc = etree.fromstring(response.content)
  xsd_schema = etree.XMLSchema(xsd_doc)
  return xsd_schema

#Validates XML against XSD
def validate_xml(xml_doc, xsd_schema):
  is_valid = xsd_schema.validate(xml_doc)
  return is_valid

#Prints result of validation
def print_validation_result(is_valid):
  if is_valid: print("The XML is valid against the XSD schema.")
  else: print("The XML is not valid against the XSD schema.")
  print(xsd_schema.error_log)


In [53]:
# Load XML document UnitOperation-AddOnce
xml_doc = load_xml_from_url("https://raw.githubusercontent.com/Gressling/S88/main/UnitOperations/AddOnce/AddOnce-test.xml")

# Load XSD framework for UnitOperation
xsd_schema = load_xsd_from_url("https://raw.githubusercontent.com/Gressling/S88/main/UnitOperations/AddOnce/AddOnce.xsd")

# Validate the XML against the XSD schema
is_valid = validate_xml(xml_doc, xsd_schema)

# Print validation result
print_validation_result(is_valid)



The XML is valid against the XSD schema.



In [54]:
# Creating dataframe for XML
# Parse the XML file UnitOperation-AddOnce-Aspirin

import pandas as pd
import xml.etree.ElementTree as ET


url = "https://raw.githubusercontent.com/Gressling/S88/main/S88-UnitOperation-AddOnce-Aspirin.xml"
response = requests.get(url)
xml_data = response.content

# Parse the XML data
root = ET.fromstring(xml_data)

#List to store extracted data
reactant_list = []
amount_list = []
timing_list = []
rate_of_addition_list = []
temperature_list = []
pressure_list = []
stirring_list = []
solvent_list = []
pH_list = []
safety_considerations_list = []
reaction_environment_list = []
equipment_list = []

# Extract data from XML and populate the lists
for unit_operation in root.findall("UnitOperation/UnitOperationName"):
    reactant_list.append(unit_operation.find('Parameters/Reactant').text)
    amount_list.append(unit_operation.find('Parameters/Amount').text)
    timing_list.append(unit_operation.find('Parameters/Timing').text)
    rate_of_addition_list.append(unit_operation.find('Parameters/RateOfAddition').text)
    temperature_list.append(unit_operation.find('Parameters/Temperature').text)
    pressure_list.append(unit_operation.find('Parameters/Pressure').text)
    stirring_list.append(unit_operation.find('Parameters/Stirring').text)
    solvent_list.append(unit_operation.find('Parameters/Solvent').text)
    pH_list.append(unit_operation.find('Parameters/PH').text)
    safety_considerations_list.append(unit_operation.find('Parameters/SafetyConsiderations').text)
    reaction_environment_list.append(unit_operation.find('Parameters/ReactionEnvironment').text)
    equipment_list.append(unit_operation.find('Parameters/Equipment').text)

# Create a dictionary to hold the data
data = {
    'Reactant': reactant_list,
    'Amount': amount_list,
    'Timing': timing_list,
    'Rate of Addition': rate_of_addition_list,
    'Temperature': temperature_list,
    'Pressure': pressure_list,
    'Stirring': stirring_list,
    'Solvent': solvent_list,
    'pH': pH_list,
    'Safety Considerations': safety_considerations_list,
    'Reaction Environment': reaction_environment_list,
    'Equipment': equipment_list
}

# Create the dataframe
df = pd.DataFrame(data)

# Print the dataframe
print(df)


Empty DataFrame
Columns: [Reactant, Amount, Timing, Rate of Addition, Temperature, Pressure, Stirring, Solvent, pH, Safety Considerations, Reaction Environment, Equipment]
Index: []
