In [None]:
import os
from lxml import etree

# Load the XML file
xml_file = 'test-dspace/1735315236_DSPACE_OAI_10673_1169.xml'
xml_tree = etree.parse(xml_file)

# Load the XSLT file
xslt_file = 'xsl/DC2MARC21slim.xsl'
xslt_tree = etree.parse(xslt_file)

# Perform the transformation
transform = etree.XSLT(xslt_tree)
result_tree = transform(xml_tree)

# Create the output directory if it doesn't exist
output_dir = 'output'
os.makedirs(output_dir, exist_ok=True)

# Generate the output filename
base_name = os.path.basename(xml_file)
name, ext = os.path.splitext(base_name)
output_file = os.path.join(output_dir, f'{name}_transformed{ext}')

# Save the result to the new file
result_tree.write(output_file, pretty_print=True, xml_declaration=True, encoding='UTF-8')

print(f'Transformation complete. Output saved to {output_file}')

In [20]:
import os
from lxml import etree

# Load the XML file
xml_file = 'test-archivesspace/1735549041_ARCHIVESSPACE_OAI_archivesspace__repositories_2_resources_2.xml'
xml_tree = etree.parse(xml_file)

# Load the XSLT file
xslt_file = 'xsl/own.xsl'
xslt_tree = etree.parse(xslt_file)

# Perform the transformation
transform = etree.XSLT(xslt_tree)

# Capture any errors or warnings during the transformation
try:
    result_tree = transform(xml_tree)
except etree.XSLTApplyError as e:
    print(f"Transformation failed: {e.error_log}")
    for error in e.error_log:
        print(error.message)
    exit(1)

# Check if the transformation was successful
if result_tree.getroot() is None:
    print("Transformation failed or produced an empty result.")
else:
    # Create the output directory if it doesn't exist
    output_dir = 'output'
    os.makedirs(output_dir, exist_ok=True)

    # Generate the output filename
    base_name = os.path.basename(xml_file)
    name, ext = os.path.splitext(base_name)
    output_file = os.path.join(output_dir, f'{name}_transformed{ext}')

    # Save the result to the new file
    result_tree.write(output_file, pretty_print=True, xml_declaration=True, encoding='UTF-8')

    print(f'Transformation complete. Output saved to {output_file}')

Transformation complete. Output saved to output/1735549041_ARCHIVESSPACE_OAI_archivesspace__repositories_2_resources_2_transformed.xml


In [21]:
import os
from lxml import etree

# Define the input and output directories
input_dir = 'test-archivesspace'
output_dir = 'output'
xslt_file = 'xsl/own.xsl'

# Load the XSLT file
xslt_tree = etree.parse(xslt_file)
transform = etree.XSLT(xslt_tree)

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Iterate over all XML files in the input directory
for xml_file in os.listdir(input_dir):
    if xml_file.endswith('.xml'):
        xml_path = os.path.join(input_dir, xml_file)
        
        # Load the XML file
        xml_tree = etree.parse(xml_path)

        # Perform the transformation
        try:
            result_tree = transform(xml_tree)
        except etree.XSLTApplyError as e:
            print(f"Transformation failed for {xml_file}: {e.error_log}")
            for error in e.error_log:
                print(error.message)
            continue

        # Check if the transformation was successful
        if result_tree.getroot() is None:
            print(f"Transformation failed or produced an empty result for {xml_file}.")
        else:
            # Generate the output filename
            name, ext = os.path.splitext(xml_file)
            output_file = os.path.join(output_dir, f'{name}_transformed{ext}')

            # Save the result to the new file
            result_tree.write(output_file, pretty_print=True, xml_declaration=True, encoding='UTF-8')

            print(f'Transformation complete for {xml_file}. Output saved to {output_file}')

Transformation complete for 1735549072_ARCHIVESSPACE_OAI_archivesspace__repositories_2_resources_18.xml. Output saved to output/1735549072_ARCHIVESSPACE_OAI_archivesspace__repositories_2_resources_18_transformed.xml
Transformation complete for 1735549054_ARCHIVESSPACE_OAI_archivesspace__repositories_4_resources_12.xml. Output saved to output/1735549054_ARCHIVESSPACE_OAI_archivesspace__repositories_4_resources_12_transformed.xml
Transformation complete for 1735549082_ARCHIVESSPACE_OAI_archivesspace__repositories_4_resources_22.xml. Output saved to output/1735549082_ARCHIVESSPACE_OAI_archivesspace__repositories_4_resources_22_transformed.xml
Transformation complete for 1735549046_ARCHIVESSPACE_OAI_archivesspace__repositories_2_resources_6.xml. Output saved to output/1735549046_ARCHIVESSPACE_OAI_archivesspace__repositories_2_resources_6_transformed.xml
Transformation complete for 1735549049_ARCHIVESSPACE_OAI_archivesspace__repositories_4_resources_8.xml. Output saved to output/1735549049_

In [13]:
import os

def merge_xsl_files(main_file, utils_file, output_file):
    with open(utils_file, 'r', encoding='utf-8') as f:
        utils_content = f.read()

    with open(main_file, 'r', encoding='utf-8') as f:
        main_content = f.read()

    # Remove the XML declaration from both files if they exist
    if utils_content.startswith('<?xml'):
        utils_content = utils_content.split('?>', 1)[1].strip()

    if main_content.startswith('<?xml'):
        main_content = main_content.split('?>', 1)[1].strip()

    # Remove the import statement from the main file
    main_content = main_content.replace('<xsl:import href="MARC21slimUtils.xsl" />', '')

    # Ensure the utils content does not have its own closing xsl:stylesheet tag
    if utils_content.strip().endswith('</xsl:stylesheet>'):
        utils_content = utils_content.rsplit('</xsl:stylesheet>', 1)[0].strip()

    # Ensure the main content does not have its own opening xsl:stylesheet tag
    if main_content.strip().startswith('<xsl:stylesheet'):
        main_content = main_content.split('>', 1)[1].strip()

    # Merge the contents
    merged_content = '<?xml version="1.0" encoding="UTF-8" ?>\n<xsl:stylesheet version="1.0" xmlns:marc="http://www.loc.gov/MARC21/slim" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">\n' + utils_content + '\n' + main_content + '\n</xsl:stylesheet>'

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(merged_content)

    print(f"Merged file saved as {output_file}")

# Define file paths
main_file = 'xsl/EADAnalyticstoMARC21slimXML.xsl'
utils_file = 'xsl/MARC21slimUtils.xsl'
output_file = 'xsl/MergedEADAnalyticstoMARC21slimXML.xsl'

# Merge the files
merge_xsl_files(main_file, utils_file, output_file)

Merged file saved as xsl/MergedEADAnalyticstoMARC21slimXML.xsl
