-
Notifications
You must be signed in to change notification settings - Fork 0
/
merge_ead_into_mets.py
76 lines (64 loc) · 1.78 KB
/
merge_ead_into_mets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python
# -*- coding: utf8 -*-
# Execution example : python merge_ead_into_mets.py /path/to/file/ead.xml /path/to/file/mets.xml
#
# Libs
#
from lxml import etree
import os
import sys
#
# Config
#
# Name of the merged output document (a relative name, so it is resolved
# against the current working directory when written).
result_file = 'merge_ead_into_mets.xml'
# Namespace URI used for every METS element created by this script.
METS_URL = 'http://www.loc.gov/METS/'
# Prefix-to-URI mapping passed to lxml when the new METS node is created.
NSMAP = dict(mets=METS_URL)
# "{uri}" prefix prepended to a local name to build a qualified METS tag.
METS_PREFIX = '{%s}' % METS_URL
#
# Functions
#
def merge(ead_file, mets_file):
    """Embed the EAD document inside the METS document and save the result.

    The root element of ``ead_file`` is wrapped in a new
    ``mets:dmdSec > mets:mdWrap > mets:xmlData`` chain, and that ``dmdSec``
    is inserted at index 1 of the METS root's children. The resulting tree
    is written to ``result_file``.

    :param ead_file: path of the EAD XML file to embed.
    :param mets_file: path of the METS XML file that receives the EAD data.
    :return: None; the merged document is written to disk.
    """
    def mets_tag(local_name):
        # Qualified tag name in the METS namespace.
        return METS_PREFIX + local_name

    # Load both documents and keep only their root elements.
    ead_root = etree.parse(ead_file).getroot()
    mets_root = etree.parse(mets_file).getroot()

    # Build the wrapper chain that will carry the EAD payload.
    wrapper = etree.Element(
        mets_tag('dmdSec'), ID='ead', GROUPID='ead', nsmap=NSMAP)
    md_wrap = etree.SubElement(wrapper, mets_tag('mdWrap'), MDTYPE='EAD')
    payload = etree.SubElement(md_wrap, mets_tag('xmlData'))

    # The payload element is still empty, so appending places the EAD root
    # as its first (and only) child.
    payload.append(ead_root)

    # Position 1 makes the new section the second child of the METS root.
    mets_root.insert(1, wrapper)

    # Serialise the modified METS tree.
    etree.ElementTree(mets_root).write(
        result_file,
        encoding='UTF-8',
        pretty_print=True,
        xml_declaration=True,
    )
def main():
    """Validate the command-line arguments, then merge the EAD into the METS.

    Exactly two arguments are expected: the path of the EAD file followed by
    the path of the METS file. Each path must point to an existing file and
    end with ``.xml`` (case-insensitive). When a check fails, an explanatory
    message is printed and the process exits with status 1.

    :raises SystemExit: if the arguments are missing or invalid.
    """
    # sys.argv[0] is the script itself, so two user arguments give length 3.
    if len(sys.argv) != 3:
        print("Check the number of arguments")
        sys.exit(1)

    ead_file, mets_file = sys.argv[1], sys.argv[2]
    inputs = (("EAD", ead_file), ("METS", mets_file))

    # Check that both files exist (the METS path is checked too, not the
    # EAD path twice).
    for label, path in inputs:
        if not os.path.isfile(path):
            print(label + " is not a file")
            sys.exit(1)

    # Check that both files carry an XML extension.
    for label, path in inputs:
        if not path.lower().endswith('.xml'):
            print(label + " is not an XML file")
            sys.exit(1)

    merge(ead_file, mets_file)
    print('Results writed into : ' + result_file)
    print('End script')
#
# Main
#
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()