In [1]:
from lxml import etree
from os.path import join, isfile
from os import listdir
from src.data import process_instance

In [2]:
# Input: directory holding the XBRL instance documents to convert.
base_dir = join("..", "data", "external", "instances")
# Output: directory that receives the generated Turtle (.ttl) files.
output_dir = join("..", "data", "raw")

# Fixed prefix -> namespace URI mapping that is merged with the prefixes
# declared in each instance document before it is processed.
rdf_namespaces = {
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
    "xl": "http://www.xbrl.org/2003/XLink",
    "xsd": "http://www.w3.org/2001/XMLSchema",
    "xbrll": "https://w3id.org/vocab/xbrll",
    "dnb": "http://dnb.nl/",
    "gleif-l1": "https://www.gleif.org/ontology/L1/",
}

In [3]:
# Convert every XBRL instance document that sits directly in base_dir into a
# Turtle file of the same stem in output_dir.
#
# The suffix test includes the dot (case-insensitively) so that stripping
# len(xbrl_suffix) characters below always removes exactly the extension.
xbrl_suffix = ".xbrl"
xbrl_files = [f for f in listdir(base_dir)
              if isfile(join(base_dir, f)) and f.lower().endswith(xbrl_suffix)]

for file in xbrl_files:
    print("processing: "+str(file))
    base = join(base_dir, file)
    # Read as bytes so lxml can honour the encoding declared in the XML prolog.
    with open(base, "rb") as fp:
        root = etree.fromstring(fp.read(), parser=etree.XMLParser(remove_comments=True))
    # Prefixes declared on the document root, with rdf_namespaces taking
    # precedence when the same prefix appears in both.
    namespaces = {**root.nsmap, **rdf_namespaces}
    # NOTE(review): processInstance is assumed to return a mapping whose
    # 'output' entry is a text buffer exposing getvalue() - confirm in src.data.
    instance = process_instance.processInstance(root, file, namespaces)
    ttl_name = file[:-len(xbrl_suffix)] + '.ttl'
    print("      write to: " + ttl_name)
    # Turtle documents are UTF-8; do not rely on the platform default encoding.
    with open(join(output_dir, ttl_name), 'w', encoding='utf-8') as fp:
        fp.write(str(instance['output'].getvalue()))

processing: aeb_240_instance.xbrl
      write to: aeb_240_instance.ttl
processing: aes_240_instance.xbrl
      write to: aes_240_instance.ttl
processing: afb_240_instance.xbrl
      write to: afb_240_instance.ttl
processing: afg_240_instance.xbrl
      write to: afg_240_instance.ttl
processing: afs_240_instance.xbrl
      write to: afs_240_instance.ttl
processing: apg_240_instance.xbrl
      write to: apg_240_instance.ttl
processing: aps_240_instance.xbrl
      write to: aps_240_instance.ttl
processing: arb_240_instance.xbrl
      write to: arb_240_instance.ttl
processing: arg_240_instance.xbrl
      write to: arg_240_instance.ttl
processing: ars_240_instance.xbrl
      write to: ars_240_instance.ttl
processing: DNB-NR_FTK-2019-06_2019-12-31_MOD_APF-J.XBRL
      write to: DNB-NR_FTK-2019-06_2019-12-31_MOD_APF-J.ttl
processing: DNB-NR_FTK-2019-06_2019-12-31_MOD_APF-KW.XBRL
      write to: DNB-NR_FTK-2019-06_2019-12-31_MOD_APF-KW.ttl
processing: DNB-NR_FTK-2019-06_2019-12-31_MOD_FTK-ALL.

In [4]:
# Display the prefix -> URI mapping left in `namespaces` by the conversion loop.
# It holds the value from the last file processed, and is undefined if the
# loop body never ran (no matching files).
namespaces

{'s2c_AO': 'http://eiopa.europa.eu/xbrl/s2c/dict/dom/AO',
 's2c_CN': 'http://eiopa.europa.eu/xbrl/s2c/dict/dom/CN',
 's2c_LA': 'http://eiopa.europa.eu/xbrl/s2c/dict/dom/LA',
 's2c_LT': 'http://eiopa.europa.eu/xbrl/s2c/dict/dom/LT',
 's2c_NC': 'http://eiopa.europa.eu/xbrl/s2c/dict/dom/NC',
 's2c_PC': 'http://eiopa.europa.eu/xbrl/s2c/dict/dom/PC',
 'find': 'http://www.eurofiling.info/xbrl/ext/filing-indicators',
 'iso4217': 'http://www.xbrl.org/2003/iso4217',
 'link': 'http://www.xbrl.org/2003/linkbase',
 's2c_AM': 'http://eiopa.europa.eu/xbrl/s2c/dict/dom/AM',
 's2c_AP': 'http://eiopa.europa.eu/xbrl/s2c/dict/dom/AP',
 's2c_BR': 'http://eiopa.europa.eu/xbrl/s2c/dict/dom/BR',
 's2c_CG': 'http://eiopa.europa.eu/xbrl/s2c/dict/dom/CG',
 's2c_CS': 'http://eiopa.europa.eu/xbrl/s2c/dict/dom/CS',
 's2c_CU': 'http://eiopa.europa.eu/xbrl/s2c/dict/dom/CU',
 's2c_DI': 'http://eiopa.europa.eu/xbrl/s2c/dict/dom/DI',
 's2c_EL': 'http://eiopa.europa.eu/xbrl/s2c/dict/dom/EL',
 's2c_EX': 'http://eiopa.eur