In [None]:
from os import listdir
from os.path import join, isfile
import pandas as pd
from rdflib import URIRef, term, Graph, Literal, Namespace
from rdflib.namespace import OWL,RDF, RDFS, SKOS, XSD

In [None]:
# Directories holding the externally sourced input files, one per data
# provider, expressed relative to the notebook's working directory.
EIOPA_DATA_PATH, GLEIF_DATA_PATH = (
    join("..", "data", "external", provider) for provider in ("eiopa", "gleif")
)

# Initialize graph with namespace bindings

In [None]:
# `g` is the graph that the GLEIF Turtle files are parsed into further down
# and that is queried to resolve LEI codes to legal entities.
# All prefix bindings below are currently commented out, so `g` starts as a
# plain, empty Graph with only rdflib's default prefixes.
g = Graph()
# g.bind('owl', OWL)

# # Namespaces used in GLEIF data
# g.bind('gleif-ELF',       URIRef('https://www.gleif.org/ontology/EntityLegalForm/'))
# g.bind('gleif-ELF-data',  URIRef('https://rdf.gleif.org/EntityLegalForm/'))
# g.bind('gleif-RA',        URIRef('https://www.gleif.org/ontology/RegistrationAuthority/'))
# g.bind('gleif-RA-data',   URIRef('https://rdf.gleif.org/RegistrationAuthority/'))
# g.bind('CountryCodes',    URIRef('https://www.omg.org/spec/LCC/Countries/ISO3166-1-CountryCodes-Adjunct/'))
# g.bind('SubdivisionCodes',URIRef('https://www.omg.org/spec/LCC/Countries/ISO3166-2-SubdivisionCodes-Adjunct/'))

# # Provisional EIOPA namespaces (they do not exist yet)
# g.bind('eiopa-Base',      URIRef("https://eiopa.europe.eu/ontology/Base/"))
# g.bind('eiopa-NCA',       URIRef("https://eiopa.europe.eu/ontology/NCA/"))
# g.bind('eiopa-NCA-data',  URIRef("https://rdf.eiopa.europe.eu/NCA/"))
# g.bind('eiopa-data',      URIRef("https://rdf.eiopa.europe.eu/data/"))

# Read GLEIF and EIOPA data

In [None]:
# Parse each GLEIF Turtle file, in order, into the shared graph `g`.
for ttl_name in ('gleif-L1-extract.ttl', 'EntityLegalFormData.ttl'):
    with open(join(GLEIF_DATA_PATH, ttl_name), "rb") as ttl_file:
        ttl_bytes = ttl_file.read()
    g.parse(data=ttl_bytes, format='turtle')

# Disabled: loading of an additional RDF file (L2Data.rdf) from a local drive.
# fp = open('H:\\10_central_data\\gleif\\gleif-lei-data\\gleif-lei-data\\original\\L2Data.rdf', "rb")
# data = fp.read()
# fp.close()
# g.parse(data = data)

# Report how many triples the graph holds after loading.
statement_count = len(g)
print("graph has {} statements.".format(statement_count))

In [None]:
# Load the EIOPA register export; the file is a semicolon-separated CSV.
register = pd.read_csv(
    filepath_or_buffer=join(EIOPA_DATA_PATH, 'DATINS_Export_637439776565055685.csv'),
    sep=';',
)

# Add triples to graph

In [None]:
# Build a separate graph with the EIOPA register triples. Every register row
# that carries an LEI is linked to the legal entity that the GLEIF graph `g`
# records for that LEI.
eiopa = Graph()

eiopa.bind('CountryCodes',    URIRef('https://www.omg.org/spec/LCC/Countries/ISO3166-1-CountryCodes-Adjunct/'))
eiopa.bind('gleif-Base',      URIRef('https://www.gleif.org/ontology/Base/'))
eiopa.bind('eiopa-Base',      URIRef("https://eiopa.europe.eu/ontology/Base/"))
eiopa.bind('owl', OWL)

gleif_base = Namespace('https://www.gleif.org/ontology/Base/')
eiopa_base = Namespace("https://eiopa.europe.eu/ontology/Base/")
# Not used in this cell; kept because other cells may reference it.
eiopa_NCA = Namespace("https://rdf.eiopa.europe.eu/NCA/")

# Constant query: the LEI code is supplied per row through `initBindings`
# rather than being concatenated into the query text, so an unexpected
# character in the CSV cannot break or alter the query.
lei_entity_query = '''SELECT ?s
                   WHERE {?lei <https://www.gleif.org/ontology/L1/LEI> ?leiCode .
                          ?lei <https://www.gleif.org/ontology/L1/identifiesAndRecords> ?s.}'''

for _, row in register.iterrows():
    # Rows without an LEI cannot be linked to a GLEIF entity.
    if pd.isna(row["LEI"]):
        continue

    # find subject with specific LEI
    matches = list(g.query(lei_entity_query,
                           initBindings={'leiCode': Literal(row["LEI"])}))
    if len(matches) != 1:
        # Zero or ambiguous matches: report and skip the row. Continuing
        # would either fail on an undefined `subj` or silently attach this
        # row's triples to the entity found for a previous row.
        print("lei not found: "+row['LEI'])
        continue
    subj = matches[0][0]

    # specify that subject is an insurance undertaking
    # (rdf:type is the predicate Turtle abbreviates as `a`; the OWL
    # vocabulary has no term `a`)
    eiopa.add((subj, RDF.type, eiopa_base.InsuranceUndertaking))

    # add register reference to subject
    nca = row["Name of NCA"].replace(" ", "-").replace("(", "").replace(")", "")
    idcode = row["Identification code"].replace(" ", "")
    register_id = URIRef("https://rdf.eiopa.europe.eu/L1-data/IURI-"+nca+"-"+idcode)
    eiopa.add((subj, eiopa_base.hasRegisterIdentifier, register_id))

    # add register entry
    eiopa.add((register_id, RDF.type, eiopa_base.InsuranceUndertakingRegisterIdentifier))
    # add original subject reference to register entry
    eiopa.add((register_id, gleif_base.identifies, subj))

    # add specificities of register; only the 'Domestic undertaking' row
    # contributes the descriptive properties of the register entry
    if row["Cross border status"] != 'Domestic undertaking':
        continue

    eiopa.add((register_id, eiopa_base.hasCrossBorderStatus, Literal(row["Cross border status"])))
    eiopa.add((register_id, eiopa_base.hasEUCountryWhereEntityOperates,
               URIRef('https://www.omg.org/spec/LCC/Countries/ISO3166-1-CountryCodes-Adjunct/'
                      + row["EU Country where the entity operates"])))

    eiopa.add((register_id, eiopa_base.hasInsuranceUndertakingID, Literal(row["Identification code"])))
    eiopa.add((register_id, eiopa_base.hasNCA, Literal(row["Name of NCA"])))

    # Start dates are always emitted; end dates only when present in the CSV.
    eiopa.add((register_id, eiopa_base.hasRegistrationStartDate, Literal(row['Registration start date'])))
    if pd.notna(row['Registration end date']):
        eiopa.add((register_id, eiopa_base.hasRegistrationEndDate, Literal(row['Registration end date'])))

    eiopa.add((register_id, eiopa_base.hasOperationStartDate, Literal(row['Operation Start Date'])))
    if pd.notna(row['Operation End Date']):
        eiopa.add((register_id, eiopa_base.hasOperationEndDate, Literal(row['Operation End Date'])))

In [None]:
# Serialize first, so that a serialization error cannot leave a truncated
# output file behind.
turtle_doc = eiopa.serialize(format="turtle")
# Graph.serialize() returns bytes in older rdflib releases and str from
# rdflib 6 on; normalise to bytes because the file is opened in binary mode.
if isinstance(turtle_doc, str):
    turtle_doc = turtle_doc.encode("utf-8")

with open(join(EIOPA_DATA_PATH, "eiopa_register.ttl"), "wb") as f:
    f.write(turtle_doc)