In [1]:
import fairseq
import os
import torch
import re

from fairseq.models.transformer import TransformerModel
from os.path import join, exists
from rdflib import term, Graph

In [2]:
# Run identifier; it is interpolated into the data-bin, model and BPE-codes
# paths that are built with .format(ID) in a later cell.
ID = 14126

In [5]:
# Relative locations of the artefacts belonging to run ID.
# NOTE(review): fairseq_folder carries one more "../" than the other paths.
# Presumably this is because fairseq resolves a relative data_name_or_path
# against model_path rather than the working directory -- confirm before
# "normalising" it.
fairseq_folder = "../../data/eiopa/5_model_input/fairseq-data-bin-{}".format(ID)
model_path = '../models/transformer_iwslt_de_en_{}'.format(ID)
# BPE codes file handed to the model loader as bpe_codes.
model_input = '../data/eiopa/5_model_input/{}-bpe.codes'.format(ID)
# Root folder that contains the "eiopa" and "gleif" sub-folders read by
# initialize_graph.
GRAPH_DATA_PATH = join("..", "data", "eiopa", "1_external")

def initialize_graph(graph_data_path):
    """Build the database graph from the EIOPA and GLEIF Turtle files.

    Args:
        graph_data_path: Directory containing the ``eiopa`` and ``gleif``
            sub-folders that hold the Turtle sources.

    Returns:
        The ``Graph`` populated with the statements of all three files.
    """
    print("     Initializing Graph: This takes some time")

    # (sub-folder, file name) pairs; they are parsed in exactly this order.
    turtle_sources = (
        ("eiopa", 'eiopa_register.ttl'),
        ("gleif", 'gleif-L1-extract.ttl'),
        ("gleif", 'EntityLegalFormData.ttl'),
    )

    graph = Graph()
    for folder, file_name in turtle_sources:
        ttl_path = os.path.join(graph_data_path, folder, file_name)
        # Read the raw bytes and hand them to the parser as in-memory data.
        with open(ttl_path, "rb") as ttl_file:
            graph.parse(data=ttl_file.read(), format='turtle')

    print("Graph has {} statements.".format(len(graph)))
    return graph

# Token substitution table used when decoding model output back to SPARQL.
# Each row is a list whose FIRST element is the original SPARQL text and whose
# LAST element is the encoded token; reverse_replacements reads only r[0] and
# r[-1], so any middle elements are not used by the decoding code in this
# notebook (presumably they are alternative spellings for the encoding
# direction -- confirm against the encoder).
# Row order matters: rows are applied top to bottom, and the surrounding
# spaces inside the strings are significant.
REPLACEMENTS = [
    ['dbo:', 'http://dbpedia.org/ontology/', 'dbo_'],
    ['dbp:', 'http://dbpedia.org/property/', 'dbp_'],
    ['dbc:', 'http://dbpedia.org/resource/Category:', 'dbc_'],
    ['dbr:', 'res:', 'http://dbpedia.org/resource/', 'dbr_'],
    ['dct:', 'dct_'],
    ['geo:', 'geo_'],
    ['georss:', 'georss_'],
    ['rdf:', 'rdf_'],
    ['rdfs:', 'rdfs_'],
    ['foaf:', 'foaf_'],
    ['owl:', 'owl_'],
    ['yago:', 'yago_'],
    ['skos:', 'skos_'],
    [' ( ', '  par_open  '],
    [' ) ', '  par_close  '],
    ['(', ' attr_open '],
    [') ', ')', ' attr_close '],
    ['{', ' brack_open '],
    ['}', ' brack_close '],
    [' . ', ' sep_dot '],
    ['. ', ' sep_dot '],
    ['?', 'var_'],
    ['*', 'wildcard'],
    [' <= ', ' math_leq '],
    [' >= ', ' math_geq '],
    [' < ', ' math_lt '],
    [' > ', ' math_gt '],
    [' "', ' quot_mark_l '],
    ['" ', ' quot_mark_r '],
    ['"', ' quot_mark_n ']
]


def sparql_decode(encoded_sparql):
    """Turn an encoded query string back into SPARQL text.

    The token substitutions are undone first (``reverse_replacements``) and
    the shortened ordering clauses are expanded afterwards
    (``reverse_shorten_query``).

    Args:
        encoded_sparql: Encoded query string, e.g. the model's translation.

    Returns:
        The decoded SPARQL string.
    """
    return reverse_shorten_query(reverse_replacements(encoded_sparql))

def reverse_replacements(sparql):
    """Replace every encoded token in ``sparql`` with its original text.

    Rows of ``REPLACEMENTS`` are applied in table order. For each row the
    encoded token (last element) is replaced by the original text (first
    element), first in its exact form and then with surrounding whitespace
    stripped.

    Args:
        sparql: Encoded query string.

    Returns:
        The string with all encoded tokens substituted back.
    """
    for entry in REPLACEMENTS:
        plain_text, encoded_token = entry[0], entry[-1]
        # Exact (possibly space-padded) form first, then the bare token so
        # occurrences without the padding are caught as well.
        for needle in (encoded_token, encoded_token.strip()):
            sparql = sparql.replace(needle, plain_text)
    return sparql

def reverse_shorten_query(sparql):
    """Expand the shortened ordering markers into full SPARQL clauses.

    ``_oba_ X`` becomes ``order by asc (X)`` and ``_obd_ X`` becomes
    ``order by desc (X)``, where ``X`` is the run of non-whitespace characters
    following the marker. Markers are matched case-insensitively.

    Args:
        sparql: Query string that may contain the shortened markers.

    Returns:
        The query string with the markers expanded.
    """
    # (pattern, replacement) pairs, applied in this order.
    expansions = (
        (r'_oba_ ([\S]+)', 'order by asc (\\1)'),
        (r'_obd_ ([\S]+)', 'order by desc (\\1)'),
    )
    for pattern, replacement in expansions:
        sparql = re.sub(pattern, replacement, sparql, flags=re.IGNORECASE)
    return sparql

# Build the graph once up front; the query helpers defined further down use
# it as their default graph.
g = initialize_graph(GRAPH_DATA_PATH)

# Load the best checkpoint of the translation model together with its data
# directory, BPE codes and tokenizer settings.
model = TransformerModel.from_pretrained(
    model_path,
    checkpoint_file='checkpoint_best.pt',
    data_name_or_path=fairseq_folder,
    bpe='subword_nmt',
    bpe_codes=model_input,
    tokenizer='moses',
)

def get_name(uri):
    """Return a short display name for a URI, without its namespace.

    Args:
        uri: A query result item.

    Returns:
        For a ``term.URIRef``: the text after the last ``/`` of its N3 form,
        minus the final character. Any other value is returned unchanged.
    """
    if not isinstance(uri, term.URIRef):
        return uri
    last_segment = uri.n3().split("/")[-1]
    # Drop the final character of the N3 form (presumably the closing ">"
    # that rdflib puts around a URI reference).
    return last_segment[:-1]
    

def query_database(query, graph=g):
    """Run a SPARQL query and return its rows as lists of strings.

    Args:
        query: SPARQL query text.
        graph: Graph to query; defaults to the module-level graph ``g`` as it
            was bound when this function was defined.

    Returns:
        A list with one entry per result row; each entry is the list of the
        row's items converted with ``str(get_name(item))``.
    """
    return [
        [str(get_name(cell)) for cell in row]
        for row in graph.query(query)
    ]

def translate_and_query(query, model=model):
    """Translate a natural-language question to SPARQL, run it, print both.

    Args:
        query: The question to translate.
        model: Object whose ``translate`` method produces the encoded query;
            defaults to the module-level ``model`` bound at definition time.

    Returns:
        Always ``True``. The decoded query and the result rows are printed,
        not returned.
    """
    decoded_query = sparql_decode(model.translate(query))
    print("Query decoded:", decoded_query)
    # The decoded query is executed against query_database's default graph.
    result_rows = query_database(decoded_query)
    print(result_rows)
    return True

     Initializing Graph: This takes some time
Graph has 375390 statements.


In [6]:
# NOTE(review): the question asks for both a location and a registration
# number, but the decoded query shown in the output selects only
# hasInsuranceUndertakingID, so the location part is not answered.
translate_and_query("Where is univé het groene hart brandverzekeraar n.v. located and what is the registration number?")

Query decoded: SELECT DISTINCT ?a WHERE{?e eiopa-Base:hasIdentifyingName "univé het groene hart brandverzekeraar n.v." . ?e eiopa-Base:hasInsuranceUndertakingID ?a . }
[['W1976']]


True

In [7]:
# NOTE(review): the decoded query in the output names
# "onvz aanvullende verzekering n.v.", not 'allianz', so the returned country
# belongs to a different entity than the one asked about.
translate_and_query("location of 'allianz' ?")

Query decoded: SELECT DISTINCT ?o WHERE{?e eiopa-Base:hasIdentifyingName "onvz aanvullende verzekering n.v." . ?e eiopa-Base:hasEUCountryWhereEntityOperates ?o . }
[['NL']]


True

In [8]:
# NOTE(review): returns no rows. The decoded query matches the ID as the
# lowercase literal "w1963", while IDs printed by other cells are uppercase
# (e.g. 'W1976') -- possibly a case mismatch; confirm against the graph data.
translate_and_query("In what country does w1963 operate?")

Query decoded: SELECT DISTINCT ?a WHERE{?x eiopa-Base:hasEUCountryWhereEntityOperates ?a . ?x eiopa-Base:hasInsuranceUndertakingID "w1963" . }
[]


True

In [9]:
# NOTE(review): returns no rows. As with the other ID lookup, the decoded
# query uses the lowercase literal "w1889" -- possibly a case mismatch with
# the stored IDs; confirm against the graph data.
translate_and_query("Legal name of w1889?")

Query decoded: SELECT DISTINCT ?o WHERE{?s gleif-L1:hasLegalName ?o . ?e gleif-Base:identifies ?s . ?e eiopa-Base:hasInsuranceUndertakingID "w1889" . }
[]


True

In [10]:
# NOTE(review): the decoded query in the output names
# "univé stad en land brandverzekeraar n.v.", which is not the entity asked
# about, so the result refers to a different undertaking.
translate_and_query("In what country does stad holland zorgverzekeraar operate?")

Query decoded: SELECT DISTINCT ?o WHERE{?e eiopa-Base:hasIdentifyingName "univé stad en land brandverzekeraar n.v." . ?e eiopa-Base:hasEUCountryWhereEntityOperates ?o . }
[['NL']]


True

In [11]:
# NOTE(review): the decoded query in the output names
# "onderlinge levensverzekering-maatschappij 's-gravenhage u.a." instead of
# the entity asked about, and it returns no rows.
translate_and_query("Who is the supervisor of onderlinge verzekeringsmaatschappij de veenhoop u.a.?")

Query decoded: SELECT DISTINCT ?o WHERE{?e eiopa-Base:hasIdentifyingName "onderlinge levensverzekering-maatschappij 's-gravenhage u.a." . ?e eiopa-Base:hasNCA ?o . }
[]


True

In [12]:
# NOTE(review): the entity name is reproduced correctly, but the decoded
# query selects hasInsuranceUndertakingID, so the answer is an ID rather than
# a name -- check whether that is the intended reading of "register name".
translate_and_query("What is uk p&i club n.v.'s register name?")

Query decoded: SELECT DISTINCT ?a WHERE{?e eiopa-Base:hasIdentifyingName "uk p&i club n.v." . ?e eiopa-Base:hasInsuranceUndertakingID ?a . }
[['R161589']]


True

In [13]:
# NOTE(review): the decoded query in the output names
# "univé oost brandverzekeraar n.v." and selects
# hasEUCountryWhereEntityOperates, so both the entity and the requested
# property differ from the question (legal name of 'univé schade').
translate_and_query("legal name of 'univé schade'?")

Query decoded: SELECT DISTINCT ?o WHERE{?e eiopa-Base:hasIdentifyingName "univé oost brandverzekeraar n.v." . ?e eiopa-Base:hasEUCountryWhereEntityOperates ?o . }
[['NL']]


True

In [14]:
# Here the decoded query keeps the entity name from the question and selects
# hasEUCountryWhereEntityOperates, matching what was asked.
translate_and_query("Where does achmea zorgverzekeringen n.v. operate?")

Query decoded: SELECT DISTINCT ?o WHERE{?e eiopa-Base:hasIdentifyingName "achmea zorgverzekeringen n.v." . ?e eiopa-Base:hasEUCountryWhereEntityOperates ?o . }
[['NL']]


True