In [1]:
import argparse
import collections
import datetime
import json
import logging
import operator
import os
import random
import re
import sys
import traceback
from tqdm import tqdm
import io

# from generator_utils import log_statistics, save_cache, query_dbpedia,\
#  strip_brackets, encode, read_template_file
from generator import *
from generator_utils import *

import importlib

from rdflib import URIRef, term, Graph, Literal, Namespace
from rdflib.namespace import OWL,RDF, RDFS, SKOS, XSD

# MAIN

In [2]:
# Input: CSV file holding the templates; output: directory for processed data.
template_file = "../../data/eiopa/1_external/templates.csv"
output_dir = "../../data/eiopa/3_processed"

# Run options. No active cell in the visible part of this notebook reads them.
file_mode = "w"
use_resources_dump = False

In [3]:
# # (MG): Initiate logging file
# time = datetime.datetime.today()
# logging.basicConfig(
#         filename='{}/logs/generator_{:%Y-%m-%d-%H-%M}.log'.format(output_dir, time), level=logging.DEBUG)

## Initialize Graph

In [4]:

EIOPA_DATA_PATH = os.path.join("..","..", "data", "external", "eiopa")
GLEIF_DATA_PATH = os.path.join("..","..", "data", "external", "gleif")

# Turtle files merged into one in-memory graph, listed in load order.
TURTLE_SOURCES = (
    (EIOPA_DATA_PATH, 'eiopa_register.ttl'),
    (GLEIF_DATA_PATH, 'gleif-L1-extract.ttl'),
    (GLEIF_DATA_PATH, 'EntityLegalFormData.ttl'),
)

g = Graph()
for source_dir, file_name in TURTLE_SOURCES:
    # Each file is read as bytes and handed to the parser as in-memory data.
    with open(os.path.join(source_dir, file_name), "rb") as fp:
        g.parse(data=fp.read(), format='turtle')

statement_count = len(g)
print("graph has {} statements.".format(statement_count))

graph has 368854 statements.


## Generate Dataset

In [5]:
# Load the templates from the CSV file named by `template_file`.
# read_template_file is not defined in this notebook; presumably it comes from
# one of the star imports (generator / generator_utils) — TODO confirm.
templates = read_template_file(template_file)

# # def generate_dataset(templates, output_dir, file_mode):
# """
#     Input: list of Annotation elements, output_directory, file_mode
#     Output: questions dataset, query dataset

#     This function will generate dataset from the given templates and
#     store it to the output directory.
# """
# it = 0
# cache = dict()
# for template in tqdm(templates):
#     it = it + 1
#     print("for {}th template".format(it))
#     try:
#         results = get_results_of_generator_query(cache, template)
#     except:
#         exception = traceback.format_exc()
#         logging.error('template {} caused exception {}'.format(
#             getattr(template, 'id'), exception))
#         logging.info(
#             '1. fix problem\n2. remove templates until the exception template in the template file\n3. restart with `--continue` parameter')
#         raise Exception()

In [6]:
def query_database(query, graph=None):
    """Run a SPARQL query and return the result rows as lists of strings.

    Args:
        query: SPARQL query, passed unchanged to ``graph.query``.
        graph: Object exposing ``query(...)``. Defaults to the notebook-level
            graph ``g``, so existing ``query_database(query)`` calls behave
            exactly as before.

    Returns:
        A list with one entry per result row; each entry is a list holding
        ``str(get_name(item))`` for every item of that row.
    """
    if graph is None:
        # Resolved at call time so the function picks up whichever graph the
        # notebook currently has bound to ``g``.
        graph = g
    # get_name is not defined in this notebook; presumably it arrives through
    # the star imports at the top — TODO confirm.
    return [[str(get_name(item)) for item in row] for row in graph.query(query)]

In [7]:
# Pick the second template and show the two fields printed below.
template = templates[1]
for field_name in ("generator_query", "variables"):
    print(getattr(template, field_name))

select distinct ?a where {?x eiopa-Base:hasEUCountryWhereEntityOperates CountryCodes:NL . ?x eiopa-Base:hasInsuranceUndertakingID ?a.}
['a']


In [8]:
# Prepare the generator query for the currently selected `template`.
# prepare_generator_query is defined outside this notebook. For this template
# the recorded output shows the prepared text identical to
# template.generator_query.
generator_query = prepare_generator_query(template)

In [9]:
# Show the prepared query so it can be compared with template.generator_query.
print(generator_query)

select distinct ?a where {?x eiopa-Base:hasEUCountryWhereEntityOperates CountryCodes:NL . ?x eiopa-Base:hasInsuranceUndertakingID ?a.}


In [10]:
# Run the prepared query; `results` is a list of rows, each a list of strings
# (see query_database).
results = query_database(generator_query)

In [11]:
# Print each result row followed by the dataset pair built from it.
# NOTE(review): the recorded output still shows the `<a>` placeholder inside
# 'query' while 'natural_language' has the value filled in — confirm intended.
for item in results:
    print(item)
    dataset_pair = build_dataset_pair(item, template)
    print(dataset_pair)

['L0008']
{'natural_language': 'In what jurisdiction does l0008 operate?', 'query': 'select distinct var_a where  brack_open var_x eiopa-base:haseucountrywhereentityoperates var_a sep_dot var_x eiopa-base:hasinsuranceundertakingid <a> sep_dot brack_close '}
['H0125']
{'natural_language': 'In what jurisdiction does h0125 operate?', 'query': 'select distinct var_a where  brack_open var_x eiopa-base:haseucountrywhereentityoperates var_a sep_dot var_x eiopa-base:hasinsuranceundertakingid <a> sep_dot brack_close '}
['W1943']
{'natural_language': 'In what jurisdiction does w1943 operate?', 'query': 'select distinct var_a where  brack_open var_x eiopa-base:haseucountrywhereentityoperates var_a sep_dot var_x eiopa-base:hasinsuranceundertakingid <a> sep_dot brack_close '}
['W1642']
{'natural_language': 'In what jurisdiction does w1642 operate?', 'query': 'select distinct var_a where  brack_open var_x eiopa-base:haseucountrywhereentityoperates var_a sep_dot var_x eiopa-base:hasinsuranceundertaki

## Prepare generator query

In [None]:
# Rebinds `template` (an earlier cell set it to templates[1]); every later cell
# that reads `template` therefore works on the first template.
template = templates[0]
generator_query = template.generator_query
def first_attempt(template):
    """Call prepare_generator_query with only the template (all defaults)."""
    return prepare_generator_query(template)


def second_attempt(template):
    """Call prepare_generator_query passing do_special_class_replacement=False."""
    options = {"do_special_class_replacement": False}
    return prepare_generator_query(template, **options)


def third_attempt(template):
    """Call prepare_generator_query passing add_type_requirements=False."""
    options = {"add_type_requirements": False}
    return prepare_generator_query(template, **options)

# for attempt, prepare_query in enumerate([first_attempt, second_attempt,\
#      third_attempt], start=1):
#     prepare_query(generator_query)
# prepare_generator_query(template)

## Generate queries

In [None]:
# Instantiate the first template's query once for every value returned by its
# generator query.
query_template = templates[0].query
queries = [
    # n3() writes the RDF term as valid SPARQL: quotes and backslashes are
    # escaped, a language tag or datatype is kept, and IRIs come out as <...>.
    # Wrapping str(item) in double quotes dropped all of that, so the
    # instantiated query could not match a tagged literal in the graph.
    query_template.replace("<A>", item.n3())
    for row in g.query(templates[0].generator_query)
    for item in row
    if item is not None  # unbound variable: nothing to substitute
]

In [None]:
# Report how many queries were generated, then list them one per line.
query_count = len(queries)
print(query_count)
query_listing = "\n".join(queries)
print(query_listing)

## Graph query function

In [None]:
# Query text with an <A> placeholder in the hasLegalName position; split over
# several literals for readability, the resulting string is unchanged.
QUERY = (
    "select ?a where { "
    "?x gleif-L1:hasLegalName <A>. "
    "?x gleif-L1:hasLegalAddress/gleif-base:hasCity ?a. "
    "}"
)
print(QUERY)

In [None]:
# Hand-written query: city of the legal address for one named entity with
# legal jurisdiction CountryCodes:NL. Split for readability, same string.
q = (
    'select distinct ?c where { '
    '?b gleif-base:hasLegalJurisdiction CountryCodes:NL.  '
    '?b gleif-L1:hasLegalName "AEGON Schadeverzekering N.V.". '
    '?b gleif-L1:hasLegalAddress/gleif-base:hasCity ?c. '
    '}'
)

In [None]:
# Print every value of every result row for the hand-written query `q`.
for item in (value for row in g.query(q) for value in row):
    print(item)

In [None]:
# Print all values returned by the template's generator query; `cnt` ends up
# holding the number of result rows (0 when there are none).
cnt = 0
for cnt, row in enumerate(g.query(template.generator_query), start=1):
    for item in row:
        print(item)
        

In [None]:
# Display the loop variables left over from the previous cell: the last value
# printed and the row count. Depends on that cell having been run just before.
item,cnt

In [None]:
# Instantiate the first template's query once for every value returned by its
# generator query.
query_template = templates[0].query
queries = [
    # n3() writes the RDF term as valid SPARQL: quotes and backslashes are
    # escaped, a language tag or datatype is kept, and IRIs come out as <...>.
    # Wrapping str(item) in double quotes dropped all of that, so the
    # instantiated query could not match a tagged literal in the graph.
    query_template.replace("<A>", item.n3())
    for row in g.query(templates[0].generator_query)
    for item in row
    if item is not None  # unbound variable: nothing to substitute
]

In [None]:
# Display the full list of generated query strings.
queries

In [None]:
# Find subjects whose gleif-L1:hasLegalName is the @nl-tagged literal below and
# print each returned value.
answer = g.query('select ?x where { ?x gleif-L1:hasLegalName "AMMA VERZEKERINGEN"@nl}')
for item in (value for row in answer for value in row):
    print(item)


In [None]:
# Wrap the last printed value in double quotes. This rebinds `item` in place,
# so running the cell again adds another pair of quotes.
item = "".join(['"', str(item), '"'])

In [None]:
# Display the current value of `item` (whatever an earlier cell last bound).
item

In [None]:
# Execute the first generated query and print every returned value.
first_query = queries[0]
for item in (value for row in g.query(first_query) for value in row):
    print(item)

In [None]:
# Check whether the class IRI starts with the DBpedia ontology prefix; the
# result is shown as the cell's output.
ontology_class = 'http://dbpedia.org/ontology/house'
ontology_class.startswith('http://dbpedia.org/ontology/')

In [None]:
# Print each template variable preceded by its zero-based position.
for cnt, variable in enumerate(template.variables):
    print(cnt, variable, sep="\n")