# Benchmarks con el dataset de UM

El siguiente script calculará benchmarks usando el dataset de la Universidad de Murcia, creando datos sintéticos a partir de los datos reales.

## Importaciones

In [1]:
import xml.dom.minidom
import re
import uuid 
from datetime import datetime
import pickle
from pathlib import Path
from bs4 import BeautifulSoup as BS
import shutil
import os
import xml.etree.ElementTree as ET

## Lectura de datos desde datos reales

Expresiones Regulares

In [2]:
# Case-insensitive URL matcher: either a scheme / TLD-qualified host followed by a path,
# or a bare host name ending in a known TLD. parseRDF applies it to `<rdf:type` lines.
# NOTE(review): the literal spans two physical lines here; inside a triple-quoted string that
# puts a newline in the middle of the final `(?!@)` lookahead -- confirm this is an export
# artifact and not present in the real notebook.
# NOTE(review): a literal "/" follows the single capturing group, so a match must be followed
# by a slash -- confirm this is intended.
URL_REGEX = r"""(?i)\b((?:https?:(?:/{1,3}|[a-z0-9%])|[a-z0-9.\-]+[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)/)(?:[^\s()<>{}\[\]]+|\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\))+(?:\([^\s()]*?\([^\s()]+\)[^\s()]*?\)|\([^\s]+?\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’])|(?:(?<!@)[a-z0-9]+(?:[.\-][a-z0-9]+)*[.](?:com|net|org|edu|gov|mil|aero|asia|biz|cat|coop|info|int|jobs|mobi|museum|name|post|pro|tel|travel|xxx|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|Ja|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|yu|za|zm|zw)\b/?(
?!@)))/"""

Constantes

In [3]:
# Cumulative element counts at which the benchmark is measured; each value also names
# the group of generated files ("dataset_<value>_...") that is loaded to reach it.
dataNumber = [50000,250000,500000,1000000,5000000] # number of elements per set
# dataNumber = [50000,250000,500000,1000000] # shorter alternative without the 5M set
base_dir = './data/sintetic_data/' # base directory for the generated synthetic data files

## Funciones

Leer datos semilla

In [4]:
def parseRDF(path,mode,encoding='utf8'): # Read file data
    """Split a log of concatenated RDF/XML documents into per-document description blocks.

    The file is read line by line:

    * a line starting with ``<rdf:RDF`` begins a new document (the buffer is reset);
    * a line starting with ``</rdf:RDF>`` ends it (the buffer is appended to the result);
    * in between, only the lines from ``  <rdf:Description`` up to and including
      ``  </rdf:Description`` are kept.

    Every kept line goes through ``str(bytes)[2:-1]``, so the stored text is the
    *bytes repr* of the line: non-ASCII characters appear as backslash escapes
    (e.g. ``\\xc3\\xa9``) and only the escaped newline is turned back into a real one.

    Args:
        path: Path of the log file.
        mode: Mode passed to ``open`` (the caller in this notebook uses ``'r'``).
        encoding: Encoding used both to read the file and to build the escaped text.

    Returns:
        Tuple ``(RDFs, types)``: ``RDFs`` is a list with one string per document;
        ``types`` is a set holding, for every ``<rdf:type`` line inside a description,
        the last ``/``-separated segment of the first ``URL_REGEX`` match, or ``None``
        when the line has no match.
    """
    def _escaped(text):
        # bytes repr -> drop the leading b' and the trailing quote -> restore real newlines
        return str(text.encode(encoding, 'replace'))[2:-1].replace("\\n","\n")

    types = set()
    RDFs = []
    rdf = ""
    in_description = False
    with open(path,mode, encoding=encoding) as f:
        for line in f:
            if line.startswith("<rdf:RDF"): # start of a document
                rdf = ""
                in_description = False
            elif line.startswith("</rdf:RDF>"): # end of a document
                RDFs.append(rdf)
                rdf = ""
            else:
                if line.startswith("  <rdf:Description"):
                    in_description = True
                elif line.startswith("  </rdf:Description"):
                    # The closing tag is stored here because the flag is cleared
                    # before the generic append below can see it.
                    rdf += _escaped(line)
                    in_description = False
                if in_description:
                    if "<rdf:type" in line:
                        # Evaluate the (large) regex once per line instead of twice.
                        urls = re.findall(URL_REGEX,line)
                        types.add(urls[0].split("/")[-1] if urls else None)
                    rdf += _escaped(line) # opening tag and body lines
    return RDFs, types


Tamaño en bytes de un string

In [5]:
def utf8len(s):
    """Return the number of bytes that the string ``s`` occupies once encoded as UTF-8."""
    encoded = s.encode('utf-8')
    return len(encoded)

Expresion regular para identificar UUIDs

In [6]:
def create_uuid_pattern(version): # Build a regex that recognises UUIDs
    """Compile a case-insensitive regex matching textual UUIDs.

    Args:
        version: Regex fragment for the version digit (first character of the
            third group), e.g. ``'4'`` or ``'[1-5]'``.

    Returns:
        A compiled pattern for the five hyphen-separated hexadecimal groups, with
        the fourth group restricted to start with ``8``, ``9``, ``a`` or ``b``.
    """
    groups = (
        '[a-f0-9]{8}',
        '[a-f0-9]{4}',
        version + '[a-f0-9]{3}',
        '[89ab][a-f0-9]{3}',
        '[a-f0-9]{12}',
    )
    return re.compile('-'.join(groups), re.IGNORECASE)

Función para generar datos sintéticos (n) a partir de datos reales (data)

In [7]:
def generateData(data,n,max_chunk_bytes=3900000): # generate synthetic data from real data
    """Grow the seed records in ``data`` to ``n`` records, split into size-bounded chunks.

    Seed records are cycled in order; each synthetic record is a copy of a seed
    record in which every occurrence of its first UUID is replaced by a fresh
    ``uuid.uuid1()``. Seed records that contain no UUID are skipped, so the
    result can hold fewer than ``n`` records when such seeds exist.

    Args:
        data: List of seed records (strings).
        n: Target total number of records, seed records included.
        max_chunk_bytes: A chunk is closed as soon as its accumulated UTF-8 size
            exceeds this many bytes. Defaults to the previously hard-coded 3900000.

    Returns:
        A list of chunks (lists of strings). The first chunk starts with the seed
        records. NOTE(review): when ``len(data) > n`` the seed list itself is
        returned (a flat list of strings, not a list of chunks); kept as in the
        original, but callers that iterate over chunks will misbehave in that case.

    Raises:
        ValueError: If ``data`` is empty while ``n`` is positive (there is nothing
            to replicate).
    """
    if (len(data)>n):
        return data
    if not data and n > 0:
        raise ValueError("generateData needs at least one seed record to reach n > 0")

    uuid_pattern = create_uuid_pattern('[1-5]')
    seed_count = len(data)
    chunks = []
    current_chunk = list(data)
    current_bytes = utf8len("".join(data))
    for i in range(seed_count,n): # until the requested amount is reached
        # Read from the `data` argument rather than the notebook-level `RDFs` variable.
        record = data[i%seed_count]
        found = uuid_pattern.search(record)
        if found:
            current_chunk.append(record.replace(found.group(0),str(uuid.uuid1())))
            current_bytes += utf8len(record)
        if (current_bytes>max_chunk_bytes):
            chunks.append(current_chunk)
            current_chunk = []
            current_bytes = 0
    chunks.append(current_chunk)
    return chunks
        

Funciones para leer y escribir objetos en ficheros, util para reusar datos sinteticos

In [8]:
def save_obj(path, obj, name ): # Save an object to a file
    """Pickle ``obj`` into ``<path>/<name>.pkl`` using the highest pickle protocol."""
    target = ''.join((path, '/', name, '.pkl'))
    with open(target, 'wb') as out:
        pickle.dump(obj, out, pickle.HIGHEST_PROTOCOL)

def load_obj(path,name ): # Read an object back from a file
    """Return the object unpickled from ``<path>/<name>.pkl``.

    NOTE(review): ``pickle.load`` can execute arbitrary code; only use this on
    files produced by this notebook.
    """
    source = ''.join((path, '/', name, '.pkl'))
    with open(source, 'rb') as src:
        loaded = pickle.load(src)
    return loaded

## Generación de datos sinteticos hasta máximo

Creación de datos sintéticos para **50K, 250K, 500K, 1M, 5M** (los tamaños definidos en `dataNumber`)

In [9]:
# Set to True to wipe base_dir and regenerate every synthetic data file.
GENERATE_NEW_DATA = False
# Wrapper written around the records of every generated file.
prefix = """<?xml version="1.0" encoding="UTF-8" ?>
    <rdf:RDF
        xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
        xmlns:j.0="http://hercules.org/um/es-ES/rec/">
    """
sufix = """</rdf:RDF>
    """

# Seed records are always loaded, even when no new data is generated.
(RDFs,t) = parseRDF('./data/rdf.log','r',encoding='utf-8')


def getDataSet(dataNumber,elements):
    """Return the first set size in ``dataNumber`` greater than ``elements`` (None if there is none)."""
    for dn in dataNumber:
        if dn > elements:
            return dn
    return None


def _writeRDFFile(file_name, records):
    """Write ``records`` wrapped in ``prefix``/``sufix`` to ``file_name``, closing the file even on error."""
    # The prolog declares UTF-8, so do not depend on the platform default encoding.
    with open(file_name, "w", encoding="utf-8") as f:
        f.write(prefix+"".join(records)+sufix)


if (GENERATE_NEW_DATA):
    if (Path(base_dir).is_dir()):
        shutil.rmtree(base_dir)
    os.mkdir(base_dir)
    maxData = max(dataNumber)
    print('Generating sintetic data ('+str(maxData)+')')
    now = datetime.now()
    data = generateData(RDFs,maxData)
    later = datetime.now()
    print('Data ('+str(maxData)+') is generated in '+str((later - now).total_seconds()) + ' seconds')

    # Number of elements written so far into each set.
    dataLen = {dn: 0 for dn in dataNumber}

    counter = 0          # running file number
    counterElements = 0  # total elements written so far
    for d in data:
        counter += 1
        # Set that is currently being filled: the first size not yet reached.
        dataSet = getDataSet(dataLen,counterElements)
        if (dataSet-counterElements)>=len(d): # the whole chunk fits in the current set
            file_name = f'{base_dir}dataset_{dataSet}_number_{counter}_from_{counterElements+1}_to_{(counterElements+len(d))}.txt'
            _writeRDFFile(file_name, d)
            dataLen[dataSet] = dataLen[dataSet] + len(d)
            counterElements += len(d)
            # Report the space actually left in the set (the original printed the used count here).
            print("Create file "+file_name + ' in Set ' + str(dataSet) + ' with '+ str(len(d)) + ', new elements, remaining space available: ' + str(dataSet-counterElements))
        else:
            # Only part of the chunk fits: fill the current set exactly ...
            size_free = dataSet-counterElements
            allowed_data = d[:size_free]
            file_name = f'{base_dir}dataset_{dataSet}_number_{counter}_from_{counterElements+1}_to_{(counterElements+len(allowed_data))}.txt'
            _writeRDFFile(file_name, allowed_data)
            dataLen[dataSet] = dataLen[dataSet] + len(allowed_data)
            counterElements += len(allowed_data)
            print("Create file with allowed "+file_name + ' in Set ' + str(dataSet) + ' with '+ str(len(allowed_data)) + ', new elements, remaining space available: ' + str(dataSet-counterElements))

            # ... and write the remaining records into the next set.
            remain_data = d[size_free:]
            counter += 1
            setIndex = dataNumber.index(dataSet)
            # When there is no next set, stay in the current one (the original fell
            # back to the list index, which is not a set size).
            nextDataNumber = dataNumber[setIndex+1] if setIndex+1 < len(dataNumber) else dataSet
            file_name = f'{base_dir}dataset_{nextDataNumber}_number_{counter}_from_{counterElements+1}_to_{(counterElements+len(remain_data))}.txt'
            _writeRDFFile(file_name, remain_data)
            dataLen[nextDataNumber] = dataLen[nextDataNumber] + len(remain_data)
            counterElements += len(remain_data)
            print("Create file with remain "+file_name + ' in Set ' + str(nextDataNumber) + ' with '+ str(len(remain_data)) + ', new elements, remaining space available: ' + str(nextDataNumber-counterElements) + ' int same iteration')
    

## QUERIES

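Consultas SPARQL usadas en el benchmark: Q1 (DESCRIBE de un recurso concreto), Q2 (número total de tripletas), Q3 (FILTER por `idPersona`) y Q4 (valores DISTINCT de `idPersona`). Las variantes `_N` son las mismas consultas sin declaraciones de prefijos.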

In [10]:
# Benchmark queries. Each Qn carries PREFIX declarations; Qn_N is the same query
# without them (the Blazegraph cell uses the *_N list).

# DESCRIBE: fetch the description of one fixed resource
Q1 = """PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

DESCRIBE <http://hercules.org/um/es-ES/rec/AnualidadFinanciacionRegistroAyudaDefinitiva/e73fc9ee-382e-4a83-b9d5-58d3c45c5d81>"""

Q1_N = """
DESCRIBE <http://hercules.org/um/es-ES/rec/AnualidadFinanciacionRegistroAyudaDefinitiva/e73fc9ee-382e-4a83-b9d5-58d3c45c5d81>"""


# COUNT: count the total number of triples
Q2 = """PREFIX un: <http://www.w3.org/2007/ont/unit#>
PREFIX uni: <http://purl.org/weso/uni/uni.html#>
prefix univ:<http://people.brunel.ac.uk/~csstnns/university.owl#>
prefix sp:<http://www.meta-qsar.org/ontologies/sport.owl#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT (COUNT(*) as ?Triples) WHERE { ?s ?p ?o}"""

Q2_N = """SELECT (COUNT(*) as ?Triples) WHERE { ?s ?p ?o}"""

# FILTER: WHERE idPersona = 2211
# NOTE(review): ?p is projected but never bound in the WHERE clause -- confirm this is intended.
Q3 = """PREFIX un: <http://www.w3.org/2007/ont/unit#>
PREFIX uni: <http://purl.org/weso/uni/uni.html#>
prefix univ:<http://people.brunel.ac.uk/~csstnns/university.owl#>
prefix sp:<http://www.meta-qsar.org/ontologies/sport.owl#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?s ?p ?o
WHERE {
  ?s <http://hercules.org/um/es-ES/rec/idPersona> ?o
  FILTER(?o = "2211")
}"""

Q3_N = """SELECT ?s ?p ?o
WHERE {
  ?s <http://hercules.org/um/es-ES/rec/idPersona> ?o
  FILTER(?o = "2211")
}"""

# DISTINCT: distinct idPersona values
Q4 = """PREFIX un: <http://www.w3.org/2007/ont/unit#>
PREFIX uni: <http://purl.org/weso/uni/uni.html#>
prefix univ:<http://people.brunel.ac.uk/~csstnns/university.owl#>
prefix sp:<http://www.meta-qsar.org/ontologies/sport.owl#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT DISTINCT ?o WHERE { ?s <http://hercules.org/um/es-ES/rec/idPersona> ?o }"""

Q4_N = """SELECT DISTINCT ?o WHERE { ?s <http://hercules.org/um/es-ES/rec/idPersona> ?o }"""


# Query lists in Q1..Q4 order; the benchmark cells number results by position in these lists.
queries = [Q1,Q2,Q3,Q4]

queries_no_prefix = [Q1_N,Q2_N,Q3_N,Q4_N]

## General Triple Stores

Método para realizar la petición Post para 1 fichero

In [11]:
def sendDataFromFile(filePath,host='localhost',port=3030,url='/trellis/data?graph=trellis'):
    """POST the contents of one file as ``multipart/form-data`` and return the response text.

    Args:
        filePath: File to upload; it is also sent as the part's ``filename``.
        host: Server host name.
        port: Server port.
        url: Request path (including any query string) of the upload endpoint.

    Returns:
        The response body decoded as UTF-8.
    """
    import http.client
    import mimetypes

    boundary = 'wL36Yn8afVp8Ag7AmP8qZ0SA4n1v9T'
    fileType = mimetypes.guess_type(filePath)[0] or 'application/octet-stream'
    with open(filePath) as f:
        content = f.read()

    # One form part holding the file, then the closing boundary.
    body = '\r\n'.join([
        '--' + boundary,
        'Content-Disposition: form-data; name=file; filename={0}'.format(filePath),
        'Content-Type: {}'.format(fileType),
        '',
        content,
        '--'+boundary+'--',
        '',
    ])
    headers = {
       'Content-type': 'multipart/form-data; boundary={}'.format(boundary)
    }

    conn = http.client.HTTPConnection(host, port)
    try:
        # http.client encodes a str body as ISO-8859-1; send explicit UTF-8 bytes so
        # non-Latin-1 text neither fails nor contradicts the XML prolog.
        conn.request("POST", url, body.encode('utf-8'), headers)
        res = conn.getresponse()
        data = res.read()
        return data.decode("utf-8")
    finally:
        # Release the socket even if the request fails.
        conn.close()

In [12]:
def sendDataFromFileBlazegraph(filePath,host='localhost',port=8889,url='/bigdata/sparql'):
    """POST the contents of one RDF/XML file to a Blazegraph endpoint and return the response text.

    Args:
        filePath: File whose contents are sent as the request body.
        host: Server host name.
        port: Server port.
        url: Request path of the endpoint.

    Returns:
        The response body as text.
    """
    import requests

    with open(filePath) as f:
        payload = f.read()
    headers = {
      'Content-Type': 'application/rdf+xml'
    }
    # Send explicit UTF-8 bytes so the wire encoding does not depend on how the
    # installed HTTP stack encodes str bodies.
    response = requests.request("POST", 'http://'+host+':'+str(port)+url, headers=headers, data = payload.encode('utf-8'))

    return  response.text

In [13]:
#sendDataFromFileBlazegraph(filePath='./data/sintetic_data/dataset_50000_number_1_from_1_to_6502.txt')

Método para realizar una query

In [14]:
def sendQuery(query,host='localhost',port=3030,url='/trellis/data?graph=trellis'):
    """Run a SPARQL query against the ``/trellis/sparql`` endpoint and return the response text.

    Args:
        query: SPARQL query text.
        host: Server host name.
        port: Server port.
        url: Currently unused. NOTE(review): its default is the *data* endpoint, so
            the query path stays hard-coded to ``/trellis/sparql`` to keep existing
            calls working; confirm before wiring this parameter in.

    Returns:
        The response body decoded as UTF-8 (JSON is requested via ``Accept``).
    """
    import http.client
    import urllib.parse

    # The body is declared as x-www-form-urlencoded, so encode it accordingly;
    # otherwise characters such as '&', '+' or '%' in a query would be misparsed.
    payload = urllib.parse.urlencode({'query': query})
    headers = {
      'Content-Type': 'application/x-www-form-urlencoded',
      'Accept': 'application/json'
    }
    # Honour host/port instead of the previously hard-coded localhost:3030.
    conn = http.client.HTTPConnection(host, port)
    try:
        conn.request("POST", "/trellis/sparql", payload, headers)
        res = conn.getresponse()
        data = res.read()
        return data.decode("utf-8")
    finally:
        conn.close()

In [15]:
def sendQueryBlazegraph(query,host='localhost',port=8889,url='/bigdata/sparql'):
    """Run a SPARQL query against a Blazegraph endpoint and return the response text.

    Args:
        query: SPARQL query text; sent as the ``query`` URL parameter.
        host: Server host name.
        port: Server port.
        url: Request path of the SPARQL endpoint.

    Returns:
        The response body as text (JSON is requested via ``Accept``).
    """
    import requests

    # Build the endpoint from the `url` argument (it used to be overwritten).
    endpoint = f"http://{host}:{port}{url}"
    headers = {
      'Accept': 'application/json'
    }
    # Let requests URL-encode the query: pasted raw into the URL, a '#' (as in the
    # PREFIX declarations) would start a fragment and truncate the query.
    response = requests.request("POST", endpoint, params={'query': query}, headers=headers)

    return response.text


### FUSEKI + TDB2

In [23]:
# Set to True to rerun the Fuseki + TDB2 benchmark and overwrite its CSV results.
GENERATE_NEW_TDB2_MEAUSERES = True
if GENERATE_NEW_TDB2_MEAUSERES:
    updateTime = []  # one timing record per data set (insertion)
    queryTime = []   # one record per data set holding the timing of every query
    for dn in dataNumber:

        nowUpdate = datetime.now() # start time for this set

        # Insertion: upload every generated file that belongs to this set
        for f in os.listdir(base_dir):
            setMatch = re.search(r'\d+', f) # first number in the file name is the set
            if setMatch is None:
                continue # stray entry without digits (not a data file)

            if dn == int(setMatch.group()):
                res = sendDataFromFile(filePath=f'{base_dir}{f}') # upload the file
                print(dn)
        laterUpdate = datetime.now() # end time for this set

        # Time spent on updates, plus the running total over the previous sets
        deltaUpdate = laterUpdate - nowUpdate
        previousAcum = updateTime[-1]['acumDeltaTime'] if updateTime else 0
        updateTime.append({'set': dn,'start': nowUpdate, 'end': laterUpdate ,'deltaTime': deltaUpdate.total_seconds(),'acumDeltaTime': deltaUpdate.total_seconds() + previousAcum})

        # Time spent on each query once the set is loaded
        queryTimeObj = {'set': dn, 'queries':[] }
        for counter_q, q in enumerate(queries, start=1):
            now_q = datetime.now()
            result = sendQuery(q)
            later_q = datetime.now()
            delta_q = later_q - now_q
            queryTimeObj['queries'].append({'start': now_q, 'end': later_q, 'deltaTime': delta_q.total_seconds(), 'queryId': f'Q{counter_q}', 'query': q})
        queryTime.append(queryTimeObj)

    import pandas as pd
    os.makedirs('./results', exist_ok=True) # do not lose a long run to a missing folder

    # Build each frame in one go from a list of records: DataFrame.append was
    # removed in pandas 2.0 (and was quadratic when used in a loop).
    udf = pd.DataFrame(
        [{'elements': ut['set'],'delta_time_seg': ut['deltaTime'],'amount_delta_time_seg': ut['acumDeltaTime']} for ut in updateTime],
        columns=['elements','delta_time_seg','amount_delta_time_seg'])
    udf.to_csv('./results/updateMetricFuseki.csv', index=False)

    queryRows = []
    for qt in queryTime:
        qtObj = {'elements':qt['set']}
        for q in qt['queries']:
            qtObj[f"{q['queryId']}_delta_time_seg"]=q['deltaTime']
        queryRows.append(qtObj)
    qdf = pd.DataFrame(
        queryRows,
        columns=['elements'] + [f'Q{i}_delta_time_seg' for i in range(1, len(queries)+1)])
    qdf.to_csv('./results/queriesMetricFuseki.csv', index=False)

50000
50000
50000
50000
50000
50000
50000
50000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
250000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
500000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000
1000000

### BlazeGraph

In [17]:
# Set to True to rerun the Blazegraph benchmark and overwrite its CSV results.
GENERATE_NEW_BLAZEGRAPH_MEAUSERES = True
if GENERATE_NEW_BLAZEGRAPH_MEAUSERES:
    updateTime = []  # one timing record per data set (insertion)
    queryTime = []   # one record per data set holding the timing of every query
    for dn in dataNumber:

        nowUpdate = datetime.now() # start time for this set

        # Insertion: upload every generated file that belongs to this set
        for f in os.listdir(base_dir):
            setMatch = re.search(r'\d+', f) # first number in the file name is the set
            if setMatch is None:
                continue # stray entry without digits (not a data file)

            if dn == int(setMatch.group()):
                res = sendDataFromFileBlazegraph(filePath=f'{base_dir}{f}') # upload the file
                print(res)
        laterUpdate = datetime.now() # end time for this set

        # Time spent on updates, plus the running total over the previous sets
        deltaUpdate = laterUpdate - nowUpdate
        previousAcum = updateTime[-1]['acumDeltaTime'] if updateTime else 0
        updateTime.append({'set': dn,'start': nowUpdate, 'end': laterUpdate ,'deltaTime': deltaUpdate.total_seconds(),'acumDeltaTime': deltaUpdate.total_seconds() + previousAcum})

        # Time spent on each query once the set is loaded
        queryTimeObj = {'set': dn, 'queries':[] }
        for counter_q, q in enumerate(queries_no_prefix, start=1):
            now_q = datetime.now()
            result = sendQueryBlazegraph(q)
            later_q = datetime.now()
            delta_q = later_q - now_q
            queryTimeObj['queries'].append({'start': now_q, 'end': later_q, 'deltaTime': delta_q.total_seconds(), 'queryId': f'Q{counter_q}', 'query': q})
        queryTime.append(queryTimeObj)

    import pandas as pd
    os.makedirs('./results', exist_ok=True) # do not lose a long run to a missing folder

    # Build each frame in one go from a list of records: DataFrame.append was
    # removed in pandas 2.0 (and was quadratic when used in a loop).
    udf = pd.DataFrame(
        [{'elements': ut['set'],'delta_time_seg': ut['deltaTime'],'amount_delta_time_seg': ut['acumDeltaTime']} for ut in updateTime],
        columns=['elements','delta_time_seg','amount_delta_time_seg'])
    udf.to_csv('./results/updateMetricBlazegraph.csv', index=False)

    queryRows = []
    for qt in queryTime:
        qtObj = {'elements':qt['set']}
        for q in qt['queries']:
            qtObj[f"{q['queryId']}_delta_time_seg"]=q['deltaTime']
        queryRows.append(qtObj)
    qdf = pd.DataFrame(
        queryRows,
        columns=['elements'] + [f'Q{i}_delta_time_seg' for i in range(1, len(queries_no_prefix)+1)])
    qdf.to_csv('./results/queriesMetricBlazegraph.csv', index=False)

<?xml version="1.0"?><data modified="51015" milliseconds="444"/>
<?xml version="1.0"?><data modified="50807" milliseconds="270"/>
<?xml version="1.0"?><data modified="50866" milliseconds="279"/>
<?xml version="1.0"?><data modified="50996" milliseconds="288"/>
<?xml version="1.0"?><data modified="50751" milliseconds="297"/>
<?xml version="1.0"?><data modified="50952" milliseconds="571"/>
<?xml version="1.0"?><data modified="50978" milliseconds="410"/>
<?xml version="1.0"?><data modified="32369" milliseconds="331"/>
<?xml version="1.0"?><data modified="51013" milliseconds="270"/>
<?xml version="1.0"?><data modified="50910" milliseconds="262"/>
<?xml version="1.0"?><data modified="50740" milliseconds="250"/>
<?xml version="1.0"?><data modified="51040" milliseconds="247"/>
<?xml version="1.0"?><data modified="50855" milliseconds="251"/>
<?xml version="1.0"?><data modified="50796" milliseconds="265"/>
<?xml version="1.0"?><data modified="51045" milliseconds="265"/>
<?xml version="1.0"?><dat

<?xml version="1.0"?><data modified="51056" milliseconds="1522"/>
<?xml version="1.0"?><data modified="50823" milliseconds="2379"/>
<?xml version="1.0"?><data modified="50823" milliseconds="2609"/>
<?xml version="1.0"?><data modified="51023" milliseconds="1883"/>
<?xml version="1.0"?><data modified="50774" milliseconds="2198"/>
<?xml version="1.0"?><data modified="50889" milliseconds="2955"/>
<?xml version="1.0"?><data modified="50978" milliseconds="2622"/>
<?xml version="1.0"?><data modified="50732" milliseconds="2788"/>
<?xml version="1.0"?><data modified="50975" milliseconds="2511"/>
<?xml version="1.0"?><data modified="32658" milliseconds="2443"/>
<?xml version="1.0"?><data modified="34715" milliseconds="2421"/>
<?xml version="1.0"?><data modified="50750" milliseconds="2854"/>
<?xml version="1.0"?><data modified="50940" milliseconds="2953"/>
<?xml version="1.0"?><data modified="50972" milliseconds="2974"/>
<?xml version="1.0"?><data modified="50705" milliseconds="2583"/>
<?xml vers

<?xml version="1.0"?><data modified="51031" milliseconds="4675"/>
<?xml version="1.0"?><data modified="50782" milliseconds="4406"/>
<?xml version="1.0"?><data modified="50888" milliseconds="3886"/>
<?xml version="1.0"?><data modified="50998" milliseconds="3783"/>
<?xml version="1.0"?><data modified="50750" milliseconds="4892"/>
<?xml version="1.0"?><data modified="50940" milliseconds="4280"/>
<?xml version="1.0"?><data modified="50972" milliseconds="4564"/>
<?xml version="1.0"?><data modified="50705" milliseconds="4911"/>
<?xml version="1.0"?><data modified="51030" milliseconds="4497"/>
<?xml version="1.0"?><data modified="50900" milliseconds="4292"/>
<?xml version="1.0"?><data modified="50747" milliseconds="4543"/>
<?xml version="1.0"?><data modified="51042" milliseconds="4316"/>
<?xml version="1.0"?><data modified="50858" milliseconds="4105"/>
<?xml version="1.0"?><data modified="50794" milliseconds="4456"/>
<?xml version="1.0"?><data modified="51022" milliseconds="4093"/>
<?xml vers

<?xml version="1.0"?><data modified="50763" milliseconds="4425"/>
<?xml version="1.0"?><data modified="51018" milliseconds="4366"/>
<?xml version="1.0"?><data modified="50871" milliseconds="4441"/>
<?xml version="1.0"?><data modified="50782" milliseconds="4652"/>
<?xml version="1.0"?><data modified="51051" milliseconds="4632"/>
<?xml version="1.0"?><data modified="50816" milliseconds="4099"/>
<?xml version="1.0"?><data modified="50833" milliseconds="3986"/>
<?xml version="1.0"?><data modified="51013" milliseconds="3965"/>
<?xml version="1.0"?><data modified="50769" milliseconds="4298"/>
<?xml version="1.0"?><data modified="50909" milliseconds="4126"/>
<?xml version="1.0"?><data modified="50983" milliseconds="4120"/>
<?xml version="1.0"?><data modified="50714" milliseconds="4372"/>
<?xml version="1.0"?><data modified="51005" milliseconds="4212"/>
<?xml version="1.0"?><data modified="50920" milliseconds="4243"/>
<?xml version="1.0"?><data modified="50755" milliseconds="4488"/>
<?xml vers

<?xml version="1.0"?><data modified="50978" milliseconds="4440"/>
<?xml version="1.0"?><data modified="50732" milliseconds="4160"/>
<?xml version="1.0"?><data modified="50975" milliseconds="4141"/>
<?xml version="1.0"?><data modified="50946" milliseconds="4213"/>
<?xml version="1.0"?><data modified="50734" milliseconds="3834"/>
<?xml version="1.0"?><data modified="51006" milliseconds="4458"/>
<?xml version="1.0"?><data modified="50911" milliseconds="4260"/>
<?xml version="1.0"?><data modified="50734" milliseconds="4467"/>
<?xml version="1.0"?><data modified="51059" milliseconds="4401"/>
<?xml version="1.0"?><data modified="50834" milliseconds="4376"/>
<?xml version="1.0"?><data modified="50821" milliseconds="4249"/>
<?xml version="1.0"?><data modified="51031" milliseconds="3891"/>
<?xml version="1.0"?><data modified="50782" milliseconds="4780"/>
<?xml version="1.0"?><data modified="50888" milliseconds="4503"/>
<?xml version="1.0"?><data modified="50998" milliseconds="4306"/>
<?xml vers

<?xml version="1.0"?><data modified="50796" milliseconds="4046"/>
<?xml version="1.0"?><data modified="51045" milliseconds="3650"/>
<?xml version="1.0"?><data modified="50786" milliseconds="4701"/>
<?xml version="1.0"?><data modified="50855" milliseconds="4014"/>
<?xml version="1.0"?><data modified="51021" milliseconds="3996"/>
<?xml version="1.0"?><data modified="50740" milliseconds="4107"/>
<?xml version="1.0"?><data modified="50931" milliseconds="4028"/>
<?xml version="1.0"?><data modified="50988" milliseconds="4112"/>
<?xml version="1.0"?><data modified="50711" milliseconds="3755"/>
<?xml version="1.0"?><data modified="51007" milliseconds="3943"/>
<?xml version="1.0"?><data modified="50912" milliseconds="4115"/>
<?xml version="1.0"?><data modified="50763" milliseconds="3592"/>
<?xml version="1.0"?><data modified="51018" milliseconds="4106"/>
<?xml version="1.0"?><data modified="50871" milliseconds="3761"/>
<?xml version="1.0"?><data modified="50782" milliseconds="4234"/>
<?xml vers

<?xml version="1.0"?><data modified="50936" milliseconds="4180"/>
<?xml version="1.0"?><data modified="50743" milliseconds="3579"/>
<?xml version="1.0"?><data modified="51001" milliseconds="3987"/>
<?xml version="1.0"?><data modified="50894" milliseconds="3695"/>
<?xml version="1.0"?><data modified="50752" milliseconds="3917"/>
<?xml version="1.0"?><data modified="51056" milliseconds="3984"/>
<?xml version="1.0"?><data modified="50823" milliseconds="4691"/>
<?xml version="1.0"?><data modified="50823" milliseconds="3901"/>
<?xml version="1.0"?><data modified="51023" milliseconds="4063"/>
<?xml version="1.0"?><data modified="50774" milliseconds="4485"/>
<?xml version="1.0"?><data modified="50889" milliseconds="3736"/>
<?xml version="1.0"?><data modified="50978" milliseconds="3855"/>
<?xml version="1.0"?><data modified="50732" milliseconds="4121"/>
<?xml version="1.0"?><data modified="50975" milliseconds="3544"/>
<?xml version="1.0"?><data modified="50946" milliseconds="3843"/>
<?xml vers

In [19]:
# Display the update timings; `udf` holds whatever the most recently executed benchmark cell assigned.
udf

Unnamed: 0,elements,delta_time_seg,amount_delta_time_seg
0,50000.0,3.359767,3.359767
1,250000.0,17.131264,20.491031
2,500000.0,43.918947,64.409978
3,1000000.0,165.398358,229.808336
4,5000000.0,2552.058824,2781.86716


In [24]:
# Reload the update timings that the FUSEKI + TDB2 benchmark cell wrote to disk.
pd.read_csv('./results/updateMetricFuseki.csv') 

Unnamed: 0,elements,delta_time_seg,amount_delta_time_seg
0,50000.0,6.593503,6.593503
1,250000.0,18.553377,25.14688
2,500000.0,26.531804,51.678684
3,1000000.0,55.428372,107.107056
4,5000000.0,493.131946,600.239002


In [26]:
# Reload the per-query timings that the FUSEKI + TDB2 benchmark cell wrote to disk.
pd.read_csv('./results/queriesMetricFuseki.csv') 

Unnamed: 0,elements,Q1_delta_time_seg,Q2_delta_time_seg,Q3_delta_time_seg,Q4_delta_time_seg
0,50000.0,0.0677,15.351222,1.066817,7.800106
1,250000.0,0.013002,23.949527,0.881751,7.956429
2,500000.0,0.010995,30.055681,1.008468,8.378793
3,1000000.0,0.011002,31.07324,0.927235,8.034584
4,5000000.0,0.010017,36.973381,1.00472,10.692471


In [27]:
# Reload the per-query timings that the BlazeGraph benchmark cell wrote to disk.
pd.read_csv('./results/queriesMetricBlazegraph.csv') 

Unnamed: 0,elements,Q1_delta_time_seg,Q2_delta_time_seg,Q3_delta_time_seg,Q4_delta_time_seg
0,50000.0,0.033353,0.022999,0.502146,0.028001
1,250000.0,0.036598,0.021998,0.63966,0.013734
2,500000.0,0.075981,0.022996,1.429181,0.027963
3,1000000.0,0.053015,0.032986,2.954163,0.027038
4,5000000.0,0.085023,0.036953,16.448291,0.05903
