Skip to content

Commit

Permalink
Return the results of a sparql query as a pandas dataframe
Browse files Browse the repository at this point in the history
  • Loading branch information
khider committed Feb 18, 2023
1 parent 36ca318 commit 27fe55b
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 7 deletions.
18 changes: 13 additions & 5 deletions pylipd/lipd.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@
import re
import os.path
import tempfile
import pandas as pd

from rdflib import ConjunctiveGraph, Namespace
from pylipd.multi_processing import multi_convert_to_pickle, multi_convert_to_rdf

from pylipd.rdf_to_lipd import RDFToLiPD
from pylipd.legacy_utils import LiPD_Legacy
from pylipd.utils import sanitizeId
from pylipd.utils import sanitizeId, sparql_results_to_df

from .globals.urls import NSURL, ONTONS

Expand Down Expand Up @@ -261,6 +262,9 @@ def query(self, query, result="sparql"):
result : dict
Dictionary of sparql variable and binding values
result_df : pandas.DataFrame
Return the dictionary above as a pandas.DataFrame
Examples
--------
Expand All @@ -282,17 +286,21 @@ def query(self, query, result="sparql"):
?ds a le:Dataset .
?ds le:hasUrl ?url
}"""
result = lipd.query(query)
print(result)
result, result_df = lipd.query(query)
result_df
'''

if self.remote:
matches = re.match(r"\s*SELECT\s+(.+)\s+WHERE\s+{(.+)}\s*", query, re.DOTALL)
if matches:
vars = matches.group(1)
where = matches.group(2)
query = f"SELECT {vars} WHERE {{ SERVICE <{self.endpoint}> {{ {where} }} }}"
return self.graph.query(query, result=result)
query = f"SELECT {vars} WHERE {{ SERVICE <{self.endpoint}> {{ {where} }} }}"

result = self.graph.query(query)
result_df = sparql_results_to_df(result)

return result, result_df

def load_remote_datasets(self, dsids):
'''Loads remote datasets into cache if a remote endpoint is set
Expand Down
2 changes: 1 addition & 1 deletion pylipd/tests/test_LiPD.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def test_query_t0(self):
if __name__=="__main__":
lipd = LiPD()
lipd.load(url)
result = lipd.query(query)
result, result_df = lipd.query(query)



16 changes: 15 additions & 1 deletion pylipd/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
import math
import random

from pandas import DataFrame
from rdflib.plugins.sparql.processor import SPARQLResult

import zlib, json, base64

def ucfirst(s):
Expand Down Expand Up @@ -78,4 +81,15 @@ def expand_schema(schema) :
for altkey in pdetails["alternates"]:
xschema[key][altkey] = pdetails
xschema["__expanded"] = True
return xschema
return xschema


def _term_to_python(term):
    """Return the native Python value of an RDF term, or *term* itself if it has none."""
    to_python = getattr(term, "toPython", None)
    # Unbound variables (None) and plain Python values (e.g. the bool of an
    # ASK answer) have no ``toPython`` and are passed through unchanged.
    return term if to_python is None else to_python()


def sparql_results_to_df(results: SPARQLResult) -> DataFrame:
    """
    Export results from an rdflib SPARQL query into a `pandas.DataFrame`,
    using Python types. See https://github.com/RDFLib/rdflib/issues/1179.

    Parameters
    ----------
    results : SPARQLResult
        Iterable query result exposing a ``vars`` attribute. Each row is a
        sequence of RDF terms (objects with a ``toPython()`` method) or
        ``None`` for an unbound variable.

    Returns
    -------
    DataFrame
        One row per result row. Columns are named after ``results.vars``
        (stringified) when it is set. When ``results.vars`` is ``None``
        (as appears to be the case for non-SELECT results such as ASK or
        CONSTRUCT in rdflib -- confirm against the rdflib version in use),
        pandas' default integer column labels are used instead of raising.
    """
    variables = getattr(results, "vars", None)
    columns = None if variables is None else [str(v) for v in variables]

    rows = []
    for row in results:
        # A non-sequence row (e.g. the single boolean yielded by an ASK
        # result) is wrapped so that it becomes a one-cell row.
        cells = row if isinstance(row, (tuple, list)) else (row,)
        rows.append([_term_to_python(cell) for cell in cells])

    return DataFrame(data=rows, columns=columns)

0 comments on commit 27fe55b

Please sign in to comment.