Default query to Local, pass remote=True to do remote queries on endpoint
IKCAP committed Feb 23, 2023
1 parent 671328a commit d5d2f7a
Showing 2 changed files with 29 additions and 33 deletions.
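
In effect, this commit removes the sticky self.remote instance flag: LiPD.query() now always runs against the local graph unless the caller opts in per call with remote=True, which only takes effect once an endpoint has been set. A minimal sketch of the new calling convention, assuming pylipd is installed; the file path and endpoint URL below are illustrative:

    from pylipd.lipd import LiPD

    lipd = LiPD()
    lipd.load(["MD98_2181.Stott.2007.lpd"])  # illustrative local file path

    # Default: the query runs on the local graph
    qres, qres_df = lipd.query("SELECT ?g WHERE { GRAPH ?g { ?s ?p ?o } }")

    # Opt in to remote execution, per call, after setting an endpoint
    lipd.set_endpoint("https://example.org/sparql")  # illustrative URL
    qres, qres_df = lipd.query("SELECT ?g WHERE { GRAPH ?g { ?s ?p ?o } }", remote=True)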
39 changes: 16 additions & 23 deletions pylipd/lipd.py
@@ -46,7 +46,6 @@ class LiPD:
     '''
     def __init__(self):
         self.initialize_graph()
-        self.remote = False
 
     def initialize_graph(self):
         self.graph = ConjunctiveGraph()
@@ -146,7 +145,6 @@ def load(self, lipdfiles, collection_id=None):
         multi_convert_to_pickle(filemap, collection_id)
         print("Conversion to RDF done..")
 
-        self.remote = False
         print("Loading RDF into graph")
         for lipdfile in lipdfiles:
             picklefile = filemap[lipdfile]
@@ -158,6 +156,11 @@ def load(self, lipdfiles, collection_id=None):
         print("Loaded..")
 
 
+    def clear(self):
+        '''Clears the graph'''
+        self.initialize_graph()
+
+
     def set_endpoint(self, endpoint):
         '''Sets a SparQL endpoint for a remote Knowledge Base (example: GraphDB)
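
The new clear() helper simply re-runs initialize_graph(), discarding whatever was loaded. A small sketch of how it might be used between loads (file paths illustrative):

    lipd = LiPD()
    lipd.load(["dataset_a.lpd"])  # illustrative path
    print(lipd.get_all_dataset_names())

    lipd.clear()                  # graph is re-initialized, now empty
    lipd.load(["dataset_b.lpd"])  # illustrative path; start fresh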
@@ -184,7 +187,6 @@ def set_endpoint(self, endpoint):
             print(lipd_remote.get_all_dataset_names())
         '''
-        self.remote = True
         self.endpoint = endpoint


@@ -245,7 +247,7 @@ def convert_lipd_dir_to_rdf(self, lipd_dir, rdf_file, collection_id=None):
         print("Written..")
 
 
-    def query(self, query, result="sparql"):
+    def query(self, query, remote=False, result="sparql"):
         '''Once LiPD files are loaded into the graph (or a remote endpoint is set), one can make SparQL queries against the graph
         Parameters
@@ -254,6 +256,9 @@ def query(self, query, result="sparql"):
         query : str
             SparQL query
+        remote: bool
+            (Optional) If set to True, the query will be made to the remote endpoint (if set)
+
         result : str
             (Optional) Result return type
@@ -290,7 +295,7 @@ def query(self, query, result="sparql"):
             result_df
         '''
 
-        if self.remote:
+        if remote and self.endpoint:
            matches = re.match(r"\s*SELECT\s+(.+)\s+WHERE\s+{(.+)}\s*", query, re.DOTALL)
            if matches:
                vars = matches.group(1)
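
Note the guard: the remote branch is taken only when remote=True is passed and an endpoint has been set, so existing local-only callers are unaffected. A hedged sketch of the two paths (endpoint URL illustrative):

    lipd = LiPD()
    lipd.set_endpoint("https://example.org/sparql")  # illustrative URL

    # remote omitted (defaults to False): runs on the local graph
    qres, qres_df = lipd.query("SELECT ?g WHERE { GRAPH ?g { ?s ?p ?o } }")

    # remote=True with an endpoint set: the SELECT is rewritten by the
    # regex above and executed against the endpoint instead
    qres, qres_df = lipd.query("SELECT ?g WHERE { GRAPH ?g { ?s ?p ?o } }", remote=True)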
@@ -335,7 +340,7 @@ def load_remote_datasets(self, dsnames):
             lipd_remote.load_remote_datasets(["Ocn-MadangLagoonPapuaNewGuinea.Kuhnert.2001", "MD98_2181.Stott.2007", "Ant-WAIS-Divide.Severinghaus.2012"])
             print(lipd_remote.get_all_dataset_names())
         '''
-        if not self.remote or not self.endpoint:
+        if not self.endpoint:
             raise Exception("No remote endpoint")
 
         if type(dsnames) is not list:
@@ -345,14 +350,13 @@ def load_remote_datasets(self, dsnames):
             raise Exception("No dataset names to cache")
         dsnamestr = (' '.join('<' + NSURL + "/" + dsname + '>' for dsname in dsnames))
         print("Caching datasets from remote endpoint..")
-        qres, qres_df = self.query(f"SELECT ?s ?p ?o ?g WHERE {{ GRAPH ?g {{ ?s ?p ?o }} VALUES ?g {{ {dsnamestr} }} }}")
+        qres, qres_df = self.query(f"SELECT ?s ?p ?o ?g WHERE {{ GRAPH ?g {{ ?s ?p ?o }} VALUES ?g {{ {dsnamestr} }} }}", remote=True)
 
         # Reinitialize graph
-        self.initialize_graph()
+        # self.initialize_graph()
         for row in qres:
             self.graph.add((row.s, row.p, row.o, row.g))
         print("Done..")
-        self.remote = False
 
     def get_timeseries(self, dsids):
         '''Get Legacy LiPD like Time Series Object (tso)
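
With the self.remote flag gone, load_remote_datasets() now marks its caching query explicitly with remote=True, then adds the fetched quads to the local graph, so subsequent calls default to the cached local copy. A usage sketch (endpoint URL illustrative; dataset name taken from the docstring example above):

    lipd = LiPD()
    lipd.set_endpoint("https://example.org/sparql")  # illustrative URL

    # Pull the named dataset's quads from the endpoint into the local graph
    lipd.load_remote_datasets(["MD98_2181.Stott.2007"])

    # Later queries hit the local copy by default
    print(lipd.get_all_dataset_names())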
@@ -381,20 +385,9 @@ def get_timeseries(self, dsids):
             for tso in tsos:
                 if 'paleoData_variableName' in tso:
                     print(dsid+': '+tso['paleoData_variableName']+': '+tso['archiveType'])
         '''
-        if self.remote:
-            # Cache datasets locally - to speed up queries
-            self.load_remote_datasets(dsids)
-            ts = self._get_timeseries(dsids)
-
-            # Go back to remote
-            self.initialize_graph()
-            self.set_endpoint(self.endpoint)
-
-            return ts
-        else:
-            ts = self._get_timeseries(dsids)
-            return ts
+        ts = self._get_timeseries(dsids)
+        return ts
 
     def _get_timeseries(self, dsids):
         timeseries = {}
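
get_timeseries() no longer branches on a remote flag or tears down and rebuilds the graph; it always reads the local graph, so remote datasets must be cached first via load_remote_datasets(). A short sketch reusing the docstring's style (endpoint URL illustrative):

    lipd = LiPD()
    lipd.set_endpoint("https://example.org/sparql")      # illustrative URL
    lipd.load_remote_datasets(["MD98_2181.Stott.2007"])  # cache locally first

    ts_list = lipd.get_timeseries(lipd.get_all_dataset_names())
    for dsid, tsos in ts_list.items():
        for tso in tsos:
            if 'paleoData_variableName' in tso:
                print(dsid+': '+tso['paleoData_variableName']+': '+tso['archiveType'])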
23 changes: 13 additions & 10 deletions pylipd/usage.py
@@ -24,6 +24,7 @@
 
 # Load Datasets (from Local and Remote)
 dsids = ["MD98_2181.Stott.2007"]
+remote_dsids = ["Ocn-MadangLagoonPapuaNewGuinea.Kuhnert.2001"]
 
 # Load from local
 lipd = LiPD()
@@ -32,31 +33,33 @@
 
 lipd.load(lipdfiles)
 #lipd.load_from_dir(local_lipd_dir)
-print(lipd.get_all_dataset_ids())
+print(lipd.get_all_dataset_names())
 
-lipd.load(["/Users/varun/Downloads/Arc-LakeNataujärvi.Ojala.2005.lpd"])
-print(lipd.get_all_dataset_ids())
+#lipd.load(["/Users/varun/Downloads/Arc-LakeNataujärvi.Ojala.2005.lpd"])
+#print(lipd.get_all_dataset_names())
 
-ts_list = lipd.get_timeseries(lipd.get_all_dataset_ids())
+for dsid in lipd.get_all_dataset_names():
+    json = lipd.get_lipd(dsid)
+    print(json['pub'])
+
+ts_list = lipd.get_timeseries(lipd.get_all_dataset_names())
 for dsid, tsos in ts_list.items():
     for tso in tsos:
         if 'paleoData_variableName' in tso:
             print(dsid+': '+tso['paleoData_variableName']+': '+tso['archiveType'])
 
+exit()
 # Fetch LiPD data from remote RDF Graph
-lipd_remote = LiPD()
-lipd_remote.set_endpoint(remote_lipd_endpoint)
+lipd.set_endpoint(remote_lipd_endpoint)
+lipd.load_remote_datasets(remote_dsids)
 
 # Convert to TSO object (as before)
-ts_list_remote = lipd_remote.get_timeseries(dsids)
+ts_list_remote = lipd.get_timeseries(lipd.get_all_dataset_names())
 for dsid, tsos in ts_list_remote.items():
     for tso in tsos:
         print(dsid+': '+tso['paleoData_variableName']+': '+tso['archiveType'])
 
 '''
-print(lipd.get_all_dataset_ids())
+print(lipd.get_all_dataset_names())
 datasets = lipd.get_datasets(dsids=dsids)
 print("Fetched..")
 for ds in datasets:
