## intro :: generic predefined section kgap-demo

### defines standard scripts backed by `sema.query`

In [13]:
from kgap_tools import execute_to_df, generate_sparql
from pathlib import Path

### show how `generate_sparql` expands the templates in `./queries/*.sparql`

In [2]:
# Demo cell: print a banner, expand the "all.sparql" template with N=5 and
# print the resulting query text so the reader can see what will be executed.
print("generated sparql in action...")
sparql: str = generate_sparql("all.sparql", N=5) # general s-p-o query; N=5 shows up as the LIMIT in the printed query
print(sparql)

generated sparql in action...
SELECT * WHERE { ?s ?p ?o. }
LIMIT 5


### execute that same query and load the result into a pandas DataFrame

In [3]:
# Run the same "all.sparql" template (N=5) and let the notebook render the
# returned frame as this cell's output.
execute_to_df("all.sparql", N=5)

Unnamed: 0,s,p,o
0,genid-3ef869bc5c4946f38793fefa95da3c3c8-b0_df_...,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,https://aphia.org/ns/taxon-name/Taxon
1,genid-3ef869bc5c4946f38793fefa95da3c3c8-b0_df_...,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,http://www.w3.org/2004/02/skos/core#Concept
2,https://aphia.org/id/taxname/1864475,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,https://aphia.org/ns/taxon-name/TaxonName
3,https://aphia.org/id/taxname/1864475,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,https://www.w3.org/2008/05/skos-xl#Label
4,genid-3ef869bc5c4946f38793fefa95da3c3c8-b0_df_...,http://www.w3.org/1999/02/22-rdf-syntax-ns#type,https://aphia.org/ns/taxon-name/Taxon


# Aphia.org SYNC DASHBOARD

In [4]:
from kgap_tools import execute_to_df, generate_sparql
# Template parameters start out empty here.
params: dict = {}

## find types

In [5]:
# Hand regex="aphia" to the rdf-types-count template
# (presumably it filters the type IRIs -- confirm in the .sparql file).
params = {"regex": "aphia"}
execute_to_df("rdf-types-count.sparql", **params)

Unnamed: 0,type,count
0,https://aphia.org/ns/taxon-name/Taxon,3180436
1,https://aphia.org/ns/taxon-name/TaxonName,1584996


## find taxname subjects

In [6]:
# No template parameters are supplied for the taxname listing.
params = {}
execute_to_df("aphia-taxnames.sparql", **params)

Unnamed: 0,tn
0,https://aphia.org/id/taxname/1864475
1,https://aphia.org/id/taxname/882221
2,https://aphia.org/id/taxname/882224
3,https://aphia.org/id/taxname/1864476
4,https://aphia.org/id/taxname/1864488
...,...
1584991,https://aphia.org/id/taxname/506232
1584992,https://aphia.org/id/taxname/1868551
1584993,https://aphia.org/id/taxname/152676
1584994,https://aphia.org/id/taxname/385677


## find predicates from aphia_id

In [7]:
# The available prefixes and the general approach are described in
# ./queries/aphia-taxn-predicates.sparql.

# Aphia ids to look at -- make a pick, or leave the list empty to get them all.
aphiaids: list[int] = [1864475, 1864476]

# Predicates to report -- an empty list falls back to
# ["taxn:fullScientificName", "dwc:scientificName"].
preds: list[str] = []

params = {"aphiaids": aphiaids, "preds": preds}
execute_to_df("aphia-taxn-predicates.sparql", **params)

Unnamed: 0,tn,pred,value,value_ln,value_type
0,https://aphia.org/id/taxname/1864475,https://aphia.org/ns/taxon-name/fullScientific...,"Hypelasma salevensis (Favre, 1913)",,http://www.w3.org/2001/XMLSchema#string
1,https://aphia.org/id/taxname/1864475,https://rs.tdwg.org/dwc/terms/scientificName,"Hypelasma salevensis (Favre, 1913)",,http://www.w3.org/2001/XMLSchema#string
2,https://aphia.org/id/taxname/1864476,https://aphia.org/ns/taxon-name/fullScientific...,"Matheronia salevensis Favre, 1913",,http://www.w3.org/2001/XMLSchema#string
3,https://aphia.org/id/taxname/1864476,https://rs.tdwg.org/dwc/terms/scientificName,"Matheronia salevensis Favre, 1913",,http://www.w3.org/2001/XMLSchema#string


### list ordered aphia-ids from triple store

In [15]:
# Run the aphia-ids query, write the result to CSV without index or header
# row, then show the frame as the cell output.
ids_df = execute_to_df("aphia-ids.sparql")
exportcsv = "/data/exports/unique-aphiaids.csv"

# Make sure the target directory exists before writing.
export_dir = Path(exportcsv).parent
export_dir.mkdir(parents=True, exist_ok=True)

ids_df.to_csv(exportcsv, index=False, header=False)
ids_df

Unnamed: 0,id
0,9
1,11
2,12
3,13
4,14
...,...
1584991,1868535
1584992,1868536
1584993,1868537
1584994,1868538


# TODO -- add lookup-by-name functions
* alternative 1: query the SPARQL endpoint
* alternative 2: call the WoRMS web service