In [1]:
import re
import json

from pandas import DataFrame

from rdflib.graph import Graph
from rdflib.plugins.sparql.processor import SPARQLResult

In [2]:
def sparql_results_to_df(results: SPARQLResult) -> DataFrame:
    """
    Turn the rows of an rdflib SPARQL query result into a `pandas.DataFrame`.

    Every non-``None`` cell is converted with its ``toPython()`` method, while
    ``None`` cells are passed through unchanged. The column names are the
    string form of each entry in ``results.vars``.
    See https://github.com/RDFLib/rdflib/issues/1179.
    """
    column_names = [str(var) for var in results.vars]

    def python_rows():
        # Yield one list of plain Python values per result row.
        for row in results:
            yield [term.toPython() if term is not None else None for term in row]

    return DataFrame(data=python_rows(), columns=column_names)

In [6]:
# --- Configuration ------------------------------------------------------------
# Path of the JSON file that receives the synonym lookup tables.
OUTPUT_FILE = "synonyms.json"

# One entry per top-level key of the output JSON: "variable" names the key and
# "in" lists the Turtle files whose terms are merged under that key.
synonym_files = [
    {"variable": "ARCHIVES", "in": ["archive.ttl"]},
    {"variable": "INTERPRETATION", "in": ["interpretation.ttl"]},
    {"variable": "PROXIES", "in": ["paleo_proxy.ttl", "chron_proxy.ttl"]},
    {"variable": "UNITS", "in": ["paleo_units.ttl", "chron_units.ttl"]},
    {"variable": "VARIABLES", "in": ["paleo_variable.ttl", "chron_variable.ttl"]},
]

# Query run against every Turtle file: one row per labelled term, with its
# type, optional NOAA name, and all synonyms joined by " | ".
# NOTE(review): the empty prefix `:` is not declared in the query itself;
# presumably it is resolved from the namespace bindings of each parsed file —
# confirm every input file binds it.
# NOTE(review): ?type, ?label and ?noaa are projected without being aggregated
# or listed in GROUP BY; this relies on the query engine being lenient — verify
# the values picked per ?id are the intended ones.
SYNONYM_QUERY = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?id ?type ?label ?noaa (GROUP_CONCAT(?syn;separator=" | ") as ?synonyms) WHERE {
    ?id rdfs:label ?label .
    ?id a ?type .
    FILTER( STRSTARTS(STR(?type),str(:))) .
    OPTIONAL { ?id :hasNoaaPastName ?noaa }
    OPTIONAL { ?id :hasSynonym ?syn }
}
GROUP BY ?id
"""

# Strips everything up to and including the last "#" of an IRI.
FRAGMENT_PREFIX_RE = re.compile("^.*#")
# Splits the GROUP_CONCAT output back into individual synonyms. Raw string so
# that `\s` and `\|` reach the regex engine instead of being treated as
# (invalid) Python string escapes.
SYNONYM_SEPARATOR_RE = re.compile(r"\s*\|\s*")

# --- Build the lookup tables --------------------------------------------------
# data[variable][type local name][lower-cased synonym] -> {"id", "label"}
data = {}

for synonym_file in synonym_files:
    ttl_files = synonym_file["in"]
    var_name = synonym_file["variable"]

    syn_data = {}
    for ttl_file in ttl_files:
        graph = Graph()
        graph.parse(ttl_file)

        result = graph.query(SYNONYM_QUERY)

        df = sparql_results_to_df(result)
        display(df)

        for record in df.to_dict(orient="records"):
            # Deliberately not named `id` / `type`: at notebook top level those
            # names would shadow the builtins for every later cell.
            term_id = record["id"]
            label = record["label"]
            noaa = record["noaa"]

            type_name = FRAGMENT_PREFIX_RE.sub("", record["type"])
            localname = FRAGMENT_PREFIX_RE.sub("", term_id)

            # Label and local name always count as synonyms; the NOAA past
            # name does too when one is present. Explicit synonyms follow.
            candidates = [label, localname]
            if isinstance(noaa, str) and noaa:
                candidates.append(noaa)
            raw_synonyms = record["synonyms"]
            if isinstance(raw_synonyms, str):
                candidates.extend(SYNONYM_SEPARATOR_RE.split(raw_synonyms))

            type_entries = syn_data.setdefault(type_name, {})
            for candidate in candidates:
                if candidate:  # skip empty strings left by an empty synonym list
                    # Later candidates overwrite earlier ones that share the
                    # same lower-cased key.
                    type_entries[candidate.lower()] = {
                        "id": term_id,
                        "label": label,
                    }

    data[var_name] = syn_data

# --- Persist ------------------------------------------------------------------
with open(OUTPUT_FILE, "w") as ofile:
    json.dump(data, ofile, indent=3)

Unnamed: 0,id,type,label,noaa,synonyms
0,http://linked.earth/ontology/archive#Borehole,http://linked.earth/ontology/archive#ArchiveType,Borehole,borehole,
1,http://linked.earth/ontology/archive#Coral,http://linked.earth/ontology/archive#ArchiveType,Coral,coral,
2,http://linked.earth/ontology/archive#FluvialSe...,http://linked.earth/ontology/archive#ArchiveType,Fluvial sediment,,Creek | Fluvial | River | Stream
3,http://linked.earth/ontology/archive#GlacierIce,http://linked.earth/ontology/archive#ArchiveType,Glacier ice,ice cores,
4,http://linked.earth/ontology/archive#GroundIce,http://linked.earth/ontology/archive#ArchiveType,Ground ice,bulk ice,
5,http://linked.earth/ontology/archive#LakeSediment,http://linked.earth/ontology/archive#ArchiveType,Lake sediment,,Lagoon | Lake | Lake Sediment
6,http://linked.earth/ontology/archive#MarineSed...,http://linked.earth/ontology/archive#ArchiveType,Marine sediment,,Delta | Marine
7,http://linked.earth/ontology/archive#Midden,http://linked.earth/ontology/archive#ArchiveType,Midden,,
8,http://linked.earth/ontology/archive#MolluskShell,http://linked.earth/ontology/archive#ArchiveType,Mollusk shell,bivalve,MolluskShells
9,http://linked.earth/ontology/archive#Peat,http://linked.earth/ontology/archive#ArchiveType,Peat,peat,Bog | Fen | Marsh | Mire | Swamp


Unnamed: 0,id,type,label,noaa,synonyms
0,http://linked.earth/ontology/interpretation#C3...,http://linked.earth/ontology/interpretation#In...,C3C4Ratio,,composition C3-C4 plants
1,http://linked.earth/ontology/interpretation#ci...,http://linked.earth/ontology/interpretation#In...,circulationIndex,circulation index,MODE | NAO index
2,http://linked.earth/ontology/interpretation#ci...,http://linked.earth/ontology/interpretation#In...,circulationVariable,circulation variable,changes in monsoon intensity. | circulation | ...
3,http://linked.earth/ontology/interpretation#di...,http://linked.earth/ontology/interpretation#In...,dissolvedOxygen,dissolved oxygen,suboxia
4,http://linked.earth/ontology/interpretation#dust,http://linked.earth/ontology/interpretation#In...,dust,dust,DUST
...,...,...,...,...,...
172,http://linked.earth/ontology/interpretation#Su...,http://linked.earth/ontology/interpretation#In...,Summer,summer,summer | warm season | mostly summer | summer+...
173,http://linked.earth/ontology/interpretation#Wa...,http://linked.earth/ontology/interpretation#In...,Warmest Month,231Pa excess,warmest month | warmest
174,http://linked.earth/ontology/interpretation#We...,http://linked.earth/ontology/interpretation#In...,Wet Season,,monsoon | Andean wet season | Monsoon | monsoo...
175,http://linked.earth/ontology/interpretation#Wi...,http://linked.earth/ontology/interpretation#In...,Win-Spr,,winter/spring


Unnamed: 0,id,type,label,noaa,synonyms
0,http://linked.earth/ontology/proxy#accumulatio...,http://linked.earth/ontology/proxy#PaleoProxy,accumulation rate,,sed accumulation
1,http://linked.earth/ontology/proxy#ACL,http://linked.earth/ontology/proxy#PaleoProxy,ACL,average chain length,
2,http://linked.earth/ontology/proxy#Al2O3,http://linked.earth/ontology/proxy#PaleoProxy,Al2O3,aluminum oxide,
3,http://linked.earth/ontology/proxy#alkenone,http://linked.earth/ontology/proxy#PaleoProxy,alkenone,alkenone,
4,http://linked.earth/ontology/proxy#amoeba,http://linked.earth/ontology/proxy#PaleoProxy,amoeba,testate amoeba,
...,...,...,...,...,...
89,http://linked.earth/ontology/proxy#floral_asse...,http://linked.earth/ontology/proxy#PaleoProxyG...,floral assemblage,,
90,http://linked.earth/ontology/proxy#isotopic,http://linked.earth/ontology/proxy#PaleoProxyG...,isotopic,isotope,
91,http://linked.earth/ontology/proxy#mineral,http://linked.earth/ontology/proxy#PaleoProxyG...,mineral,,
92,http://linked.earth/ontology/proxy#pyrogenic,http://linked.earth/ontology/proxy#PaleoProxyG...,pyrogenic,fire history,


Unnamed: 0,id,type,label,noaa,synonyms


Unnamed: 0,id,type,label,noaa,synonyms
0,http://linked.earth/ontology/units#atomic_ratio,http://linked.earth/ontology/units#PaleoUnit,atomic ratio,atomic ratio,Atomic ratio
1,http://linked.earth/ontology/units#cgs,http://linked.earth/ontology/units#PaleoUnit,cgs,dimensionless (CGS system),
2,http://linked.earth/ontology/units#cm,http://linked.earth/ontology/units#PaleoUnit,cm,centimeter,cmblf
3,http://linked.earth/ontology/units#cm_kyr,http://linked.earth/ontology/units#PaleoUnit,cm/kyr,centimeter per kiloyear,
4,http://linked.earth/ontology/units#cm_yr,http://linked.earth/ontology/units#PaleoUnit,cm/yr,centimeter per year,cm/a | cm yr-1
...,...,...,...,...,...
66,http://linked.earth/ontology/units#yr_AD,http://linked.earth/ontology/units#PaleoUnit,yr AD,year Common Era,yr | CE | AD | year ce | ad/bc | cal yr AD | y...
67,http://linked.earth/ontology/units#yr_b2k,http://linked.earth/ontology/units#PaleoUnit,yr b2k,,b2000 | cal. BP2000 | Years before 2k
68,http://linked.earth/ontology/units#yr_BP,http://linked.earth/ontology/units#PaleoUnit,yr BP,calendar year before present,BP | cal years BP | cal year BP | cal yr BP | ...
69,http://linked.earth/ontology/units#yr_ka,http://linked.earth/ontology/units#PaleoUnit,yr ka,calendar kiloyear before present,ka


Unnamed: 0,id,type,label,noaa,synonyms


Unnamed: 0,id,type,label,noaa,synonyms
0,http://linked.earth/ontology/variables#ACL,http://linked.earth/ontology/variables#PaleoVa...,ACL,average chain length,ACL (27-33) | ACL25-35 | ACL27-31 | ACLC22-30 ...
1,http://linked.earth/ontology/variables#AET_PET,http://linked.earth/ontology/variables#PaleoVa...,AET/PET,,
2,http://linked.earth/ontology/variables#ARM_IRM,http://linked.earth/ontology/variables#PaleoVa...,ARM/IRM,anhysteretic remanent magnetization/isothermal...,arm_irm
3,http://linked.earth/ontology/variables#ARSTAN,http://linked.earth/ontology/variables#PaleoVa...,ARSTAN,ARSTAN chronology method,ARS
4,http://linked.earth/ontology/variables#Al,http://linked.earth/ontology/variables#PaleoVa...,Al,aluminum,Al peak area | AlProp
...,...,...,...,...,...
294,http://linked.earth/ontology/variables#waterCo...,http://linked.earth/ontology/variables#PaleoVa...,waterContent,water content,
295,http://linked.earth/ontology/variables#waterTa...,http://linked.earth/ontology/variables#PaleoVa...,waterTableDepth,water table depth,Water Table | Water Table Detrended | Water_ta...
296,http://linked.earth/ontology/variables#wetBulk...,http://linked.earth/ontology/variables#PaleoVa...,wetBulkDensity,,WetBD
297,http://linked.earth/ontology/variables#year,http://linked.earth/ontology/variables#PaleoVa...,year,year,Recon0x2EDate | Year | Year b2k | age_CE | yea...


Unnamed: 0,id,type,label,noaa,synonyms
