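# SPARQL Mapping

This notebook defines a SPARQL mapping that exposes PubChem RDF compound descriptors (canonical SMILES, mass, isotope atom count) as KIF statements, and then runs a few example `filter` queries against a store that uses it.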

In [1]:
from kif_lib import *
from kif_lib.vocabulary import pc, wd

### The mapping

In [2]:
from kif_lib.compiler.sparql.mapping import SPARQL_Mapping, register
from kif_lib.namespace import RDF, XSD
from kif_lib.namespace.semsci import CHEMINF, SIO
# Template variables: `x` and `y` are the placeholders used in the
# `@register(...)` statement patterns of the mapping class below
# (`z` is not used in the cells shown in this notebook).
x, y, z = Variables('x', 'y', 'z')

In [3]:
def _attribute_triples(c, subject, attr_type, value):
    """Add the attribute pattern shared by the descriptor entries.

    Pushes three triples into the query held by ``c``::

        subject  SIO.has_attribute  _:attr
        _:attr   RDF.type           attr_type
        _:attr   SIO.has_value      value

    where ``_:attr`` is a fresh node obtained from ``c.bnode()``.

    Parameters:
       c: Object passed as ``c`` to a mapping entry; it provides
          ``bnode()`` and the query builder ``q``.
       subject: Query term standing for the statement subject.
       attr_type: ``CHEMINF`` term used as the type of the attribute node.
       value: Query term (variable or literal) for the attribute value.
    """
    attr = c.bnode()
    c.q.triples()(
        (subject, SIO.has_attribute, attr),
        (attr, RDF.type, attr_type),
        (attr, SIO.has_value, value))


class PubChemMapping(SPARQL_Mapping):
    """SPARQL mapping from statement patterns to SIO/CHEMINF triples.

    Each ``@register``-decorated entry is tied to one statement pattern
    over the template variables ``x`` (subject) and ``y`` (value) and
    adds the corresponding triples to the query being built in ``c``.
    """

    @register(wd.canonical_SMILES(Item(x), String(y)))
    def wd_canonical_SMILES(self, c, x, y):
        """Entry for ``canonical SMILES`` statements (item ``x``, string ``y``)."""
        if isinstance(y, c.Query.Literal):
            # A fixed value was given: rebuild it from its string form,
            # passing 'en' as second argument.
            # NOTE(review): presumably a language tag matching how the
            # values are stored in the target graph -- confirm.
            y = c.literal(str(y), 'en')
        _attribute_triples(
            c, x, CHEMINF.canonical_smiles_generated_by_OEChem, y)

    @register(wd.mass(Item(x), Quantity(y, wd.gram_per_mole)))
    def wd_mass(self, c, x, y):
        """Entry for ``mass`` statements whose unit is gram per mole."""
        if isinstance(y, c.Query.Literal):
            # A fixed amount was given: rebuild it as an xsd:float literal.
            y = c.literal(y, datatype=XSD.float)
        _attribute_triples(
            c, x,
            CHEMINF.molecular_weight_calculated_by_the_pubchem_software_library,
            y)

    @register(wd.instance_of(pc.Isotope_Atom_Count, wd.Wikidata_property_related_to_chemistry))
    def wd_instance_of_Isotope_Atom_Count(self, c):
        """Entry for a constant statement about ``pc.Isotope_Atom_Count``.

        The registered pattern contains no template variables and the
        entry adds no triples to the query.
        """

    @register(pc.Isotope_Atom_Count(Item(x), Quantity(y)))
    def wd_Isotope_Atom_Count(self, c, x, y):
        """Entry for ``pc.Isotope_Atom_Count`` statements (unit-less quantity)."""
        if isinstance(y, c.Query.Literal):
            # A fixed amount was given: rebuild it as an xsd:float literal.
            y = c.literal(y, datatype=XSD.float)
        _attribute_triples(
            c, x,
            CHEMINF.isotope_atom_count_generated_by_pubchem_software_library,
            y)

### Example queries

In [4]:
import logging
# Configure the root logger so that only records at level ERROR or
# above are emitted, keeping the cell outputs below free of log noise.
logging.basicConfig(level=logging.ERROR)

In [5]:
import os
# Read the target from the environment instead of hard-coding it; the
# value is handed to `Store(...)` in a later cell (presumably the
# address of the SPARQL endpoint to query -- confirm).
PUBCHEM = os.getenv('PUBCHEM')
# Fail early on a missing *or empty* variable: an empty string is not
# None, so it would otherwise slip through and only fail later inside
# the store with a less helpful error.
assert PUBCHEM, 'PUBCHEM envvar is not defined!'

In [6]:
# Store queried by every example below: built from the 'sparql-mapper2'
# store name, the PUBCHEM value read from the environment, and a fresh
# instance of the mapping class defined above.
kb = Store('sparql-mapper2', PUBCHEM, PubChemMapping())

In [7]:
def filter(*args, limit=3, **kwargs):
    """Run ``kb.filter``, display every result, then print the elapsed time.

    NOTE: this name shadows the builtin ``filter`` for the rest of the
    notebook.

    Parameters:
       *args: Positional arguments forwarded to ``kb.filter``.
       limit: Forwarded as the ``limit`` argument of ``kb.filter``
          (defaults to 3).
       **kwargs: Remaining keyword arguments forwarded to ``kb.filter``.
    """
    import time
    # perf_counter() is monotonic, so the measured duration cannot be
    # distorted by wall-clock adjustments the way time.time() can.
    started = time.perf_counter()
    for stmt in kb.filter(*args, **kwargs, limit=limit):
        display(stmt)
    print(f'{time.perf_counter() - started:.1f}s')

In [8]:
# No constraints at all: show the first 3 statements the store yields.
filter(limit=3)

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53488097](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53488097)) (**ValueSnak** (**Property** [canonical SMILES](http://www.wikidata.org/entity/P233)) "CCCCCCCCC=CCCCCCCCCOCC1CO1"))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53488169](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53488169)) (**ValueSnak** (**Property** [canonical SMILES](http://www.wikidata.org/entity/P233)) "CC(=CCCC1(C2CCC(C2)C1=C)C)CO.CC(=CCCC1(C2CC3C1(C3C2)C)C)CO"))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53488170](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53488170)) (**ValueSnak** (**Property** [canonical SMILES](http://www.wikidata.org/entity/P233)) "CC(=CCCC1(C2CCC(C2)C1=C)C)CO"))

1.1s


In [9]:
# Constrain only the subject: statements about compound CID 241.
filter(subject=pc.CID(241))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID241](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID241)) (**ValueSnak** (**Property** [canonical SMILES](http://www.wikidata.org/entity/P233)) "C1=CC=CC=C1"))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID241](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID241)) (**ValueSnak** (**Property** [mass](http://www.wikidata.org/entity/P2067)) (**Quantity** 78.11 (**Item** [gram per mole](http://www.wikidata.org/entity/Q28924752)))))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID241](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID241)) (**ValueSnak** (**Property** [http://semanticscience.org/resource/CHEMINF_000372](http://semanticscience.org/resource/CHEMINF_000372)) (**Quantity** 0)))

0.2s


In [10]:
# Constrain only the property: `mass` statements of any subject.
filter(property=wd.mass)

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53487882](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53487882)) (**ValueSnak** (**Property** [mass](http://www.wikidata.org/entity/P2067)) (**Quantity** 466.6 (**Item** [gram per mole](http://www.wikidata.org/entity/Q28924752)))))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53488097](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53488097)) (**ValueSnak** (**Property** [mass](http://www.wikidata.org/entity/P2067)) (**Quantity** 324.5 (**Item** [gram per mole](http://www.wikidata.org/entity/Q28924752)))))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53488169](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53488169)) (**ValueSnak** (**Property** [mass](http://www.wikidata.org/entity/P2067)) (**Quantity** 440.7 (**Item** [gram per mole](http://www.wikidata.org/entity/Q28924752)))))

0.2s


In [11]:
# Constrain only the value: statements whose value is the quantity
# 78.11 with unit gram per mole.
filter(value=Quantity('78.11', wd.gram_per_mole))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID143919](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID143919)) (**ValueSnak** (**Property** [mass](http://www.wikidata.org/entity/P2067)) (**Quantity** 78.11 (**Item** [gram per mole](http://www.wikidata.org/entity/Q28924752)))))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID85607149](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID85607149)) (**ValueSnak** (**Property** [mass](http://www.wikidata.org/entity/P2067)) (**Quantity** 78.11 (**Item** [gram per mole](http://www.wikidata.org/entity/Q28924752)))))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID163260](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID163260)) (**ValueSnak** (**Property** [mass](http://www.wikidata.org/entity/P2067)) (**Quantity** 78.11 (**Item** [gram per mole](http://www.wikidata.org/entity/Q28924752)))))

0.2s


In [12]:
# Constrain only the property: `instance of` statements (the mapping
# registers a single constant statement with this property).
filter(property=wd.instance_of)

(**Statement** (**Property** [http://semanticscience.org/resource/CHEMINF_000372](http://semanticscience.org/resource/CHEMINF_000372)) (**ValueSnak** (**Property** [instance of](http://www.wikidata.org/entity/P31)) (**Item** [Wikidata property related to chemistry](http://www.wikidata.org/entity/Q21294996))))

0.2s


In [13]:
# Constrain only the value: statements whose value is the unit-less
# quantity 1.
filter(value=Quantity(1))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53496129](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53496129)) (**ValueSnak** (**Property** [http://semanticscience.org/resource/CHEMINF_000372](http://semanticscience.org/resource/CHEMINF_000372)) (**Quantity** 1)))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53496258](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53496258)) (**ValueSnak** (**Property** [http://semanticscience.org/resource/CHEMINF_000372](http://semanticscience.org/resource/CHEMINF_000372)) (**Quantity** 1)))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53649691](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53649691)) (**ValueSnak** (**Property** [http://semanticscience.org/resource/CHEMINF_000372](http://semanticscience.org/resource/CHEMINF_000372)) (**Quantity** 1)))

0.2s


In [14]:
# Constrain only the property: isotope atom count statements of any
# subject.
filter(property=pc.Isotope_Atom_Count)

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53487882](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53487882)) (**ValueSnak** (**Property** [http://semanticscience.org/resource/CHEMINF_000372](http://semanticscience.org/resource/CHEMINF_000372)) (**Quantity** 0)))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53488097](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53488097)) (**ValueSnak** (**Property** [http://semanticscience.org/resource/CHEMINF_000372](http://semanticscience.org/resource/CHEMINF_000372)) (**Quantity** 0)))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53488169](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID53488169)) (**ValueSnak** (**Property** [http://semanticscience.org/resource/CHEMINF_000372](http://semanticscience.org/resource/CHEMINF_000372)) (**Quantity** 0)))

0.2s


In [15]:
# Constrain subject, property and value together: is the isotope atom
# count of compound CID 421 equal to 0?
filter(subject=pc.CID(421), property=pc.Isotope_Atom_Count, value=Quantity(0))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID421](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID421)) (**ValueSnak** (**Property** [http://semanticscience.org/resource/CHEMINF_000372](http://semanticscience.org/resource/CHEMINF_000372)) (**Quantity** 0)))

0.2s
