In [1]:
# Uncomment to see the SPARQL queries.
#import logging
#logging.basicConfig(level=logging.DEBUG)

# SPARQL Mapping

In [2]:
from kif_lib import *
from kif_lib.vocabulary import pc, wd

### The mapping

In [3]:
from kif_lib.compiler.sparql.mapping import SPARQL_Mapping, register
from kif_lib.namespace import RDF, XSD
from kif_lib.namespace.semsci import CHEMINF, SIO
# Pattern variables used as placeholders in the @register patterns of the
# mapping class below. Only x and y are referenced in the visible cells;
# z is currently unused there.
x, y, z = Variables('x', 'y', 'z')

In [4]:
def _push_sio_attribute(c, subject, attr_type, value):
    """Add the attribute pattern shared by all descriptor mappings below.

    Pushes three triples onto ``c.q``: `subject` has an attribute (a node
    freshly obtained from ``c.bnode()``), that attribute has RDF type
    `attr_type`, and that attribute has value `value`.

    Parameters:
       c: The ``c`` argument received by the mapping callbacks
          (presumably the SPARQL compiler -- TODO confirm).
       subject: Term bound to the subject of the statement pattern.
       attr_type: CHEMINF class identifying which descriptor is meant.
       value: Term bound to the value of the statement pattern.
    """
    attr = c.bnode()
    c.q.triples()(
        (subject, SIO.has_attribute, attr),
        (attr, RDF.type, attr_type),
        (attr, SIO.has_value, value))


class PubChemMapping(SPARQL_Mapping):
    """Mapping between statement patterns and PubChem's SIO/CHEMINF triples.

    Each method is attached to a statement pattern through ``@register``
    and describes the triples that correspond to that pattern.
    """

    @register([wd.canonical_SMILES(Item(x), String(y))])
    def wd_canonical_SMILES(self, c, x, y):
        """Map *canonical SMILES* to the OEChem canonical-SMILES attribute.

        When `y` is already a literal it is rebuilt with ``'en'`` as second
        argument -- presumably the language tag carried by the stored
        values (TODO confirm).
        """
        if isinstance(y, c.Query.Literal):
            y = c.literal(str(y), 'en')
        _push_sio_attribute(
            c, x, CHEMINF.canonical_smiles_generated_by_OEChem, y)

    @register([wd.mass(Item(x), Quantity(y, wd.gram_per_mole))])
    def wd_mass(self, c, x, y):
        """Map *mass* (gram per mole) to the molecular-weight attribute.

        When `y` is already a literal it is re-typed as ``XSD.float``.
        """
        if isinstance(y, c.Query.Literal):
            y = c.literal(y, datatype=XSD.float)
        _push_sio_attribute(
            c, x,
            CHEMINF.molecular_weight_calculated_by_the_pubchem_software_library,
            y)

    @register([wd.instance_of(pc.Isotope_Atom_Count, wd.Wikidata_property_related_to_chemistry)])
    def wd_instance_of_Isotope_Atom_Count(self, c):
        """Register the fixed statement classifying ``pc.Isotope_Atom_Count``.

        The registered pattern contains no variables and no triples are
        added here, so the body is intentionally empty.
        """

    @register([pc.Isotope_Atom_Count(Item(x), Quantity(y))])
    def wd_Isotope_Atom_Count(self, c, x, y):
        """Map ``pc.Isotope_Atom_Count`` to the isotope-atom-count attribute.

        When `y` is already a literal it is re-typed as ``XSD.float``.
        """
        if isinstance(y, c.Query.Literal):
            y = c.literal(y, datatype=XSD.float)
        _push_sio_attribute(
            c, x,
            CHEMINF.isotope_atom_count_generated_by_pubchem_software_library,
            y)

### Example queries

In [5]:
import logging
# Keep the example output readable by reporting errors only.
# NOTE: logging.basicConfig() does nothing when the root logger already has
# handlers, so if the DEBUG configuration in the first cell was uncommented
# and run, this call does not lower the verbosity again.
logging.basicConfig(level=logging.ERROR)

In [6]:
import os
# The value handed to Store() below is read from the environment rather
# than being hardcoded in the notebook.
PUBCHEM = os.getenv('PUBCHEM')
# Explicit check instead of `assert`: assertions are removed when Python
# runs with -O, and an empty value is as unusable as a missing one.
if not PUBCHEM:
    raise RuntimeError('PUBCHEM envvar is not defined!')

In [7]:
# Create the 'sparql2' store used by all example queries below, passing the
# value of the PUBCHEM environment variable (presumably the SPARQL endpoint
# address -- TODO confirm) and a fresh instance of the mapping defined above.
kb = Store('sparql2', PUBCHEM, PubChemMapping())

In [8]:
def filter(*args, limit=3, **kwargs):
    """Run ``kb.filter``, display every returned statement, print the time.

    NOTE: the name ``filter`` is kept because the example cells call it;
    be aware that it shadows Python's built-in ``filter`` in this notebook.

    Parameters:
       *args: Positional arguments forwarded unchanged to ``kb.filter``.
       limit: Forwarded as the ``limit`` keyword of ``kb.filter``
          (defaults to 3).
       **kwargs: Remaining keyword arguments forwarded unchanged.
    """
    import time
    # perf_counter() is a monotonic clock meant for measuring intervals;
    # the wall clock can jump while the query is running.
    start = time.perf_counter()
    for stmt in kb.filter(*args, limit=limit, **kwargs):
        display(stmt)
    print(f'{time.perf_counter() - start:.1f}s')

In [9]:
# Unconstrained query: request three statements of any kind.
filter(limit=3)

(**Statement** (**Property** [http://semanticscience.org/resource/CHEMINF_000372](http://semanticscience.org/resource/CHEMINF_000372)) (**ValueSnak** (**Property** [instance of](http://www.wikidata.org/entity/P31)) (**Item** [Wikidata property related to chemistry](http://www.wikidata.org/entity/Q21294996))))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID3002945](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID3002945)) (**ValueSnak** (**Property** [canonical SMILES](http://www.wikidata.org/entity/P233)) "CC1CN(CC(N1)C)C2=C(C(=C3C(=C2F)N(C=C(C3=O)C(=O)O)C4CC4)N)F.C1=CC(=CC(=C1)F)CC(C(=O)O)N"))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID3002946](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID3002946)) (**ValueSnak** (**Property** [canonical SMILES](http://www.wikidata.org/entity/P233)) "CC(C(C(=O)NS(=O)(=O)OCC1C(C(C(O1)CCN2N=C(N=N2)C3=CC=C(C=C3)OC4=CC=CC=C4)O)O)N)OC"))

0.7s


In [10]:
# Constrain only the subject: statements about PubChem compound CID 241.
filter(subject=pc.CID(241))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID241](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID241)) (**ValueSnak** (**Property** [canonical SMILES](http://www.wikidata.org/entity/P233)) "C1=CC=CC=C1"))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID241](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID241)) (**ValueSnak** (**Property** [mass](http://www.wikidata.org/entity/P2067)) (**Quantity** 78.11 (**Item** [gram per mole](http://www.wikidata.org/entity/Q28924752)))))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID241](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID241)) (**ValueSnak** (**Property** [http://semanticscience.org/resource/CHEMINF_000372](http://semanticscience.org/resource/CHEMINF_000372)) (**Quantity** 0)))

0.2s


In [11]:
# Constrain only the property: statements that use the mass property.
filter(property=wd.mass)

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID3002944](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID3002944)) (**ValueSnak** (**Property** [mass](http://www.wikidata.org/entity/P2067)) (**Quantity** 556.6 (**Item** [gram per mole](http://www.wikidata.org/entity/Q28924752)))))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID3002945](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID3002945)) (**ValueSnak** (**Property** [mass](http://www.wikidata.org/entity/P2067)) (**Quantity** 575.6 (**Item** [gram per mole](http://www.wikidata.org/entity/Q28924752)))))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID3002946](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID3002946)) (**ValueSnak** (**Property** [mass](http://www.wikidata.org/entity/P2067)) (**Quantity** 592.6 (**Item** [gram per mole](http://www.wikidata.org/entity/Q28924752)))))

0.2s


In [12]:
# Constrain only the value: statements whose value is 78.11 gram per mole.
filter(value=Quantity('78.11', wd.gram_per_mole))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID143919](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID143919)) (**ValueSnak** (**Property** [mass](http://www.wikidata.org/entity/P2067)) (**Quantity** 78.11 (**Item** [gram per mole](http://www.wikidata.org/entity/Q28924752)))))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID602958](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID602958)) (**ValueSnak** (**Property** [mass](http://www.wikidata.org/entity/P2067)) (**Quantity** 78.11 (**Item** [gram per mole](http://www.wikidata.org/entity/Q28924752)))))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID163260](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID163260)) (**ValueSnak** (**Property** [mass](http://www.wikidata.org/entity/P2067)) (**Quantity** 78.11 (**Item** [gram per mole](http://www.wikidata.org/entity/Q28924752)))))

0.2s


In [13]:
# Constrain only the property: "instance of" statements. The mapping above
# registers a single fixed statement for this property.
filter(property=wd.instance_of)

(**Statement** (**Property** [http://semanticscience.org/resource/CHEMINF_000372](http://semanticscience.org/resource/CHEMINF_000372)) (**ValueSnak** (**Property** [instance of](http://www.wikidata.org/entity/P31)) (**Item** [Wikidata property related to chemistry](http://www.wikidata.org/entity/Q21294996))))

0.1s


In [14]:
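# TODO(review): this example is disabled and the reason is not recorded;
# either re-enable it or remove the cell.
#filter(value=Quantity(1))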

In [15]:
# Constrain only the property: statements that use the PubChem-specific
# isotope atom count property.
filter(property=pc.Isotope_Atom_Count)

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID3002944](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID3002944)) (**ValueSnak** (**Property** [http://semanticscience.org/resource/CHEMINF_000372](http://semanticscience.org/resource/CHEMINF_000372)) (**Quantity** 0)))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID3002945](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID3002945)) (**ValueSnak** (**Property** [http://semanticscience.org/resource/CHEMINF_000372](http://semanticscience.org/resource/CHEMINF_000372)) (**Quantity** 0)))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID3002946](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID3002946)) (**ValueSnak** (**Property** [http://semanticscience.org/resource/CHEMINF_000372](http://semanticscience.org/resource/CHEMINF_000372)) (**Quantity** 0)))

0.2s


In [16]:
# Fully constrained query: subject, property and value are all fixed, so
# this checks whether that one statement exists.
filter(subject=pc.CID(421), property=pc.Isotope_Atom_Count, value=Quantity(0))

(**Statement** (**Item** [http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID421](http://rdf.ncbi.nlm.nih.gov/pubchem/compound/CID421)) (**ValueSnak** (**Property** [http://semanticscience.org/resource/CHEMINF_000372](http://semanticscience.org/resource/CHEMINF_000372)) (**Quantity** 0)))

0.2s
