In [1]:
# Uncomment to see the SPARQL queries.
#import logging
#logging.basicConfig(level=logging.DEBUG)

# PubChem

In [2]:
from kif_lib import *
from kif_lib.store.mapping import PubChemMapping
from kif_lib.vocabulary import wd

## PubChemMapping

In [3]:
# Instantiate the PubChem mapping (reused by every later cell) and print,
# numbered from 1, the Wikidata label of each entry found in its `specs`.
pubchem = PubChemMapping()
for position, spec_key in enumerate(pubchem.specs, start=1):
    label = wd.get_entity_label(spec_key)
    print(f'#{position}', label)

#1 canonical SMILES
#2 chemical formula
#3 CAS Registry Number
#4 ChEBI ID
#5 ChEMBL ID
#6 described by source
#7 has part
#8 InChI
#9 InChIKey
#10 instance of
#11 isomeric SMILES
#12 mass
#13 manufacturer
#14 PubChem CID
#15 stereoisomer of
#16 trading name
#17 author name string
#18 main subject
#19 patent number
#20 publication date
#21 sponsor
#22 title
#23 official website
#24 short name


## Showcase

In [4]:
import os
# The PubChem location is taken from the PUBCHEM environment variable so that
# nothing deployment-specific is hard-coded in the notebook.
PUBCHEM = os.getenv('PUBCHEM')
# Fail fast when the variable is unset *or* set to an empty string: an empty
# value is just as unusable as a missing one, and rejecting it here gives a
# clear message instead of an obscure failure when the store is created.
assert PUBCHEM, 'you need to define the PUBCHEM env var!'

In [5]:
# Create the store that the `get_descriptor` and `filter` helpers query:
# a 'sparql-mapper' store built from the PUBCHEM value read from the
# environment and the `pubchem` mapping instance.
# NOTE(review): PUBCHEM is presumably the URL of a PubChem SPARQL endpoint — confirm.
kb = Store('sparql-mapper', PUBCHEM, pubchem)

In [6]:
def get_descriptor(*args, **kwargs):
    """Display each (entity, descriptor) pair produced by ``kb.get_descriptor``.

    Every positional and keyword argument is forwarded unchanged to
    ``kb.get_descriptor``.  For each pair it yields, the entity is shown
    first and its descriptor second, using the notebook's ``display``.
    Nothing is returned.
    """
    pairs = kb.get_descriptor(*args, **kwargs)
    for subject, descriptor in pairs:
        display(subject)
        display(descriptor)

In [7]:
def filter(*args, **kwargs):
    """Display every statement produced by ``kb.filter``.

    Every positional and keyword argument is forwarded unchanged to
    ``kb.filter``; each resulting statement is shown with the notebook's
    ``display``.  Nothing is returned.

    Note: defining this helper at notebook level shadows Python's built-in
    ``filter`` for all subsequent cells.
    """
    statements = kb.filter(*args, **kwargs)
    for statement in statements:
        display(statement)

### Compounds

#### Descriptor

In [8]:
get_descriptor(pubchem.compound(241))

(**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241))

(**ItemDescriptor**
- "\[6]annulene"@en
- (**TextSet**
  - "(6)annulene"@en
  - "1,3,5-cyclohexatriene"@en
  - "1,3-cyclohexadiene-5,6-diylradical"@en
  - "3,4-dnh"@en
  - "BENZENE"@en
  - "BNZ"@en
  - "RNG"@en
  - "\[6]-annulene"@en
  - "annulene"@en
  - "aromatic alkane"@en
  - "bdbm50167939"@en
  - "benzeen"@en
  - "benzeen \[dutch]"@en
  - "benzen"@en
  - "benzen \[polish]"@en
  - "benzene"@en
  - "benzene (including benzene from gasoline)"@en
  - "benzene + aniline combo"@en
  - "benzene 10 microg/ml in methanol"@en
  - "benzene 100 microg/ml in methanol"@en
  - "benzene 5000 microg/ml in methanol"@en
  - "benzene \[un1114]  \[flammable liquid]"@en
  - "benzene acs grade"@en
  - "benzene solution"@en
  - "benzene, acs reagent, >=99.0%"@en
  - "benzene, acs spectrophotometric grade, >=99%"@en
  - "benzene, analytical standard"@en
  - "benzene, anhydrous, 99.8%"@en
  - "benzene, ar, >=99.5%"@en
  - "benzene, for hplc"@en
  - "benzene, for hplc, >=99.8%"@en
  - "benzene, for hplc, >=99.9%"@en
  - "benzene, for residue analysis, suitable for 5000 per jis"@en
  - "benzene, jis special grade, >=99.5%"@en
  - "benzene, labeled with carbon-14 and tritium"@en
  - "benzene, lr, >=99%"@en
  - "benzene, pharmaceutical secondary standard; certified reference material"@en
  - "benzene, pure"@en
  - "benzene, purification grade"@en
  - "benzene, puriss. p.a., reag. ph. eur., >=99.7%"@en
  - "benzene, puriss., absolute, over molecular sieve (h2o <=0.005%), >=99.5% (gc)"@en
  - "benzene, purum, >=99.0% (gc)"@en
  - "benzene, reagentplus(r), thiophene free, >=99%"@en
  - "benzene, saj first grade, >=99.0%"@en
  - "benzene, suitable for 1000 per jis, >=99.5%, for residue analysis"@en
  - "benzene, suitable for 300 per jis, >=99.5%, for residue analysis"@en
  - "benzene-"@en
  - "benzin"@en
  - "benzin (obs.)"@en
  - "benzine"@en
  - "benzine (obs.)"@en
  - "benzinum"@en
  - "benzol"@en
  - "benzol 90"@en
  - "benzol diluent"@en
  - "benzole"@en
  - "benzolene"@en
  - "benzolo"@en
  - "benzolo \[italian]"@en
  - "benzolum"@en
  - "bicarburet of hydrogen"@en
  - "bm 613"@en
  - "bnz"@en
  - "carbon oil"@en
  - "caswell no. 077"@en
  - "cc-34,(+/-)"@en
  - "ccris 70"@en
  - "coal naphtha"@en
  - "cyclohexatriene"@en
  - "dsstox\_cid\_135"@en
  - "dsstox\_rid\_79433"@en
  - "ec 200-753-7"@en
  - "epa pesticide chemical code 008801"@en
  - "erythro-phenyl-2-piperidyl-carbinol,(-)"@en
  - "fenzen"@en
  - "fenzen \[czech]"@en
  - "hsdb 35"@en
  - "mfcd00003009"@en
  - "mineral naphtha"@en
  - "motor benzol"@en
  - "nci-c55276"@en
  - "nitration benzene"@en
  - "p-benzene"@en
  - "ph-h"@en
  - "phene"@en
  - "phenyl hydride"@en
  - "phenyl; phenyl radical"@en
  - "pyrobenzole"@en
  - "rcra waste number u019"@en
  - "rng"@en
  - "trans-n, n-dimethylphenylcyclopropylamine"@en
  - "trans-n-methylphenylcyclopropylamine"@en
  - "un 1114"@en
  - "un1114"@en
  - "wln: rh"@en
  - "{\[6]annulene}"@en)
- "A six-carbon aromatic annulene in which each carbon atom donates one of its two 2p electrons into a delocalised pi system. A toxic, flammable liquid byproduct of coal distillation, it is used as an industrial solvent. Benzene is a carcinogen that also damages bone marrow and the central nervous system."@en)

#### Class

In [9]:
filter(pubchem.compound(241), wd.instance_of, limit=1)

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [instance of](http://www.wikidata.org/entity/P31)) (**Item** [type of a chemical entity](http://www.wikidata.org/entity/Q113145171))))

#### Chemical formula

In [10]:
filter(pubchem.compound(241), wd.chemical_formula, limit=1)

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [chemical formula](http://www.wikidata.org/entity/P274)) "C6H6"))

#### Canonical SMILES

In [11]:
filter(pubchem.compound(241), wd.canonical_SMILES, limit=1)

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [canonical SMILES](http://www.wikidata.org/entity/P233)) "C1=CC=CC=C1"))

#### Isomeric SMILES

In [12]:
filter(pubchem.compound(241), wd.isomeric_SMILES, limit=1)

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [isomeric SMILES](http://www.wikidata.org/entity/P2017)) "C1=CC=CC=C1"))

#### InChI

In [13]:
filter(pubchem.compound(241), wd.InChI, limit=1)

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [InChI](http://www.wikidata.org/entity/P234)) "InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"))

#### Trading names

In [14]:
# The subject is None here, so the lookup is driven by the property
# (trading name) and the text value 'tylenol' rather than by a known compound.
# NOTE(review): None presumably leaves the subject unconstrained — confirm
# against kif_lib's filter documentation.
filter(None, wd.trading_name, Text('tylenol'), limit=3)

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID1983](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID1983)) (**ValueSnak** (**Property** [trading name](http://www.wikidata.org/entity/P6427)) "tylenol"@en))

#### Mass

In [15]:
filter(pubchem.compound(241), wd.mass, limit=1)

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [mass](http://www.wikidata.org/entity/P2067)) (**Quantity** 78.11000061035156 (**Item** [gram per mole](http://www.wikidata.org/entity/Q28924752)))))

#### Stereoisomers

In [16]:
filter(pubchem.compound(4422), wd.stereoisomer_of, limit=3)

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID4422](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID4422)) (**ValueSnak** (**Property** [stereoisomer of](http://www.wikidata.org/entity/P3364)) (**Item** [wd:Q_PUBCHEM_COMPOUND_CID41321](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID41321))))

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID4422](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID4422)) (**ValueSnak** (**Property** [stereoisomer of](http://www.wikidata.org/entity/P3364)) (**Item** [wd:Q_PUBCHEM_COMPOUND_CID41321](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID41321))))

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID4422](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID4422)) (**ValueSnak** (**Property** [stereoisomer of](http://www.wikidata.org/entity/P3364)) (**Item** [wd:Q_PUBCHEM_COMPOUND_CID5284594](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID5284594))))

#### Parts

In [17]:
# Reverse lookup: the subject is None and compound 241 is passed in the value
# position, so this asks for statements whose "has part" value is that compound.
# NOTE(review): None presumably leaves the subject unconstrained — confirm
# against kif_lib's filter documentation.
filter(None, wd.has_part, pubchem.compound(241), limit=3)

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID135539531](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID135539531)) (**ValueSnak** (**Property** [has part](http://www.wikidata.org/entity/P527)) (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241))))

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID135539531](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID135539531)) (**ValueSnak** (**Property** [has part](http://www.wikidata.org/entity/P527)) (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241))))

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID66626154](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID66626154)) (**ValueSnak** (**Property** [has part](http://www.wikidata.org/entity/P527)) (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241))))

#### Vendors

In [18]:
filter(pubchem.compound(241), wd.manufacturer, limit=3)

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [manufacturer](http://www.wikidata.org/entity/P176)) (**Item** [wd:Q_PUBCHEM_SOURCE_Tractus](http://www.wikidata.org/entity/Q_PUBCHEM_SOURCE_Tractus))))

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [manufacturer](http://www.wikidata.org/entity/P176)) (**Item** [wd:Q_PUBCHEM_SOURCE_ID15747](http://www.wikidata.org/entity/Q_PUBCHEM_SOURCE_ID15747))))

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [manufacturer](http://www.wikidata.org/entity/P176)) (**Item** [wd:Q_PUBCHEM_SOURCE_ID1198](http://www.wikidata.org/entity/Q_PUBCHEM_SOURCE_ID1198))))

#### Patents

In [19]:
# The third argument is not a plain value: it is `wd.instance_of` applied to
# `wd.patent`.
# NOTE(review): judging by the returned items (all PubChem patent entities),
# this presumably restricts the "described by source" values to instances of
# patent — confirm against kif_lib's filter documentation.
filter(pubchem.compound(241), wd.described_by_source, wd.instance_of(wd.patent), limit=3)

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [described by source](http://www.wikidata.org/entity/P1343)) (**Item** [wd:Q_PUBCHEM_PATENT_US-2013260092-A1](http://www.wikidata.org/entity/Q_PUBCHEM_PATENT_US-2013260092-A1))))

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [described by source](http://www.wikidata.org/entity/P1343)) (**Item** [wd:Q_PUBCHEM_PATENT_US-2007135652-A1](http://www.wikidata.org/entity/Q_PUBCHEM_PATENT_US-2007135652-A1))))

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [described by source](http://www.wikidata.org/entity/P1343)) (**Item** [wd:Q_PUBCHEM_PATENT_US-2008233410-A1](http://www.wikidata.org/entity/Q_PUBCHEM_PATENT_US-2008233410-A1))))

#### Median/minimal lethal dose

In [20]:
filter(pubchem.compound(241), wd.median_lethal_dose, limit=3)

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [median lethal dose (LD50)](http://www.wikidata.org/entity/P2240)) (**Quantity** 930 (**Item** [milligram per kilogram](http://www.wikidata.org/entity/Q21091747)))))

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [median lethal dose (LD50)](http://www.wikidata.org/entity/P2240)) (**Quantity** 1100 (**Item** [microgram per kilogram](http://www.wikidata.org/entity/Q107313731)))))

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [median lethal dose (LD50)](http://www.wikidata.org/entity/P2240)) (**Quantity** 4700 (**Item** [milligram per kilogram](http://www.wikidata.org/entity/Q21091747)))))

In [21]:
filter(pubchem.compound(241), wd.minimal_lethal_dose, limit=3)

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [minimal lethal dose](http://www.wikidata.org/entity/P2300)) (**Quantity** 194 (**Item** [milligram per kilogram](http://www.wikidata.org/entity/Q21091747)))))

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [minimal lethal dose](http://www.wikidata.org/entity/P2300)) (**Quantity** 2 (**Item** [gram per kilogram](http://www.wikidata.org/entity/Q21061369)))))

(**Statement** (**Item** [wd:Q_PUBCHEM_COMPOUND_CID241](http://www.wikidata.org/entity/Q_PUBCHEM_COMPOUND_CID241)) (**ValueSnak** (**Property** [minimal lethal dose](http://www.wikidata.org/entity/P2300)) (**Quantity** 88 (**Item** [milligram per kilogram](http://www.wikidata.org/entity/Q21091747)))))

### Patents

#### Descriptor

In [22]:
get_descriptor(pubchem.patent('CA-2009529-C'))

(**Item** [wd:Q_PUBCHEM_PATENT_CA-2009529-C](http://www.wikidata.org/entity/Q_PUBCHEM_PATENT_CA-2009529-C))

(**ItemDescriptor**
- "Servicing interrupt requests in a data processing system without using the services of an operating system"@en
- *no aliases*
- "Abstract The functions of two virtual operating systems (e.g., S/370 VM, VSE or IX370 and S/88 OS) are merged into one physical system. Partner pairs of S/88 processors run the S/88 OS and handle the fault tolerant and single system image aspects of the system.  One or more partner pairs of S/370 processors are coupled to corresponding S/88 processors directly and through the S/88 bus. Each S/370 processor is allocated from 1 to 16 megabytes of contiguous storage from the S/88 main storage.  Each S/370 virtual operating system thinks its memory allocation starts at address 0, and it manages its memory through normal S/370 dynamic memory allocation and paging techniques.  The S/370 is limit checked to prevent the S/370 from accessing S/88 memory space.  The S/88 Operating System is the master over all system hardware and I/O devices.  The S/88 processors access the S/370 address space in direct response to a S/88 application program so that the S/88 may move I/O data into the S/370 I/O buffers and process the S/370 I/O operations.  The S/88 and S/370 peer processor pairs execute their respective Operating Systems in a single system environment without significant rewriting of either operating system.  Neither operating system is aware of the other operating system nor the other processor pairs."@en)

#### Class

In [23]:
filter(pubchem.patent('CA-2009529-C'), wd.instance_of, limit=1)

(**Statement** (**Item** [wd:Q_PUBCHEM_PATENT_CA-2009529-C](http://www.wikidata.org/entity/Q_PUBCHEM_PATENT_CA-2009529-C)) (**ValueSnak** (**Property** [instance of](http://www.wikidata.org/entity/P31)) (**Item** [patent](http://www.wikidata.org/entity/Q253623))))

#### Number

In [24]:
filter(pubchem.patent('CA-2009529-C'), wd.patent_number, limit=1)

(**Statement** (**Item** [wd:Q_PUBCHEM_PATENT_CA-2009529-C](http://www.wikidata.org/entity/Q_PUBCHEM_PATENT_CA-2009529-C)) (**ValueSnak** (**Property** [patent number](http://www.wikidata.org/entity/P1246)) "CA-2009529-C"))

#### Title

In [25]:
filter(pubchem.patent('CA-2009529-C'), wd.title, limit=1)

(**Statement** (**Item** [wd:Q_PUBCHEM_PATENT_CA-2009529-C](http://www.wikidata.org/entity/Q_PUBCHEM_PATENT_CA-2009529-C)) (**ValueSnak** (**Property** [title](http://www.wikidata.org/entity/P1476)) "Servicing interrupt requests in a data processing system without using the services of an operating system"@en))

#### Publication date

In [26]:
filter(pubchem.patent('CA-2009529-C'), wd.publication_date, limit=1)

(**Statement** (**Item** [wd:Q_PUBCHEM_PATENT_CA-2009529-C](http://www.wikidata.org/entity/Q_PUBCHEM_PATENT_CA-2009529-C)) (**ValueSnak** (**Property** [publication date](http://www.wikidata.org/entity/P577)) (**Time** 1994-07-12)))

#### Sponsors

In [27]:
filter(pubchem.patent('CA-2009529-C'), wd.sponsor, limit=3)

(**Statement** (**Item** [wd:Q_PUBCHEM_PATENT_CA-2009529-C](http://www.wikidata.org/entity/Q_PUBCHEM_PATENT_CA-2009529-C)) (**ValueSnak** (**Property** [sponsor](http://www.wikidata.org/entity/P859)) "IBM"))

#### Authors

In [28]:
filter(pubchem.patent('CA-2009529-C'), wd.author_name_string, limit=3)

(**Statement** (**Item** [wd:Q_PUBCHEM_PATENT_CA-2009529-C](http://www.wikidata.org/entity/Q_PUBCHEM_PATENT_CA-2009529-C)) (**ValueSnak** (**Property** [author name string](http://www.wikidata.org/entity/P2093)) "GRICE LONNIE E"))

(**Statement** (**Item** [wd:Q_PUBCHEM_PATENT_CA-2009529-C](http://www.wikidata.org/entity/Q_PUBCHEM_PATENT_CA-2009529-C)) (**ValueSnak** (**Property** [author name string](http://www.wikidata.org/entity/P2093)) "SANDERSON KENNETH R"))

(**Statement** (**Item** [wd:Q_PUBCHEM_PATENT_CA-2009529-C](http://www.wikidata.org/entity/Q_PUBCHEM_PATENT_CA-2009529-C)) (**ValueSnak** (**Property** [author name string](http://www.wikidata.org/entity/P2093)) "DINWIDDIE JOHN M JR"))

### Vendors

#### Descriptor

In [29]:
get_descriptor(pubchem.source('ID15739'))

(**Item** [wd:Q_PUBCHEM_SOURCE_ID15739](http://www.wikidata.org/entity/Q_PUBCHEM_SOURCE_ID15739))

(**ItemDescriptor**
- "15739"@en
- (**TextSet**
  - "DC Chemicals"@en)
- *no description*)

#### Class

In [30]:
filter(pubchem.source('ID15739'), wd.instance_of, limit=1)

(**Statement** (**Item** [wd:Q_PUBCHEM_SOURCE_ID15739](http://www.wikidata.org/entity/Q_PUBCHEM_SOURCE_ID15739)) (**ValueSnak** (**Property** [instance of](http://www.wikidata.org/entity/P31)) (**Item** [business](http://www.wikidata.org/entity/Q4830453))))

#### Short name

In [31]:
filter(pubchem.source('ID15739'), wd.short_name, limit=1)

(**Statement** (**Item** [wd:Q_PUBCHEM_SOURCE_ID15739](http://www.wikidata.org/entity/Q_PUBCHEM_SOURCE_ID15739)) (**ValueSnak** (**Property** [short name](http://www.wikidata.org/entity/P1813)) "DC Chemicals"))

#### Website

In [32]:
filter(pubchem.source('ID15739'), wd.official_website, limit=1)

(**Statement** (**Item** [wd:Q_PUBCHEM_SOURCE_ID15739](http://www.wikidata.org/entity/Q_PUBCHEM_SOURCE_ID15739)) (**ValueSnak** (**Property** [official website](http://www.wikidata.org/entity/P856)) [http://www.dcchemicals.com](http://www.dcchemicals.com)))