In [1]:
%%SPARQL
# Q1: How many protein records are in UniProt?
# Counts each distinct resource typed as up:Protein; the single result
# column is ?count.

PREFIX up: <http://purl.uniprot.org/core/>

SELECT (COUNT(DISTINCT ?entry) AS ?count)
WHERE { ?entry a up:Protein }

UsageError: Cell magic `%%SPARQL` not found.


In [None]:
%%SPARQL
# Q2: How many Arabidopsis thaliana protein records are in UniProt?
# Same count as Q1, restricted to entries whose up:organism is taxon 3702.

PREFIX up:    <http://purl.uniprot.org/core/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>

SELECT (COUNT(DISTINCT ?entry) AS ?count)
WHERE {
  ?entry up:organism taxon:3702 .
  ?entry a up:Protein .
}

In [None]:
%%SPARQL
# Q3: Retrieve pictures of Arabidopsis thaliana from UniProt
# Pictures are a property of the organism (the taxon record), not of its
# proteins, so no protein/gene/annotation join is needed: such joins only
# drop or multiply rows. UniProt links a taxon to its images with
# foaf:depiction.

PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX foaf:  <http://xmlns.com/foaf/0.1/>

SELECT DISTINCT ?image
WHERE {
  taxon:3702 foaf:depiction ?image .
}

In [None]:
%%SPARQL
# Q4: What is the description of the enzyme activity of UniProt Protein Q9SZZ8
# A protein entry points to its enzyme classification with up:enzyme; the
# enzyme record carries one or more up:activity nodes whose rdfs:label is the
# human-readable reaction description. ?activity (the output column) is that
# description text.

PREFIX up:        <http://purl.uniprot.org/core/>
PREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>
PREFIX rdfs:      <http://www.w3.org/2000/01/rdf-schema#>

SELECT ?activity
WHERE {
  uniprotkb:Q9SZZ8 up:enzyme ?enzyme .
  ?enzyme up:activity ?catalyticActivity .
  ?catalyticActivity rdfs:label ?activity .
}


In [None]:
%%SPARQL
# Q5: Retrieve the proteins ids, and date of submission, for 5 proteins that have been added to UniProt this year
# "This year" is evaluated at query time via NOW() instead of a hard-coded
# year, so the cell does not go stale. LIMIT 5 returns an arbitrary five
# matches (no ORDER BY is requested by the question).

PREFIX up: <http://purl.uniprot.org/core/>

SELECT DISTINCT ?protein ?date
WHERE {
  ?protein a up:Protein ;
           up:created ?date .
  FILTER(YEAR(?date) = YEAR(NOW()))
}
LIMIT 5


In [None]:
%%SPARQL
# Q6: How many species are in the UniProt taxonomy?
# up:Taxon covers every taxonomic rank (kingdom, genus, strain, ...), so the
# count must be restricted to taxa whose rank is Species.

PREFIX up: <http://purl.uniprot.org/core/>

SELECT (COUNT(DISTINCT ?species) AS ?count)
WHERE {
  ?species a up:Taxon ;
           up:rank up:Species .
}


In [None]:
%%SPARQL
# Q7: How many species have at least one protein record?
# up:organism may point at taxa of any rank (e.g. strains or subspecies), so
# the organism is additionally required to be a taxon of rank Species.
# NOTE(review): proteins annotated only at sub-species level are not rolled
# up to their parent species here - confirm whether that is wanted.

PREFIX up: <http://purl.uniprot.org/core/>

SELECT (COUNT(DISTINCT ?species) AS ?count)
WHERE {
  ?protein a up:Protein ;
           up:organism ?species .
  ?species a up:Taxon ;
           up:rank up:Species .
}


In [None]:
%%SPARQL

# Q8: Find the AGI codes and gene names for all Arabidopsis thaliana proteins that have a protein function annotation description that mentions "pattern formation"
# Output columns: ?protein (entry IRI), ?agi (AGI locus code), ?gene (gene
# resource, kept for compatibility with earlier results) and ?name (the gene
# name asked for by the question).
#  - the AGI code is the gene's up:locusName
#  - the gene name is the gene's skos:prefLabel
#  - function text lives on an up:Function_Annotation reached through
#    up:annotation, in its rdfs:comment

PREFIX up:    <http://purl.uniprot.org/core/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX skos:  <http://www.w3.org/2004/02/skos/core#>
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>

SELECT DISTINCT ?protein ?agi ?gene ?name
WHERE {
  ?protein a up:Protein ;
           up:organism taxon:3702 ;
           up:encodedBy ?gene ;
           up:annotation ?annotation .
  ?gene up:locusName ?agi ;
        skos:prefLabel ?name .
  ?annotation a up:Function_Annotation ;
              rdfs:comment ?text .
  FILTER(CONTAINS(?text, "pattern formation"))
}


In [None]:
%%SPARQL
# Q9: What is the MetaNetX Reaction identifier (starts with "mnxr") for the UniProt Protein uniprotkb:Q18A79
# Intended for the MetaNetX endpoint (https://rdf.metanetx.org/sparql); how
# the endpoint is selected depends on the %%SPARQL magic in use.
# Path through the MetaNetX schema:
#   peptide --peptXref--> UniProt entry
#   catalyst --pept--> peptide
#   gene-protein-reaction rule --cata--> catalyst, --reac--> reaction
#   reaction --mnxr--> MNXR identifier

PREFIX mnx:       <https://rdf.metanetx.org/schema/>
PREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>

SELECT DISTINCT ?mnxr
WHERE {
  ?pept mnx:peptXref uniprotkb:Q18A79 .
  ?cata mnx:pept ?pept .
  ?gpr  mnx:cata ?cata ;
        mnx:reac ?reac .
  ?reac mnx:mnxr ?mnxr .
}

In [None]:
%%SPARQL
# Q9: What is the MetaNetX Reaction identifier (starts with "mnxr") for the UniProt Protein uniprotkb:Q18A79
# NOTE(review): this cell is a second copy of Q9 - consider deleting one of
# the two cells.
# Intended for the MetaNetX endpoint (https://rdf.metanetx.org/sparql); how
# the endpoint is selected depends on the %%SPARQL magic in use.
# Path through the MetaNetX schema:
#   peptide --peptXref--> UniProt entry
#   catalyst --pept--> peptide
#   gene-protein-reaction rule --cata--> catalyst, --reac--> reaction
#   reaction --mnxr--> MNXR identifier

PREFIX mnx:       <https://rdf.metanetx.org/schema/>
PREFIX uniprotkb: <http://purl.uniprot.org/uniprot/>

SELECT DISTINCT ?mnxr
WHERE {
  ?pept mnx:peptXref uniprotkb:Q18A79 .
  ?cata mnx:pept ?pept .
  ?gpr  mnx:cata ?cata ;
        mnx:reac ?reac .
  ?reac mnx:mnxr ?mnxr .
}

In [None]:
%%SPARQL
# Q10: What is the official locus name, and the MetaNetX Reaction identifier (mnxr…) for the protein that has "glycine reductase" catalytic activity in Clostridium difficile (taxon 272563). (this must be executed on the https://rdf.metanetx.org/sparql endpoint)
# The results of this query will depend on the data in the UniProt and MetaNetX databases and may vary over time.
# Reviewer note (Alberto Glez, 21/12/2022): does this still work nowadays?
#
# Federated query: the protein, its locus name and its activity come from
# UniProt (SERVICE block); the reaction identifier comes from MetaNetX, joined
# on the UniProt entry IRI through mnx:peptXref.
# The activity is matched by substring on the label of the terms the protein
# is classified with, rather than by an exact sentence.
# NOTE(review): assumes the "glycine reductase" activity is exposed as a
# classification (GO) term label in UniProt - confirm against live data.

PREFIX mnx:   <https://rdf.metanetx.org/schema/>
PREFIX up:    <http://purl.uniprot.org/core/>
PREFIX taxon: <http://purl.uniprot.org/taxonomy/>
PREFIX rdfs:  <http://www.w3.org/2000/01/rdf-schema#>

SELECT DISTINCT ?locus ?mnxr
WHERE {
  SERVICE <https://sparql.uniprot.org/sparql> {
    ?protein a up:Protein ;
             up:organism taxon:272563 ;
             up:classifiedWith ?term ;
             up:encodedBy ?gene .
    ?gene up:locusName ?locus .
    ?term rdfs:label ?activity .
    FILTER(CONTAINS(?activity, "glycine reductase"))
  }
  ?pept mnx:peptXref ?protein .
  ?cata mnx:pept ?pept .
  ?gpr  mnx:cata ?cata ;
        mnx:reac ?reac .
  ?reac mnx:mnxr ?mnxr .
}


