In [1]:
%endpoint https://sparql.uniprot.org/sparql

In [21]:
## Q1: How many protein records are in UniProt?

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT (COUNT(?protein) AS ?count)
WHERE {
  ?protein a up:Protein .
}

count
378979161


In [23]:
## Q2: How many Arabidopsis thaliana protein records are in UniProt?
## Note: Tax-id of Arabidopsis thaliana is 3702!

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX taxonomy: <http://purl.uniprot.org/taxonomy/>

SELECT (COUNT(?protein) AS ?count)
WHERE {
  ?protein a up:Protein .
  ?protein up:organism taxonomy:3702 .
}

count
136447


In [22]:
## Q3: Can you retrieve pictures of Arabidopsis thaliana from UniProt? 

PREFIX foaf: <http://xmlns.com/foaf/0.1/> 
PREFIX up: <http://purl.uniprot.org/core/>
SELECT ?name ?picture                             
WHERE {
    ?entries  foaf:depiction  ?picture .               # Select all images
    ?entries  up:scientificName ?name .                # Select all scientific names
    FILTER CONTAINS(?name, "Arabidopsis thaliana") .   # Filter for 'Arabidopsis thaliana'
}

name,picture
Arabidopsis thaliana,https://upload.wikimedia.org/wikipedia/commons/3/39/Arabidopsis.jpg
Arabidopsis thaliana,https://upload.wikimedia.org/wikipedia/commons/thumb/6/60/Arabidopsis_thaliana_inflorescencias.jpg/800px-Arabidopsis_thaliana_inflorescencias.jpg


In [61]:
## Q4: What is the description of the enzyme activity of UniProt Protein Q9SZZ8?
## Note: I understand the documentation found in the link below in the way that rdfs:comment provides the description.
## Note: Sadly, I don't get any results, therefore i use rdfs:label.
## https://sparql.uniprot.org/sparql/?query=PREFIX%20up:%3chttp://purl.uniprot.org/core/%3e%20DESCRIBE%20up:activity%20FROM%20up:

PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX up:<http://purl.uniprot.org/core/>
PREFIX uniprot:<http://purl.uniprot.org/uniprot/> 

SELECT ?description
WHERE {
    uniprot:Q9SZZ8 up:enzyme ?enzyme .  # Select the enzyme
    ?enzyme up:activity ?activity .     # Select the enzyme activity
    ?activity rdfs:label ?description . # Select the activities description
}

description
all-trans-beta-carotene + 4 H(+) + 2 O2 + 4 reduced [2Fe-2S]-[ferredoxin] = all-trans-zeaxanthin + 2 H2O + 4 oxidized [2Fe-2S]-[ferredoxin].


In [86]:
## Q5: How to retrieve the proteins ids, and date of submission, for 5 proteins that have been added to UniProt this year 
## Note: I interpreted the question as "retrieve the 5 latest submissions".
## Note: To filter for a specific year (2020 in this case) the folloing line has to be added to the query
## FILTER(YEAR(?created) = 2020) .

PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT ?id ?created
WHERE {
    ?protein a up:Protein .
    ?protein up:mnemonic ?id .
    ?protein up:created ?created .
    FILTER(YEAR(?created) = 2020) .
}
ORDER BY DESC(?created)      
LIMIT 5                  

id,created
A0A6V6GZ78_9EUKA,2020-12-02
A0A6V6HCP1_9EUKA,2020-12-02
A0A6V6HQN1_9EUKA,2020-12-02
A0A6V6I3L3_9EUKA,2020-12-02
A0A6V6JH45_9EUKA,2020-12-02


In [94]:
## Q6: How  many species are in the UniProt taxonomy?

PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT (COUNT(DISTINCT ?species) AS ?speciesCount)
WHERE { 
    ?species a up:Taxon .
    ?species up:rank up:Species .
}

speciesCount
1995728


In [2]:
## Q7: How many species have at least one protein record?
## Note: Only difference to the question before is, that I select Species which are listed under Proteins.

PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX up: <http://purl.uniprot.org/core/>

SELECT (COUNT(DISTINCT ?species) AS ?speciesCount)
WHERE { 
    ?upProtein a up:Protein .         
    ?upProtein up:organism ?species .
    ?species a up:Taxon .
    ?species up:rank up:Species .
}

speciesCount
1078469


In [16]:
%endpoint https://sparql.uniprot.org/sparql
## Q8: How to find the AGI codes and gene names for all Arabidopsis thaliana  proteins that have a protein function annotation description that mentions “pattern formation”?
## Note: I use skos (Simple Knowledge Organization System) to get the gene names
## Note: I limit the shown hits to 5 for better readability.

PREFIX skos:<http://www.w3.org/2004/02/skos/core#> 
PREFIX up:<http://purl.uniprot.org/core/> 
PREFIX taxonomy:<http://purl.uniprot.org/taxonomy/> 

SELECT ?locus_name ?gene_name
WHERE {
    ?upProtein a up:Protein .
    ?upProtein up:organism taxonomy:3702 .                      # Select all Arabidopsis thaliana proteins
    ?upProtein up:classifiedWith ?goid .                        # Select the concept of the protein
    ?goid rdfs:label ?description .                             # Select the description (string) of the concept
    FILTER CONTAINS(LCASE(?description), "pattern formation") . # filter for the keywords
    ?upProtein up:encodedBy ?gene .                             # Select the gene the protein is encoded by
    ?gene up:locusName ?locus_name .                            # Get the locus name
    ?gene skos:prefLabel ?gene_name .                           # Get the gene name
}
LIMIT 5

locus_name,gene_name
At5g46700,TRN2
At1g20330,SMT2
At2g20120,COV1
At5g46700,TRN2
At5g13300,AGD3


In [10]:
%endpoint https://rdf.metanetx.org/sparql  

## Q9: What is the MetaNetX Reaction identifier (starts with “mnxr”) for the UniProt Protein uniprotkb:Q18A79?
## Note: This code is based on the code found in this manual: 
## https://edu.sib.swiss/pluginfile.php/8024/mod_resource/content/4/SWAT4HCLS%202019_MP.pdf

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX uniprotkb: <http://purl.uniprot.org/uniprot/> 
PREFIX mnx: <https://rdf.metanetx.org/schema/>

SELECT DISTINCT ?reac_id ?mnxr
WHERE {
    ?pept mnx:peptXref uniprotkb:Q18A79 . # Select all proteins for uniprotkb:Q18A79
    ?cata mnx:pept ?pept .                # Select the reaction specific peptide
    ?gpr mnx:cata ?cata ;                 # Select all reactions (gpr = Gene-Protein-Reaction)
         mnx:reac ?reac .
    ?reac mnx:mnxr ?mnxr .                # Get link to indepth information about the reaction identifier
    ?reac rdfs:label ?reac_id .           # Get the id of the reaction identifier as a string
}

reac_id,mnxr
mnxr165934,https://rdf.metanetx.org/reac/MNXR165934
mnxr145046c3,https://rdf.metanetx.org/reac/MNXR145046


In [1]:
## This must be executed on the https://rdf.metanetx.org/sparql endpoint
%endpoint https://rdf.metanetx.org/sparql  

## Q10:  What is the official locus name, and the MetaNetX Reaction identifier (mnxr…..) for the protein that has “glycine reductase” catalytic activity in Clostridium difficile (taxon 272563)?
## Note: To query the uniprot server, I utilized "service", which lets me use the uniprot server no matter the endpoint.

PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX mnx: <https://rdf.metanetx.org/schema/>
PREFIX up: <http://purl.uniprot.org/core/>
PREFIX taxonomy: <http://purl.uniprot.org/taxonomy/>

SELECT DISTINCT ?locus_name ?reac_id ?upProtein
WHERE
{
    service <http://sparql.uniprot.org/sparql> { 
        # Same as Q8:
        ?upProtein a up:Protein .
        ?upProtein up:organism taxonomy:272563 .
        ?upProtein up:encodedBy ?gene .
        ?gene up:locusName ?locus_name .
        ?upProtein up:classifiedWith ?go_info .
        ?go_info rdfs:label ?description .
        FILTER CONTAINS(LCASE(?description), "glycine reductase") .
    }
    
    # Same as Q9:
    ?pept mnx:peptXref ?upProtein . 
    ?cata mnx:pept ?pept .                
    ?gpr mnx:cata ?cata ;                 
         mnx:reac ?reac .
    ?reac rdfs:label ?reac_id .           
} 

locus_name,reac_id,upProtein
CD630_23540,mnxr157884c3,http://purl.uniprot.org/uniprot/Q185M5
CD630_23510,mnxr157884c3,http://purl.uniprot.org/uniprot/Q185M3
CD630_23520,mnxr157884c3,http://purl.uniprot.org/uniprot/Q185M6
CD630_23490,mnxr157884c3,http://purl.uniprot.org/uniprot/Q185M4
CD630_23480,mnxr157884c3,http://purl.uniprot.org/uniprot/Q185M1
CD630_23510,mnxr162774c3,http://purl.uniprot.org/uniprot/Q185M3
CD630_23520,mnxr162774c3,http://purl.uniprot.org/uniprot/Q185M6
CD630_23490,mnxr162774c3,http://purl.uniprot.org/uniprot/Q185M4
CD630_23480,mnxr162774c3,http://purl.uniprot.org/uniprot/Q185M1
