# SPARQL queries of CWLProv provenance files
This document provides an overview of different SPARQL queries, together with their (expected) results.

## Import modules & queries

In [2]:
import os
import rdflib
from pathlib import Path

In [3]:
# All paths in this notebook are resolved against the current working directory.
cwd = Path.cwd()
# The SPARQL query files are looked up in a 'queries' folder directly below it.
queries_dir = cwd.joinpath('queries')

## Functions

In [4]:
def run_query(rdf_file, query_file):
    """
    Run the SPARQL query stored in `query_file` against the RDF data in `rdf_file`.

    rdf_file   = RDF file; it is passed unchanged to `rdflib.Graph.parse`.
    query_file = path to a text file holding the SPARQL query.

    Returns whatever `rdflib.Graph.query` returns for that query; the cells
    below iterate over it to get one row per result.
    """
    g = rdflib.Graph()
    g.parse(rdf_file)

    # Decode the query explicitly as UTF-8 instead of relying on the
    # platform's default encoding, so that non-ASCII characters in a query
    # are read the same way on every machine.
    with open(query_file, 'r', encoding='utf-8') as f:
        query = f.read()

    return g.query(query)

## SPARQL queries

List all the Docker images used in this workflow run.

In [5]:
# Provenance graph to inspect; replace this with the provenance file of a full workflow run.
provenance_file = cwd.joinpath("labels_wf_primary_cwlprov.ttl")
extract_images_query = queries_dir.joinpath("docker_images.sparql")

# Every result row is read through its `step` and `image` attributes.
response = run_query(provenance_file, extract_images_query)
for row in response:
    print(f"{row.step} used {row.image}")


arcp://uuid,a914217a-5cd2-457d-85cc-7472eeb17bfd/workflow/packed.cwl#main/generate_pc7 used amancevice/pandas:1.3.4-slim


List every entity that has a DOI.

In [19]:
# Provenance graph to inspect; replace this with the provenance file of a full workflow run.
provenance_file = cwd.joinpath('data_ann_ex1_primary.cwlprov.ttl')
extract_doi_query = queries_dir.joinpath('dois.sparql')

# Every result row is read through its `doi` attribute, printed one per line.
response = run_query(provenance_file, extract_doi_query)
for row in response:
    print(f"{row.doi}")

<doi>
<doi>
dataset_identifier


Extract the format of every file for which a format is specified.

In [18]:
# Provenance graph to inspect for file-format annotations.
provenance_file = cwd.joinpath('data_ann_ex2_primary.cwlprov.ttl')
extract_format_query = queries_dir.joinpath('format.sparql')

# Every result row is read through its `basename` and `format` attributes.
response = run_query(provenance_file, extract_format_query)
for row in response:
    print(f"{row.basename} has format {row.format}")

sabdab_summary_all_20220527.tsv has format https://www.iana.org/assignments/media-types/text/tab-separated-values
7mb7.cif has format http://edamontology.org/format_1477
7zxf.cif has format http://edamontology.org/format_1477


<rdflib.plugins.sparql.processor.SPARQLResult at 0x7fd6944bbfa0>