Create ISA objects for the EATRIS-Plus multi-omics data set of a Czech population cohort

Define ontology references

In [1]:
from isatools.model import *
# Registry of the ontology sources referenced by the ISA objects below,
# keyed by a short prefix (looked up as e.g. ontologies["OBI"]).
# Each row of the table is (key, name, file, description).
ontologies = {
    prefix: OntologySource(name=label, file=url, description=summary)
    for prefix, label, url, summary in (
        (
            "CHEBI",
            "CHEBI - Chemical Entities of Biological Interest",
            "http://purl.obolibrary.org/obo/chebi.owl",
            "A structured classification of molecular entities of biological interest focusing on 'small' chemical compounds.",
        ),
        (
            # The Contributor Role Ontology (CRO) is an extension of the
            # CASRAI Contributor Roles Taxonomy (CRediT) and replaces the
            # former Contribution Ontology.
            "CRO",
            "CRO - Contributor Role Ontology",
            "http://purl.obolibrary.org/obo/cro.owl",
            "A classification of the diverse roles performed in the work leading to a published research output in the sciences. Its purpose to provide transparency in contributions to scholarly published work, to enable improved systems of attribution, credit, and accountability.",
        ),
        (
            "EFO",
            "EFO - Experimental Factor Ontology",
            "http://www.ebi.ac.uk/efo/efo.owl",
            "The Experimental Factor Ontology (EFO) provides a systematic description of many experimental variables available in EBI databases, and for external projects such as the NHGRI GWAS catalogue. It combines parts of several biological ontologies, such as anatomy, disease and chemical compounds. The scope of EFO is to support the annotation, analysis and visualization of data handled by many groups at the EBI and as the core ontology for OpenTargets.org",
        ),
        (
            "MMO",
            "MMO - Measurement method ontology",
            "http://purl.obolibrary.org/obo/mmo.owl",
            "A representation of the variety of methods used to make clinical and phenotype measurements.",
        ),
        (
            "NCBITAXON",
            "NCBI organismal classification",
            "http://purl.obolibrary.org/obo/ncbitaxon.owl",
            "An ontology representation of the NCBI organismal taxonomy",
        ),
        (
            "NCIT",
            "NCI Thesaurus OBO Edition",
            "http://purl.obolibrary.org/obo/ncit.owl",
            "The NCIt OBO Edition project aims to increase integration of the NCIt with OBO Library ontologies. NCIt is a reference terminology that includes broad coverage of the cancer domain, including cancer related diseases, findings and abnormalities. NCIt OBO Edition releases should be considered experimental.",
        ),
        (
            "OBI",
            "OBI - Ontology for Biomedical Investigations",
            "http://purl.obolibrary.org/obo/obi.owl",
            "An integrated ontology for the description of life-science and clinical investigations",
        ),
        (
            "OMIABIS",
            "Ontologized MIABIS",
            "http://purl.obolibrary.org/obo/omiabis.owl",
            "An ontological version of MIABIS (Minimum Information About BIobank data Sharing)",
        ),
        (
            "PRIDE",
            "PRIDE Controlled Vocabulary",
            "http://purl.obolibrary.org/obo/pride_cv.obo",
            "The PRIDE PRoteomics IDEntifications (PRIDE) database is a centralized, standards compliant, public data repository for proteomics data, including protein and peptide identifications, post-translational modifications and supporting spectral evidence.",
        ),
        (
            "REX",
            "REX - Physico-chemical process",
            "http://purl.obolibrary.org/obo/rex.owl",
            "An ontology of physico-chemical processes, i.e. physico-chemical changes occurring in course of time.",
        ),
        (
            "STATO",
            "STATO: the statistical methods ontology",
            "http://purl.obolibrary.org/obo/stato.owl",
            "STATO is the statistical methods ontology. It contains concepts and properties related to statistical methods, probability distributions and other concepts related to statistical analysis, including relationships to study designs and plots.",
        ),
    )
}

Create investigation

In [2]:
# Role of the investigation contact, annotated with a CRO term.
project_management_role = OntologyAnnotation(
    term="project management role",
    term_source=ontologies["CRO"],
    term_accession="http://purl.obolibrary.org/obo/CRO_0000065",
)

# Contact person listed on the investigation.
investigation_contact = Person(
    last_name="Keidong",
    first_name="Eliis",
    affiliation="EATRIS",
    roles=[project_management_role],
)

# Top-level ISA investigation. Identifier and dates are left empty, and all
# ontology sources from the `ontologies` registry are referenced.
investigation = Investigation(
    filename="i_investigation.txt",
    identifier="",
    title="EATRIS-Plus - Flagship in Personalised Medicine",
    description="EATRIS-Plus project aims to support the long-term sustainability of the European Research Infrastructure for Translational Medicine (EATRIS) by delivering innovative scientific tools to the research community, strengthening the EATRIS financial model, and reinforcing EATRIS’ leadership in the European Research Area in the field of Personalised Medicine (PM).",
    submission_date="",
    public_release_date="",
    ontology_source_references=list(ontologies.values()),
    publications=None,
    contacts=[investigation_contact],
    studies=None,
    comments=None,
)

Create study

In [3]:
# Contact person listed on the study.
study_contact = Person(
    last_name="Hajdúch",
    first_name="Marian",
    affiliation="Institute of Molecular and Translational Medicine (IMTM), Palacký University Olomouc",
)

# Study design descriptors: a PRIDE term and an OMIABIS term.
study_design_descriptors = [
    OntologyAnnotation(
        term="Multi-omics study",
        term_source=ontologies["PRIDE"],
        term_accession="http://purl.obolibrary.org/obo/PRIDE_0000461",
    ),
    OntologyAnnotation(
        term="population based study design",
        term_source=ontologies["OMIABIS"],
        term_accession="http://purl.obolibrary.org/obo/OMIABIS_0001022",
    ),
]

# The cohort study. Identifier and dates are left empty; every collection
# argument is passed as None here.
cohort_study = Study(
    filename="s_study.txt",
    identifier="",
    title="Multi-omics data of a Czech population cohort",
    description="Multi-omics data of a Czech population cohort",
    submission_date="",
    public_release_date="",
    contacts=[study_contact],
    design_descriptors=study_design_descriptors,
    factors=None,
    protocols=None,
    assays=None,
    sources=None,
    samples=None,
    process_sequence=None,
    other_material=None,
    characteristic_categories=None,
    comments=None,
    units=None,
)

# Attach the study to the investigation.
investigation.studies.append(cohort_study)

Define protocols

In [4]:
# Protocol executed by the sample collection processes.
# TODO: annotate the protocol type with an ontology term (term_source and
# term_accession) once a suitable term has been chosen.
sample_collection_protocol = Protocol(
    name="sample_collection_protocol",
    protocol_type=OntologyAnnotation(term="sample collection"),
)
# Declare the protocol on the study so that the protocol executed by the
# study's processes is also listed among the study's protocols.
investigation.studies[0].protocols.append(sample_collection_protocol)

Define assays

In [5]:
# One Assay object per assay table file. The trailing token of each name
# (imtm, uu, fimm, sermas, rumc, mumc) presumably identifies the contributing
# site -- confirm with the data providers.

# genomics / DNA methylation
assay_genomics_imtm = Assay(filename="a_assay_genomics_imtm.txt")
assay_dnamethylation_uu = Assay(filename="a_assay_dnamethylation_uu.txt")

# RNA-seq / miRNA-seq
assay_rnaseq_fimm = Assay(filename="a_assay_rnaseq_fimm.txt")
assay_mirnaseq_fimm = Assay(filename="a_assay_mirnaseq_fimm.txt")
assay_mirnaseq_sermas = Assay(filename="a_assay_mirnaseq_sermas.txt")

# proteomics
assay_proteomics_imtm = Assay(filename="a_assay_proteomics_imtm.txt")

# metabolomics
assay_metabolomics_acylcarnitines_rumc = Assay(filename="a_assay_metabolomics_acylcarnitines_rumc.txt")
assay_metabolomics_aminoacids_mumc = Assay(filename="a_assay_metabolomics_aminoacids_mumc.txt")
assay_metabolomics_fattyacids_mumc = Assay(filename="a_assay_metabolomics_fattyacids_mumc.txt")

In [6]:
# add dummy samples: three individuals (sources), each yielding one sample
# through a sample collection process
for source_idx in range(1, 4):
    # create source (=individual), annotated as Homo sapiens
    source_name = "individual_{0}".format(source_idx)
    source = Source(
        name=source_name,
        characteristics=[
            Characteristic(
                category=OntologyAnnotation(
                    term="Organism",
                    term_source=ontologies["OBI"],
                    term_accession="http://purl.obolibrary.org/obo/OBI_0100026"),
                value=OntologyAnnotation(
                    term="Homo sapiens",
                    term_source=ontologies["NCBITAXON"],
                    term_accession="http://purl.obolibrary.org/obo/NCBITaxon_9606"))])
    investigation.studies[0].sources.append(source)
    # create sample; derives_from is a list of Source objects, so the single
    # source is wrapped in a list rather than passed bare
    sample_name = "sample_{0}".format(source_idx)
    sample = Sample(
        name=sample_name,
        derives_from=[source])
    # register the sample on the study, alongside its source
    investigation.studies[0].samples.append(sample)
    # process linking the source (input) to the sample (output)
    sample_collection_process = Process(
        name="samplecollection_{0}".format(source_idx),
        executes_protocol=sample_collection_protocol,
        inputs=[source],
        outputs=[sample])
    investigation.studies[0].process_sequence.append(sample_collection_process)

Add assays to study

In [10]:
# TODO: attach the assays defined above to the study, e.g.
# investigation.studies[0].assays.append(assay_genomics_imtm)

Write ISA-Tab files

In [8]:
# create output directory
import os
out_dir = "."
# exist_ok=True makes this a no-op when the directory already exists and
# avoids the check-then-create race of a separate isdir() test
os.makedirs(out_dir, exist_ok=True)
# write to ISA-Tab
from isatools import isatab
isatab.dump(investigation, out_dir)

isatools.model.Investigation(identifier='', filename='i_investigation.txt', title='EATRIS-Plus - Flagship in Personalised Medicine', submission_date='', public_release_date='', ontology_source_references=[isatools.model.OntologySource(name='CHEBI - Chemical Entities of Biological Interest', file='http://purl.obolibrary.org/obo/chebi.owl', version='', description='A structured classification of molecular entities of biological interest focusing on 'small' chemical compounds.', comments=[]), isatools.model.OntologySource(name='CRO - Contributor Role Ontology', file='http://purl.obolibrary.org/obo/cro.owl', version='', description='A classification of the diverse roles performed in the work leading to a published research output in the sciences. Its purpose to provide transparency in contributions to scholarly published work, to enable improved systems of attribution, credit, and accountability.', comments=[]), isatools.model.OntologySource(name='EFO - Experimental Factor Ontology', file=

In [9]:
# write to ISA-JSON
import json
from isatools.isajson import ISAJSONEncoder
with open(os.path.join(out_dir, "isa.json"), "w", encoding="utf-8") as out_file:
    # json.dump writes the serialised investigation into out_file;
    # json.dumps would only return a string and leave isa.json empty
    json.dump(investigation,
              out_file,
              cls=ISAJSONEncoder,
              sort_keys=True,
              indent=4,
              separators=(',', ': '))