# Create ISA objects for EATRIS-Plus multi-omics data set of Czech population cohort

See example: https://isatools.readthedocs.io/en/latest/example-createSimpleISAtab.html

In [1]:
from ftplib import FTP
from io import BytesIO
r = BytesIO()
# ftp://ftp.ebi.ac.uk/pub/databases/metabolights/eb-eye/eb-eye_metabolights_complete.xml
ftp = FTP("ftp.ebi.ac.uk")
ftp.login("anonymous")
#ftp.cwd("pub/databases/metabolights/submissionTool/configurations")
#ftp.cwd("pub/databases/metabolights/eb-eye")
ftp.cwd("pub/databases/metabolights")
ftp.retrlines("LIST")
ftp.retrbinary('RETR README.txt', r.write)
print(r.getvalue())
ftp.cwd("submissionTool")
ftp.retrlines("LIST")
#with open("eb-eye_metabolights_complete.xml", "wb") as f:
#    ftp.retrbinary('RETR eb-eye_metabolights_complete.xml', f.write)
ftp.quit()

drwxr-xr-x  155 ftp      ftp         16384 May 26  2020 IPO
-rw-rwxr-x    1 ftp      ftp           264 Dec 14  2011 README.txt
drwxr-xr-x    2 ftp      ftp          4096 Apr 29  2019 classyfire
drwxr-xr-x    2 ftp      ftp          4096 Jun 06  2018 compounds
drwxr-xr-x    5 ftp      ftp          4096 Sep 24  2020 derived
drwxrwxrwx    3 ftp      ftp          4096 Nov 04  2020 documentation
drwxrwxrwx    2 ftp      ftp          4096 Nov 02 02:55 eb-eye
drwxrwxrwx    2 ftp      ftp          4096 Nov 02  2017 eb-eye-dev
drwxrwxrwx    2 ftp      ftp          4096 Oct 31  2017 eb-eye-test
drwxrwxrwx    2 ftp      ftp          4096 Jun 09  2017 lipidhome
drwxrwxrwx    2 ftp      ftp          4096 May 20  2019 resources
drwxrwxr-x    4 ftp      ftp          4096 Sep 14  2016 studies
drwxr-xr-x    3 ftp      ftp          4096 Mar 22  2021 study_file_extensions
drwxrwxr-x    5 ftp      ftp          4096 May 02  2019 submissionTool
lrwxrwxrwx    1 ftp      ftp             8 Nov 05  2015 xml_fee

'221 Goodbye.'

## Define ontology references

Onotologies in (FAIR genomes metadata schema)[https://github.com/fairgenomes/fairgenomes-semantic-model] include AFR, AFRL, DC (Dublin Core), DUO (Data Use Ontology), EDAM, EFO, FG, FIX, GAZ, GENEPIO, GO, GSSO, HANCESTRO, HGNC, HL7, HP, IAO (Information Artifact Ontology), ICO (Informed Consent Ontology), LOINC, NCIT, OBI, OMIABIS, Orphanet, PATO, RO (Relation Ontology), SIO (Semanticscience Integrated Ontology), SNOMEDCT, SWO, UATC.

We reuse these as much as possible.

In [2]:
from isatools.model import *
ontologies = {
    "CHEBI": OntologySource(
        name = "CHEBI - Chemical Entities of Biological Interest", 
        file = "http://purl.obolibrary.org/obo/chebi.owl",
        description = "A structured classification of molecular entities of biological interest focusing on 'small' chemical compounds."), 
    "CHMO": OntologySource(
        name = "CHMO - Chemical Methods Ontology", 
        file = "http://purl.obolibrary.org/obo/chmo.owl",
        description = "CHMO, the chemical methods ontology, describes methods used to collect data in chemical experiments, such as mass spectrometry and electron microscopy prepare and separate material for further analysis, such as sample ionisation, chromatography, and electrophoresis synthesise materials, such as epitaxy and continuous vapour deposition It also describes the instruments used in these experiments, such as mass spectrometers and chromatography columns. It is intended to be complementary to the Ontology for Biomedical Investigations (OBI)."), 
    "CRO": OntologySource(
        # The Contributor Role Ontology (CRO) is an extension of the CASRAI Contributor Roles Taxonomy (CRediT) and replaces the former Contribution Ontology.
        name = "CRO - Contributor Role Ontology",
        file = "http://purl.obolibrary.org/obo/cro.owl",
        description = "A classification of the diverse roles performed in the work leading to a published research output in the sciences. Its purpose to provide transparency in contributions to scholarly published work, to enable improved systems of attribution, credit, and accountability."),
    "EDAM": OntologySource(
        name = "EDAM - EMBRACE Data and Methods", 
        file = "http://edamontology.org/EDAM.owl",
        description = "EDAM (EMBRACE Data and Methods) is an ontology of common bioinformatics operations, topics, types of data including identifiers, and formats. EDAM comprises common concepts (shared within the bioinformatics community) that apply to semantic annotation of resources."), 
    "EFO": OntologySource(
        name = "EFO - Experimental Factor Ontology", 
        file = "http://www.ebi.ac.uk/efo/efo.owl",
        description = "The Experimental Factor Ontology (EFO) provides a systematic description of many experimental variables available in EBI databases, and for external projects such as the NHGRI GWAS catalogue. It combines parts of several biological ontologies, such as anatomy, disease and chemical compounds. The scope of EFO is to support the annotation, analysis and visualization of data handled by many groups at the EBI and as the core ontology for OpenTargets.org"), 
    "GENEPIO": OntologySource(
        name = "GENEPIO - Genomic Epidemiology Ontology", 
        file = "http://purl.obolibrary.org/obo/genepio.owl",
        description = "The Genomic Epidemiology Ontology (GenEpiO) covers vocabulary necessary to identify, document and research foodborne pathogens and associated outbreaks."),
    "MI": OntologySource(
        name = "MI - Molecular Interactions Controlled Vocabulary", 
        file = "http://purl.obolibrary.org/obo/mi.owl",
        description = "A structured controlled vocabulary for the annotation of experiments concerned with protein-protein interactions."),
    "MMO": OntologySource(
        name = "MMO - Measurement method ontology", 
        file = "http://purl.obolibrary.org/obo/mmo.owl",
        description = "A representation of the variety of methods used to make clinical and phenotype measurements."),
    "MS": OntologySource(
        name = "Metabolomics Standards Initiative Ontology (MSIO)",
        file = "http://purl.obolibrary.org/obo/ms.owl",
        description = "A structured controlled vocabulary for the annotation of experiments concerned with proteomics mass spectrometry."),
    "MSIO": OntologySource(
        name = "MS - Mass spectrometry ontology",
        file = "http://purl.obolibrary.org/obo/msio.owl",
        description = "MSIO aims to provide a single point of entry to support semantic markup of experiments making use of NMR and MS techniques to identify, measure and quantify small molecules known as metabolites. MSIO covers metabolite profiling, targeted or undertargeted, tracer based applications. MSIO reuses a number of resources such as CHEBI, DUO, NMRCV, OBI, and STATO."),
    "NCBITAXON": OntologySource(
        name = "NCBI organismal classification", 
        file = "http://purl.obolibrary.org/obo/ncbitaxon.owl",
        description = "An ontology representation of the NCBI organismal taxonomy"),
    "NCIT": OntologySource(
        name = "NCI Thesaurus OBO Edition", 
        file = "http://purl.obolibrary.org/obo/ncit.owl",
        description = "The NCIt OBO Edition project aims to increase integration of the NCIt with OBO Library ontologies. NCIt is a reference terminology that includes broad coverage of the cancer domain, including cancer related diseases, findings and abnormalities. NCIt OBO Edition releases should be considered experimental."),
    "OBI": OntologySource(
        name = "OBI - Ontology for Biomedical Investigations", 
        file = "http://purl.obolibrary.org/obo/obi.owl",
        description = "An integrated ontology for the description of life-science and clinical investigations"),
    "OMIABIS": OntologySource(
        name = "Ontologized MIABIS", 
        file = "http://purl.obolibrary.org/obo/omiabis.owl",
        description = "An ontological version of MIABIS (Minimum Information About BIobank data Sharing)"),
    "PRIDE": OntologySource(
        name = "PRIDE Controlled Vocabulary",
        file = "http://purl.obolibrary.org/obo/pride_cv.obo",
        description = "The PRIDE PRoteomics IDEntifications (PRIDE) database is a centralized, standards compliant, public data repository for proteomics data, including protein and peptide identifications, post-translational modifications and supporting spectral evidence."),
    "STATO": OntologySource(
        name = "STATO: the statistical methods ontology",
        file = "http://purl.obolibrary.org/obo/stato.owl",
        description = "STATO is the statistical methods ontology. It contains concepts and properties related to statistical methods, probability distributions and other concepts related to statistical analysis, including relationships to study designs and plots."),
    "UBERON": OntologySource(
        name = "Uber-anatomy ontology",
        file = "http://purl.obolibrary.org/obo/uberon.owl",
        description = "Uberon is an integrated cross-species anatomy ontology representing a variety of entities classified according to traditional anatomical criteria such as structure, function and developmental lineage. The ontology includes comprehensive relationships to taxon-specific anatomical ontologies, allowing integration of functional, phenotype and expression data."),
    "tbd": OntologySource(
        name = "to be defined",
        file = "http://tbd.owl",
        description = "tbd")
}

Create investigation

In [3]:
investigation = Investigation(
    filename = "i_investigation.txt", 
    identifier = "", 
    title = "EATRIS-Plus - Flagship in Personalised Medicine",
    description = "EATRIS-Plus project aims to support the long-term sustainability of the European Research Infrastructure for Translational Medicine (EATRIS) by delivering innovative scientific tools to the research community, strengthening the EATRIS financial model, and reinforcing EATRIS' leadership in the European Research Area in the field of Personalised Medicine (PM).",
    submission_date = "",
    public_release_date = "",
    ontology_source_references = [o for o in ontologies.values()],
    publications = None,
    contacts = [
        Person(
            last_name = "Keidong", 
            first_name = "Eliis",
            #mid_initials = "",
            affiliation = "EATRIS",
            roles = [
                OntologyAnnotation(
                    term = "project management role",
                    term_source = ontologies["CRO"], 
                    term_accession ="http://purl.obolibrary.org/obo/CRO_0000065")])],
    studies = None,
    comments = None)

Create study

In [4]:
cohort_study = Study(
    filename = "s_study.txt", 
    identifier = "", 
    title = "Multi-omics data of a Czech population cohort",
    description = "Multi-omics data of a Czech population cohort", 
    submission_date = "", 
    public_release_date = "",
    contacts = [
        Person(
            last_name = "Hajduch", 
            first_name = "Marian",
            #mid_initials = "",
            affiliation = "Institute of Molecular and Translational Medicine (IMTM), Palacky University Olomouc")],
    design_descriptors = [
        OntologyAnnotation(
                term = "Multi-omics study",
                term_source = ontologies["PRIDE"],
                term_accession = "http://purl.obolibrary.org/obo/PRIDE_0000461"),
        OntologyAnnotation(
                term = "population based study design",
                term_source = ontologies["OMIABIS"],
                term_accession = "http://purl.obolibrary.org/obo/OMIABIS_0001022")], 
    factors = None, 
    protocols = None,
    assays = None,
    sources = None,
    samples = None,
    process_sequence = None,
    other_material = None,
    characteristic_categories = None,
    comments = None,
    units = None)
investigation.studies.append(cohort_study)

Define sample collection protocol

In [5]:
# sample collection protocol and protocol parameters 
protocol_params = {
    "anatomical entity": ProtocolParameter(
        parameter_name = OntologyAnnotation(
            term = "anatomical entity",
            term_source = ontologies["UBERON"],
            term_accession = "http://purl.obolibrary.org/obo/UBERON_0001062")),
    "Scan polarity": ProtocolParameter(
        parameter_name = OntologyAnnotation(
            term = "scan polarity",
            term_source = ontologies["MS"],
            term_accession = "http://purl.obolibrary.org/obo/MS_1000465")),
        #values: 
        #negative scan http://purl.obolibrary.org/obo/MS_1000129
        #positive scan http://purl.obolibrary.org/obo/MS_1000130
    "Scan m/z range": ProtocolParameter(
        parameter_name = OntologyAnnotation(
            term = "Scan m/z range")),
            #term_source = ontologies[""],
            #term_accession = "")),
    "Instrument": ProtocolParameter(
        parameter_name = OntologyAnnotation(
            term = "Instrument",
            term_source = ontologies["MS"],
            term_accession = "http://purl.obolibrary.org/obo/MS_1000463")),
    "Ion source": ProtocolParameter(
        parameter_name = OntologyAnnotation(
            term = "Ion source",
            term_source = ontologies["CHMO"],
            term_accession = "http://purl.obolibrary.org/obo/CHMO_0000960")),
    "Mass analyzer": ProtocolParameter(
        parameter_name = OntologyAnnotation(
            term = "Mass analyzer",
            term_source = ontologies["MS"],
            term_accession = "http://purl.obolibrary.org/obo/MS_1000451"))    
}

protocols = {
    "sample_collection": Protocol( 
        name = "sample_collection_protocol",
        protocol_type = OntologyAnnotation(
            term = "sample collection",
            term_source = None, #ontologies[""], 
            term_accession = None),
        parameters = [
            protocol_params["anatomical entity"]
            # additional parameters could include, e.g. collection or storage procedure
        ]),
    "dna_extraction": Protocol(
        name = "DNA extraction",
        protocol_type = OntologyAnnotation(
            term = "DNA extraction",
            term_source = ontologies["OBI"], 
            term_accession = "http://purl.obolibrary.org/obo/OBI_0000257")),
    "rna_extraction": Protocol(
        name = "RNA extraction",
        protocol_type = OntologyAnnotation(
            term = "RNA extraction",
            term_source = ontologies["OBI"], 
            term_accession = "http://purl.obolibrary.org/obo/OBI_0666666")),
    "WGS": Protocol(
        name = "Whole Genome Sequencing", 
        protocol_type = OntologyAnnotation(
            term = "Whole Genome Sequencing",
            term_source = ontologies["NCIT"],
            term_accession = "http://purl.obolibrary.org/obo/NCIT_C101294")),
    "RNAseq": Protocol(
        name = "mRNA Sequencing", 
        protocol_type = OntologyAnnotation(
            term = "mRNA Sequencing",
            term_source = ontologies["NCIT"],
            term_accession = "http://purl.obolibrary.org/obo/NCIT_C129432")),
    "microRNAseq": Protocol(
        name = "MicroRNA Sequencing", 
        protocol_type = OntologyAnnotation(
            term = "MicroRNA Sequencing",
            term_source = ontologies["NCIT"],
            term_accession = "http://purl.obolibrary.org/obo/NCIT_C156057")),
    # metabolomics protocols
    "Extraction_acylcarnitines": Protocol(
        name = "Extraction - Plasma acylcarnitine analysis", 
        protocol_type = OntologyAnnotation(
            term = "extraction",
            term_source = ontologies["OBI"],
            term_accession = "http://purl.obolibrary.org/obo/OBI_0302884"), #also in: MSIO, etc.
        description = "<i>Sample preparation:</i> To 50 uL plasma or serum were added 50 uL standard 1 (23.5 umol/L <sup>2</sup>H<sub>3</sub>-free carnitine in H<sub>2</sub>O) and 50 uL standard 2 (10 umol/L <sup>2</sup>H<sub>3</sub>-C<sub>2</sub>-, 2 umol/L <sup>2</sup>H<sub>3</sub>-C<sub>8</sub>- and 2 umol/L <sup>2</sup>H<sub>3</sub>-C<sub>16</sub>-carnitine in acetonitrile). Samples were mixed and subsequently deproteinized with 500 uL of acetonitrile and centrifuged. The resulting supernatant was dried under nitrogen at 45°C, and subsequently derivatized in 100 uL butanol-HCl for 15 min at 60°C. Samples were dried under nitrogen at 45°C and redissolved in 300 uL of acetonitrile. Prior to injection, 70 uL of the acetonitrile containing the acylcarnitines was mixed with 30 uL H<sub>2</sub>O.",
        uri = "https://doi.org/10.1023/A:1005587617745"
        #parameters = [
         # control samples, standards, etc.   
        # Post Extraction
        # Derivatization
        #]
    ), 
    "Chromatography_acylcarnitines": Protocol(
        name = "??Chromatography?? - Plasma acylcarnitine analysis", 
        protocol_type = OntologyAnnotation(
            term = "chromatography",
            term_source = ontologies["CHMO"],
            term_accession = "http://purl.obolibrary.org/obo/CHMO_0001000")), #also in: PRIDE
      # parameters instrument, column, mobile phase, gradient, settings, injection volume
        # Chromatography Instrument
        # Column type
        # Column model
        # Guard column
        # Autosampler model
    "Mass spectrometry_acylcarnitines": Protocol(
        name = "Mass spectrometry - Plasma acylcarnitine analysis", 
        protocol_type = OntologyAnnotation(
            term = "mass spectrometry",
            term_source = ontologies["CHMO"],
            term_accession = "http://purl.obolibrary.org/obo/CHMO_0000470"), #also in: MSIO, PRIDE
        description = "<i>Sample introduction and ESI-MS/MS analysis:</i> Free carnitine and acylcarnitines were measured using scanning for precursor ions of mass 85 from 200 to 550 Da during 2 min on a Micromass Quattro II triple-quadrupole mass spectrometer, using a Gilson 231XL autosampler and a Hewlett-Packard HP-1100 HPLC pump, essentially as described previously (Rashed et al 1995a, 1997).",
        uri = "https://doi.org/10.1023/A:1005587617745",
        parameters = [protocol_params["Scan polarity"],
                      protocol_params["Scan m/z range"],
                      protocol_params["Instrument"],
                      protocol_params["Ion source"],
                      protocol_params["Mass analyzer"]]),
        #  instrument used (make & manufacturer), ion source, ionisation mode (positive / negative), m/z range, and specific parameters such as temperatures, voltages, flow rates, scan rates
    "Data transformation_acylcarnitines": Protocol(
        name = "Data transformation - Plasma acylcarnitine analysis", 
        protocol_type = OntologyAnnotation(
            term = "data transformation",
            term_source = ontologies["OBI"],
            term_accession = "http://purl.obolibrary.org/obo/OBI_0200000"), #also in: EFO, GENEPIO, OMIABIS, STATO, etc.
        description = "<i>Calibration:</i> Calibration curves were obtained for free carnitine in the range 5-100 umol/L, for acetylcarnitine in the range 2-40 umol/L and for all other available acylcarnitines in the range 0.25-6 umol/L by adding standards to a normal plasma pool. All calibration curves were linear (r > 0.99, data not shown). For unsaturated and hydroxylated acylcarnitines an identical response to that of their saturated counterparts was assumed.",
        uri = "https://doi.org/10.1023/A:1005587617745"),
        # methods / pipelines and software used to transform the raw data
    "Metabolite identification_acylcarnitines": Protocol(
        name = "Metabolite identification - Plasma acylcarnitine analysis", 
        protocol_type = OntologyAnnotation(
            term = "metabolite identification",
            term_source = ontologies["MI"],
            term_accession = "http://purl.obolibrary.org/obo/MI_2131")),
    # details of methods / pipelines, reference databases and software used to identify features and/or annotate metabolites
}
for protocol in protocols.values():
    cohort_study.protocols.append(protocol)

Define sources and derived samples

In [6]:
# add dummy samples
for source_idx in range(1, 4):
    # create source (=individual)
    source_name = "individual_{0}".format(source_idx)
    source = Source(
        name = source_name,
        characteristics = [
            Characteristic(
                category = OntologyAnnotation(
                    term = "Organism",
                    term_source = ontologies["OBI"],
                    term_accession = "http://purl.obolibrary.org/obo/OBI_0100026"),
                value = OntologyAnnotation(
                    term = "Homo sapiens",
                    term_source = ontologies["NCBITAXON"],
                    term_accession = "http://purl.obolibrary.org/obo/NCBITaxon_9606"))])
    cohort_study.sources.append(source)
    # create sample
    sample_name = "sample_{0}".format(source_idx)
    sample = Sample(
        name = sample_name, 
        derives_from = source)
    cohort_study.samples.append(sample)
    # sample collection process
    sample_collection_process = Process(
        name = "samplecollection_{0}".format(source_idx),
        executes_protocol = protocols["sample_collection"],
        parameter_values = [
            ParameterValue(
                category = protocol_params["anatomical entity"], #ProtocolParameter 
                value = OntologyAnnotation(
                    term = "blood",
                    term_source = ontologies["UBERON"],
                    term_accession = "http://purl.obolibrary.org/obo/UBERON_0000178"))],
        inputs = [source],
        outputs = [sample])
    cohort_study.process_sequence.append(sample_collection_process)

Define assays

See assay options: https://github.com/ISA-tools/isa-api/blob/master/isatools/resources/config/yaml/assay-options.yml

In [7]:
assays = {
    "genomics_imtm": Assay(
        filename = "a_assay_genomics_imtm.txt",
        measurement_type = OntologyAnnotation(
            term = "DNA Sequence",
            term_source = ontologies["NCIT"],
            term_accession = "http://purl.obolibrary.org/obo/NCIT_C13299"),
        technology_type = OntologyAnnotation(
            term = "Whole Genome Sequencing",
            term_source = ontologies["NCIT"], 
            term_accession = "http://purl.obolibrary.org/obo/NCIT_C101294"),
        technology_platform = OntologyAnnotation(
            term = "Illumina platform",
            term_source = ontologies["GENEPIO"], 
            term_accession = "http://purl.obolibrary.org/obo/GENEPIO_0001923")
    ), 
#    "dnamethylation_uu": Assay(
#        filename = "a_assay_dnamethylation_uu.txt"),
#        measurement_type = OntologyAnnotation(
#            term = "Methylation Beta Value",
#            term_source = ontologies["NCIT"],
#            term_accession = "http://purl.obolibrary.org/obo/NCIT_C164051"),
#        technology_type = OntologyAnnotation(
#            term = "DNA methylation profiling by array assay",
#            term_source = ontologies["OBI"], 
#            term_accession = "http://purl.obolibrary.org/obo/OBI_0001332"),
#        technology_platform = OntologyAnnotation(
#            term = "Illumina Infinium MethylationEPIC BeadChip",
#            term_source = ontologies["OBI"], 
#            term_accession = "http://purl.obolibrary.org/obo/OBI_0002131")), 
    "rnaseq_fimm": Assay(
        # https://eatris.sharepoint.com/:w:/r/sites/eatrisplusgroup/_layouts/15/Doc.aspx?sourcedoc=%7B4966EA29-2AE7-459A-8F58-A3E5962B6988%7D&file=RNAseq%20workflow.docx&action=default&mobileredirect=true
        filename = "a_assay_rnaseq_fimm.txt",
        measurement_type = OntologyAnnotation(
            term = "Binary format",
            term_source = ontologies["EDAM"],
            term_accession = "http://edamontology.org/format_2333"),
        technology_type = OntologyAnnotation(
            term = "mRNA Sequencing",
            term_source = ontologies["NCIT"], 
            term_accession = "http://purl.obolibrary.org/obo/NCIT_C129432"),
        technology_platform = OntologyAnnotation(
            term = "Illumina NovaSeq 6000",
            term_source = ontologies["OBI"], 
            term_accession = "http://purl.obolibrary.org/obo/OBI_0002630")),
#    "mirnaseq_fimm": Assay(
#        filename = "a_assay_mirnaseq_fimm.txt"), 
#    "mirnaseq_sermas": Assay(
#        filename = "a_assay_mirnaseq_sermas.txt"), 
#    "proteomics_imtm": Assay(
#        filename = "a_assay_proteomics_imtm.txt"), 
    "metabolomics_acylcarnitines_rumc": Assay(
        filename = "a_assay_metabolomics_acylcarnitines_rumc.txt",
        measurement_type = OntologyAnnotation(
            term = "targeted metabolite profiling",
            term_source = ontologies["MSIO"],
            term_accession = "http://purl.obolibrary.org/obo/MSIO_0000100"),
        technology_type = OntologyAnnotation(
            term = "electrospray ionisation tandem mass spectrometry",
            term_source = ontologies["CHMO"], 
            term_accession = "http://purl.obolibrary.org/obo/CHMO_0000577"),
        technology_platform = "Micromass Quattro II triple-quadrupole mass spectrometer")
            #term_source = ontologies["tbd"], 
            #term_accession = "tbd"))
    # http://snomed.info/id/442613004 Quantitative measurement of acylcarnitine in plasma specimen (procedure)
#    "metabolomics_aminoacids_mumc": Assay(
#        filename = "a_assay_metabolomics_aminoacids_mumc.txt"), 
#    "metabolomics_fattyacids_mumc": Assay(
#        filename = "a_assay_metabolomics_fattyacids_mumc.txt"), 
}

Data file types are:
'Raw Data File',
'Derived Data File',
'Image File',
'Acquisition Parameter Data File',
'Derived Spectral Data File',
'Protein Assignment File',
'Raw Spectral Data File',
'Peptide Assignment File',
'Array Data File',
'Derived Array Data File',
'Post Translational Modification Assignment File',
'Derived Array Data Matrix File',
'Free Induction Decay Data File',
'Metabolite Assignment File',
'Array Data Matrix File'

In [8]:
for idx, sample in enumerate(cohort_study.samples):
    # DNA extraction
    dna = Material(
        name = "DNA_{0}".format(sample.name),
        type_ = "Extract Name")
    dna_extraction_process = Process(
        name = "DNA_extraction_{0}".format(sample.name),
        executes_protocol = protocols["dna_extraction"],
        inputs = [sample], 
        outputs = [dna])
    # RNA extraction
    rna = Material(
        name = "RNA_{0}".format(sample.name),
        type_ = "Extract Name")
    rna_extraction_process = Process(
        name = "RNA_extraction_{0}".format(sample.name),
        executes_protocol = protocols["rna_extraction"],
        inputs = [sample], 
        outputs = [rna])
    # genomics measurement
    wgs_raw_file = DataFile(
        filename = "WGS_rawdata_{0}".format(dna.name), 
        label = "Raw Data File", 
        generated_from = [dna])
    wgs_process = Process(
        name = "WGS_{0}".format(dna.name),
        executes_protocol = protocols["WGS"],
        inputs = [dna], 
        outputs = [wgs_raw_file])
    plink(dna_extraction_process, wgs_process)
    assays["genomics_imtm"].samples.append(sample)
    assays["genomics_imtm"].data_files.append(wgs_raw_file)
    assays["genomics_imtm"].other_material.append(dna)
    assays["genomics_imtm"].process_sequence.append(dna_extraction_process)
    assays["genomics_imtm"].process_sequence.append(wgs_process)
    # transcriptomics - RNAseq
    rnaseq_binarybasecall_file = DataFile(
        filename = "RNAseq_BCL_{0}".format(rna.name), 
        label = "Raw Data File", 
        generated_from = [rna])
    rnaseq_process = Process(
        name = "RNAseq_{0}".format(rna.name),
        executes_protocol = protocols["RNAseq"],
        inputs = [rna], 
        outputs = [rnaseq_binarybasecall_file])
    plink(rna_extraction_process, rnaseq_process)
    assays["rnaseq_fimm"].samples.append(sample)
    assays["rnaseq_fimm"].data_files.append(rnaseq_binarybasecall_file)
    assays["rnaseq_fimm"].other_material.append(rna)
    assays["rnaseq_fimm"].process_sequence.append(rna_extraction_process)
    assays["rnaseq_fimm"].process_sequence.append(rnaseq_process)
    # microRNAseq - FIMM
    # metabolomics_acylcarnitines_rumc
    prepared_acylcarnitine_sample = Material(
        name = "prepared_sample_acylcarnitine_{0}".format(sample.name),
        type_ = "Extract Name")
    acylcarnitine_sampleprep_process = Process(
        name = "Sample_preparation_plasma_acylcarnitine_analysis_{0}".format(sample.name),
        executes_protocol = protocols["Extraction_acylcarnitines"],
        inputs = [sample], 
        outputs = [prepared_acylcarnitine_sample])
    #acylcarnitine_chromatography_process = Process(
    #    name = "Chromatography_plasma_acylcarnitine_analysis_{0}".format(sample.name),
    #    executes_protocol = protocols["Chromatography_acylcarnitines"],
    #    inputs = [prepared_acylcarnitine_sample]) 
    #    #outputs = [prepared_acylcarnitine_sample])
    acylcarnitine_MS_process = Process(
        name = "MS_plasma_acylcarnitine_analysis_{0}".format(sample.name),
        executes_protocol = protocols["Mass spectrometry_acylcarnitines"],
        inputs = [prepared_acylcarnitine_sample], 
        #outputs = [prepared_acylcarnitine_sample])
        parameter_values = [
            ParameterValue(category = protocol_params["Scan polarity"],
                           value = "tbd"),
            ParameterValue(category = protocol_params["Scan m/z range"],
                           value = "tbd - distinguish between MS1 and MS2?"),
            ParameterValue(category = protocol_params["Instrument"],
                           value = "Micromass Quattro II triple-quadrupole mass spectrometer"),
            ParameterValue(category = protocol_params["Ion source"],
                           value = OntologyAnnotation(
                               term = "electrospray ionization",
                               term_source = ontologies["CHMO"],
                               term_accession = "http://purl.obolibrary.org/obo/MS_1000073")), 
            ParameterValue(category = protocol_params["Mass analyzer"],
                           value = OntologyAnnotation(
                               term = "triple quadrupole mass spectrometer",
                               term_source = ontologies["CHMO"],
                               term_accession = "http://purl.obolibrary.org/obo/CHMO_0002021"))])
    acylcarnitine_datatransform_process = Process(
        name = "Data_transformation_plasma_acylcarnitine_analysis_{0}".format(sample.name),
        executes_protocol = protocols["Data transformation_acylcarnitines"])
    acylcarnitine_metaboliteident_process = Process(
        name = "Metabolite_identification_plasma_acylcarnitine_analysis_{0}".format(sample.name),
        executes_protocol = protocols["Metabolite identification_acylcarnitines"])
    plink(acylcarnitine_sampleprep_process, acylcarnitine_MS_process)
    plink(acylcarnitine_MS_process, acylcarnitine_datatransform_process)
    plink(acylcarnitine_datatransform_process, acylcarnitine_metaboliteident_process)
    assays["metabolomics_acylcarnitines_rumc"].samples.append(sample)
    assays["metabolomics_acylcarnitines_rumc"].other_material.append(prepared_acylcarnitine_sample)
    assays["metabolomics_acylcarnitines_rumc"].process_sequence.append(acylcarnitine_sampleprep_process)
    #assays["metabolomics_acylcarnitines_rumc"].process_sequence.append(acylcarnitine_chromatography_process)
    assays["metabolomics_acylcarnitines_rumc"].process_sequence.append(acylcarnitine_MS_process)
    assays["metabolomics_acylcarnitines_rumc"].process_sequence.append(acylcarnitine_datatransform_process)
    assays["metabolomics_acylcarnitines_rumc"].process_sequence.append(acylcarnitine_metaboliteident_process)
    

In [9]:
# add assays to study
for assay in assays.values():
    cohort_study.assays.append(assay)

Write ISA-Tab files

In [10]:
# write to ISA-Tab
import os
out_dir = "."
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)
from isatools import isatab
isatab.dump(investigation, out_dir)

isatools.model.Investigation(identifier='', filename='i_investigation.txt', title='EATRIS-Plus - Flagship in Personalised Medicine', submission_date='', public_release_date='', ontology_source_references=[isatools.model.OntologySource(name='CHEBI - Chemical Entities of Biological Interest', file='http://purl.obolibrary.org/obo/chebi.owl', version='', description='A structured classification of molecular entities of biological interest focusing on 'small' chemical compounds.', comments=[]), isatools.model.OntologySource(name='CHMO - Chemical Methods Ontology', file='http://purl.obolibrary.org/obo/chmo.owl', version='', description='CHMO, the chemical methods ontology, describes methods used to collect data in chemical experiments, such as mass spectrometry and electron microscopy prepare and separate material for further analysis, such as sample ionisation, chromatography, and electrophoresis synthesise materials, such as epitaxy and continuous vapour deposition It also describes the in

In [11]:
# read ISA-Tab files and validate
with open(os.path.join(out_dir, "i_investigation.txt")) as my_file:
    ISA = isatab.validate(my_file)

2021-11-02 07:37:59,675 [INFO]: isatab.py(validate:4196) >> Loading... ./i_investigation.txt
2021-11-02 07:37:59,888 [INFO]: isatab.py(validate:4198) >> Running prechecks...
2021-11-02 07:38:00,038 [INFO]: isatab.py(validate:4219) >> Finished prechecks...
2021-11-02 07:38:00,039 [INFO]: isatab.py(validate:4220) >> Loading configurations found in /opt/conda/envs/isa_env/lib/python3.8/site-packages/isatools/resources/config/xml
2021-11-02 07:38:00,061 [INFO]: isatab.py(validate:4225) >> Using configurations found in /opt/conda/envs/isa_env/lib/python3.8/site-packages/isatools/resources/config/xml
2021-11-02 07:38:00,062 [ERROR]: isatab.py(check_measurement_technology_types:3268) >> (E) Could not load configuration for measurement type 'DNA Sequence' and technology type 'Whole Genome Sequencing' for STUDY ASSAY.0'
2021-11-02 07:38:00,063 [ERROR]: isatab.py(check_measurement_technology_types:3268) >> (E) Could not load configuration for measurement type 'Binary format' and technology type 

2021-11-02 07:38:00,173 [INFO]: isatab.py(validate:4416) >> Checking consistencies between study sample table and assay tables...
2021-11-02 07:38:00,174 [INFO]: isatab.py(validate:4421) >> Finished checking study sample table against assay tables...
2021-11-02 07:38:00,174 [ERROR]: isatab.py(validate:4326) >> Could not load config matching (targeted metabolite profiling, electrospray ionisation tandem mass spectrometry)
2021-11-02 07:38:00,207 [INFO]: isatab.py(validate:4416) >> Checking consistencies between study sample table and assay tables...
2021-11-02 07:38:00,210 [INFO]: isatab.py(validate:4421) >> Finished checking study sample table against assay tables...
2021-11-02 07:38:00,211 [INFO]: isatab.py(validate:4425) >> Skipping pooling test as there are outstanding errors
2021-11-02 07:38:00,212 [INFO]: isatab.py(validate:4434) >> Finished validation...


In [12]:
from isatools.tests import configurator
config_dict = test_isatab_configurator.load(self._config_dir)

ImportError: cannot import name 'configurator' from 'isatools.tests' (/opt/conda/envs/isa_env/lib/python3.8/site-packages/isatools/tests/__init__.py)

In [None]:
# write to ISA-JSON
# see example: https://isa-tools.org/isa-api/content/examples/example-createSimpleISAJSON.html
import json
from isatools.isajson import ISAJSONEncoder
with open(os.path.join(out_dir, "isa.json"), "w") as out_file:
    json.dump(
        investigation, 
        out_file,
        cls = ISAJSONEncoder, 
        sort_keys = True, 
        indent = 4, 
        separators = (',', ': '))

In [None]:
#from isatools.convert import json2isatab
#from isatools import isajson
#isajson.validate(open('isa.json'))
#with open("isa.json") as file_pointer:
#    json2isatab.convert(file_pointer, './ISA/')