# Example script for parsing an ISA JSON file

## Import statements

In [5]:
import json
import os
from json_parsing.ena_submission import EnaSubmission


## Reading a JSON file

In [6]:

# Read the ISA JSON test file into a Python object.
# The context manager closes the file handle as soon as parsing is done
# (the bare open() call previously left it open for the kernel's lifetime).
# JSON is read explicitly as UTF-8 so the result does not depend on the
# platform's default locale encoding.
with open(
    "tests/test_data/multi_study_multi_assay_stream_investigation.json",
    encoding="utf-8",
) as isa_json_file:
    isa_json = json.load(isa_json_file)

## Setting some extra parameters

In [7]:


# Directory that receives the xlsx files when exporting is enabled.
outputfolder = "./output_folder/"

# Set this to 'True' to also export the resulting DataFrames to xlsx files.
export_to_excel = False

# Filter entries handed to EnaSubmission.from_isa_json below.
# NOTE(review): presumably each entry selects assays by assay stream name or
# by ENA study title - confirm against the EnaSubmission implementation.
required_assays = [{"assay_stream": "Ena stream 1"}]
required_assays.extend(
    {"ena_study_title": study_title}
    for study_title in ("Ena Study 2", "Ena Study 3")
)

## Parsing

In [8]:


# Build the submission object from the loaded ISA JSON, passing along the
# `required_assays` entries defined in the parameters cell.
submission = EnaSubmission.from_isa_json(isa_json, required_assays)
# Produce the DataFrames for the submission. The output cell iterates the
# result with `.items()`, so it is used as a mapping of name -> DataFrame.
submission_dfs = submission.generate_dataframes()

## Output

In [9]:

# Create the export directory only when an export was actually requested.
# exist_ok=True replaces the separate os.path.exists() check, which was
# open to a check-then-create race.
if export_to_excel:
    os.makedirs(outputfolder, exist_ok=True)

# Show every generated DataFrame and, if enabled, write it to
# "<outputfolder>/<name>.xlsx".
for k, df in submission_dfs.items():
    print(f"Dataframe {k}:")
    display(df)  # `display` is provided by the IPython/Jupyter environment
    if export_to_excel:
        # os.path.join keeps the path correct whether or not `outputfolder`
        # ends with a path separator (plain concatenation did not).
        df.to_excel(os.path.join(outputfolder, f"{k}.xlsx"))

print("Done!")


Dataframe study:


Unnamed: 0,alias,title,study_type,study_abstract,new_study_type,pubmed_id
0,https://datahub.elixir-belgium.org/studies/27_28,Ena Study 1,Whole Genome Sequencing,This is Ena Study 1.,,56
1,https://datahub.elixir-belgium.org/studies/29_30,Ena Study 2,Other,This is Ena Study 2.,My special study type,56
2,https://datahub.elixir-belgium.org/studies/31_32,Ena Study 3,Other,This is Ena Study 3.,My other special study type,7


Dataframe samples:


Unnamed: 0,alias,title,sample_description,collection date,accession,submission date,status,geographic location (country and/or sea),taxon_id
0,https://datahub.elixir-belgium.org/samples/142,Sample title 1,Sample description 1,2023,,,,Afghanistan,1234
1,https://datahub.elixir-belgium.org/samples/143,Sample title 2,Sample description 2,2022,,,,Afghanistan,1234
2,https://datahub.elixir-belgium.org/samples/144,Sample title 3,Sample description 3,2021,,,,Albania,2345
3,https://datahub.elixir-belgium.org/samples/145,Sample title 4,Sample description 4,2020,,,,Albania,2345
4,https://datahub.elixir-belgium.org/samples/164,Sample title 5,Sample description 5,2019,,,,Afghanistan,9876
5,https://datahub.elixir-belgium.org/samples/165,Sample title 6,Sample description 6,2018,,,,Albania,8765


Dataframe experiments:


Unnamed: 0,alias,study_alias,sample_alias,library_name,title,accession,submission date,status,library_construction_protocol,design_description,library_source,library_strategy,library_selection,library_layout,insert_size,platform,instrument_model
0,https://datahub.elixir-belgium.org/samples/146,https://datahub.elixir-belgium.org/studies/27_28,https://datahub.elixir-belgium.org/samples/142,Library 1,Library title 1,,,,My special protocol 1,Library description 1,GENOMIC,WGS,RANDOM,SINGLE,123,LS454,454 GS
1,https://datahub.elixir-belgium.org/samples/147,https://datahub.elixir-belgium.org/studies/27_28,https://datahub.elixir-belgium.org/samples/143,Library 2,Library title 2,,,,My special protocol 2,Library description 2,GENOMIC SINGLE CELL,WGA,PCR,PAIRED,234,Illumina,Illumina Genome Analyzer
2,https://datahub.elixir-belgium.org/samples/148,https://datahub.elixir-belgium.org/studies/27_28,https://datahub.elixir-belgium.org/samples/144,Library 3,Library title 3,,,,My special protocol 3,Library description 3,TRANSCRIPTOMIC,WXS,RANDOM PCR,SINGLE,345,PacBio,PacBio RS
3,https://datahub.elixir-belgium.org/samples/149,https://datahub.elixir-belgium.org/studies/27_28,https://datahub.elixir-belgium.org/samples/145,Library 4,Library title 4,,,,My special protocol 4,Library description 4,TRANSCRIPTOMIC SINGLE CELL,RNA-Seq,RT-PCR,PAIRED,456,Themo Fisher Scientific,AB 3730xL Genetic Analyzer
4,https://datahub.elixir-belgium.org/samples/154,https://datahub.elixir-belgium.org/studies/29_30,https://datahub.elixir-belgium.org/samples/142,Library 5,Library title 5,,,,My library construction protocol 5,Library design description 5,GENOMIC,ssRNA-seq,HMPR,SINGLE,123,LS454,454 GS 20
5,https://datahub.elixir-belgium.org/samples/155,https://datahub.elixir-belgium.org/studies/29_30,https://datahub.elixir-belgium.org/samples/143,Library 6,Library title 6,,,,My library construction protocol 6,Library design description 6,GENOMIC SINGLE CELL,miRNA-Seq,MF,PAIRED,234,Illumina,Illumina Genome Analyzer II
6,https://datahub.elixir-belgium.org/samples/156,https://datahub.elixir-belgium.org/studies/29_30,https://datahub.elixir-belgium.org/samples/144,Library 7,Library title 7,,,,My library construction protocol 7,Library design description 7,TRANSCRIPTOMIC,ncRNA-Seq,size fractionation,SINGLE,345,PacBio,PacBio RS II
7,https://datahub.elixir-belgium.org/samples/157,https://datahub.elixir-belgium.org/studies/29_30,https://datahub.elixir-belgium.org/samples/145,Library 8,Library title 8,,,,My library construction protocol 8,Library design description 8,SYNTHETIC,FL-cDNA,repeat fractionation,PAIRED,456,Themo Fisher Scientific,AB 3730 Genetic Analyzer
8,https://datahub.elixir-belgium.org/samples/166,https://datahub.elixir-belgium.org/studies/31_32,https://datahub.elixir-belgium.org/samples/164,Library 9,Library title 9,,,,My library construction protocol 9,Library design description 9,GENOMIC,EST,MNase,SINGLE,987,Themo Fisher Scientific,AB 3500xL Genetic Analyzer
9,https://datahub.elixir-belgium.org/samples/167,https://datahub.elixir-belgium.org/studies/31_32,https://datahub.elixir-belgium.org/samples/165,Library 10,Library title 10,,,,My library construction protocol 10,Library design description 10,SYNTHETIC,Hi-C,Oligo-dT,PAIRED,876,LS454,454 GS FLX


Dataframe runs:


Unnamed: 0,alias,experiment_alias,file_name,file_type,file checksum,accession,submission date,status
0,https://datahub.elixir-belgium.org/samples/150,ena_run_alias_prefix146,data_file_1.bam,bam,,,,
1,https://datahub.elixir-belgium.org/samples/151,ena_run_alias_prefix147,data_file_2.cram,cram,,,,
2,https://datahub.elixir-belgium.org/samples/152,ena_run_alias_prefix148,data_file_3.fastq,fastq,,,,
3,https://datahub.elixir-belgium.org/samples/153,ena_run_alias_prefix149,data_file_4.sff,sff,,,,
4,https://datahub.elixir-belgium.org/samples/158,ena_run_alias_prefix154,data file 5.bam,bam,,,,
5,https://datahub.elixir-belgium.org/samples/159,ena_run_alias_prefix155,data file 6.cram,cram,,,,
6,https://datahub.elixir-belgium.org/samples/160,ena_run_alias_prefix156,data file 7.fastq,fastq,,,,
7,https://datahub.elixir-belgium.org/samples/161,ena_run_alias_prefix157,data file 8.sff,sff,,,,
8,https://datahub.elixir-belgium.org/samples/168,ena_run_alias_prefix166,data file 9.sff,sff,,,,
9,https://datahub.elixir-belgium.org/samples/169,ena_run_alias_prefix167,data file 10.fastq,fastq,,,,


Done!
