# Example Script for parsing an ISA JSON

## Import statements

In [1]:
import json
import os
from ena_upload.json_parsing.ena_submission import EnaSubmission
from dotenv import dotenv_values


## Reading a JSON file

In [2]:

# Read the ISA JSON test file. The context manager closes the handle as soon
# as parsing is done; JSON is read explicitly as UTF-8 so the result does not
# depend on the platform's default encoding.
with open(
    "tests/test_data/simple_test_case_v2.json", encoding="utf-8"
) as isa_json_file:
    isa_json = json.load(isa_json_file)

## Setting some extra parameters

In [3]:


# Folder the .xlsx files are written to when exporting is enabled.
outputfolder = "./output_folder/"

# Set to True to also export each resulting DataFrame to an .xlsx file.
export_to_excel = False

# Passed to EnaSubmission.from_isa_json in the parsing cell; presumably
# selects which assay streams end up in the submission -- TODO confirm.
required_assays = [{"assay_stream": "Ena stream 1"}]

## Parsing

In [4]:


# Build the submission object from the loaded ISA JSON and the assay settings.
submission = EnaSubmission.from_isa_json(isa_json, required_assays)
# The result is iterated with .items() in the output cell as name -> DataFrame;
# the recorded output shows the names study, sample, experiment and run.
submission_dfs = submission.generate_dataframes()

## Output

In [5]:

# Create the output folder only when exporting; exist_ok=True makes this a
# no-op when the folder is already there (no separate existence check needed).
if export_to_excel:
    os.makedirs(outputfolder, exist_ok=True)

# Show every generated DataFrame and optionally write it to "<name>.xlsx".
for k, df in submission_dfs.items():
    print(f"Dataframe {k}:")
    display(df)  # `display` is not imported here; assumed to come from the notebook (IPython) environment
    if export_to_excel:
        # os.path.join works whether or not `outputfolder` ends with a slash.
        df.to_excel(os.path.join(outputfolder, f"{k}.xlsx"))

print("Done!")


Dataframe study:


Unnamed: 0,alias,title,study_type,study_abstract,new_study_type,pubmed_id
0,study_alias_48_49,study_title_1,Transcriptome Analysis,study_abstract_1,,


Dataframe sample:


Unnamed: 0,alias,title,sample_description,collection date,accession,submission date,status,geographic location (country and/or sea),taxon_id
0,sample_alias_224,sample_title_1,sample_description_1,2020-10-11,,,add,Argentina,9606
1,sample_alias_225,sample_title_2,sample_description_2,2008-01-24,,,add,Belgium,646099


Dataframe experiment:


Unnamed: 0,alias,study_alias,sample_alias,library_name,title,accession,submission date,status,design_description,library_source,library_strategy,library_selection,library_layout,insert_size,platform,instrument_model
0,experiment_alias_227,study_alias_48_49,sample_alias_224,library_name_1,experiment_title_1,,,add,design_description_1,GENOMIC,WGA,RANDOM,PAIRED,250.0,LS454,454 GS 20
1,experiment_alias_228,study_alias_48_49,sample_alias_225,library_name_2,experiment_title_2,,,add,design_description_2,TRANSCRIPTOMIC,RNA-Seq,repeat fractionation,SINGLE,,Illumina,Illumina Genome Analyzer
2,experiment_alias_229,study_alias_48_49,sample_alias_225,library_name_2,experiment_title_3,,,add,design_description_3,TRANSCRIPTOMIC,RNA-Seq,repeat fractionation,SINGLE,,Illumina,Illumina Genome Analyzer


Dataframe run:


Unnamed: 0,alias,experiment_alias,file_name,file_type,file checksum,accession,submission date,status
0,run_alias_233_234,experiment_alias_227,ENA_TEST2.R1.fastq.gz,fastq,,,,add
1,run_alias_233_234,experiment_alias_227,ENA_TEST2.R2.fastq.gz,fastq,,,,add
2,run_alias_235,experiment_alias_229,ENA_TEST1.R1.fastq.gz,fastq,,,,add


Done!


# Automated pipeline for DataHub

In [None]:
# Load key/value settings from the local .env file.
config = dotenv_values(".env")
try:
    datahub_token = config["DATAHUB_API_TOKEN"]
except KeyError:
    # Same exception type as before, but with a message that says what to fix.
    raise KeyError(
        "DATAHUB_API_TOKEN is not defined in .env; add it before running "
        "the DataHub pipeline cells."
    ) from None

In [None]:
import requests

# Two candidate endpoints (judging by the path, both export a single page as
# ISA JSON). The local instance is the one actually used; point `url` at
# DATAHUB_DEV_URL to target the dev server instead.
DATAHUB_DEV_URL = "https://datahub-dev.elixir-belgium.org/single_pages/16/export_isa"
LOCAL_URL = "http://localhost:3000/single_pages/2/export_isa"
url = LOCAL_URL

data = {"key": "value"}  # looks like a placeholder payload -- TODO confirm
headers = {"Content-Type": "application/json"}

# NOTE(review): `datahub_token` is loaded in the previous cell but is not sent
# with this request -- confirm how the API expects to be authenticated.
# The timeout keeps the cell from blocking forever if the server is unreachable.
response = requests.post(url, json=data, headers=headers, timeout=30)
