In [None]:
from aavomics import database
import os
import pandas
import shutil

In [None]:
# Build the per-sample metadata table for the SRA submission: one row per
# transcriptome read set, keyed by "AAVomics_<read set name>", then write it
# to aavomics_SRA_sample_metadata.csv under database.DATA_PATH.
read_sets_dict = {}

for read_set in database.READ_SETS:

    # Only the first sequencing library of each read set is consulted.
    sequencing_library = read_set.sequencing_libraries[0]

    # Filter *before* registering a row: non-transcriptome read sets previously
    # left an empty record behind, which became an all-blank row in the CSV.
    if sequencing_library.type != "Transcriptome":
        continue

    source_tissue = sequencing_library.cell_set.source_tissue
    tissue = source_tissue.region
    animal = source_tissue.animal

    # Key order is kept stable because it drives the column order of the CSV.
    sample_metadata = {
        "organism": "Mus musculus",
        "tissue": tissue,
        "sex": "male",  # NOTE(review): hard-coded for every sample - confirm
        "birth_date": animal.DOB,
        "collection_date": animal.extraction_date,
        "animal_id": animal.name,
    }

    if animal.DOB is not None:
        # `.days` of the difference between extraction date and date of birth
        # (presumably date/datetime objects - TODO confirm).
        sample_metadata["age"] = (animal.extraction_date - animal.DOB).days
    else:
        # Without a date of birth no age can be computed; report a stage instead.
        sample_metadata["dev_stage"] = "adult"

    read_sets_dict["AAVomics_" + read_set.name] = sample_metadata

df = pandas.DataFrame.from_dict(read_sets_dict, orient="index")
df.to_csv(os.path.join(database.DATA_PATH, "aavomics_SRA_sample_metadata.csv"))

In [None]:
# Build the per-read-set file metadata table for the SRA submission: library
# selection, the read files belonging to each read set, the sample name and the
# sequencing run type. Written to aavomics_SRA_file_metadata.csv under
# database.DATA_PATH.
read_sets_dict = {}

for read_set in database.READ_SETS:

    # Only the first sequencing library of each read set is consulted.
    sequencing_library = read_set.sequencing_libraries[0]
    cell_set_name = sequencing_library.cell_set.name

    # Transcriptome libraries and all other libraries keep their reads in
    # different sub-directories of the cell set and get different selections.
    if sequencing_library.type == "Transcriptome":
        library_selection = "RANDOM"
        reads_subdirectory = "transcriptome"
    else:
        library_selection = "PCR"
        reads_subdirectory = "virus"

    reads_dir = os.path.join(database.DATA_PATH, "cell_sets", cell_set_name, reads_subdirectory, "reads")

    # Key order is kept stable because it drives the column order of the CSV.
    file_metadata = {"library_selection": library_selection}

    # os.listdir() returns entries in arbitrary order; sort so that the
    # filename/filename2/... columns are assigned reproducibly between runs.
    # A file belongs to this read set when its name, minus the last four
    # "_"-separated fields, equals the read set name.
    matching_files = [
        file_name
        for file_name in sorted(os.listdir(reads_dir))
        if "_".join(file_name.split("_")[0:-4]) == read_set.name
    ]

    # First file goes in "filename", subsequent ones in "filename2", "filename3", ...
    for file_index, file_name in enumerate(matching_files):
        column = "filename" if file_index == 0 else "filename%i" % (file_index + 1)
        file_metadata[column] = file_name

    # Sample name is the cell set name with its first "_"-separated field dropped.
    file_metadata["sample_name"] = "AAVomics_" + "_".join(cell_set_name.split("_")[1:])
    file_metadata["type"] = read_set.sequencing_run.type

    read_sets_dict["AAVomics_" + read_set.name] = file_metadata

df = pandas.DataFrame.from_dict(read_sets_dict, orient="index")
df.to_csv(os.path.join(database.DATA_PATH, "aavomics_SRA_file_metadata.csv"))

In [None]:
# Stage the read files of every read set in a single flat folder
# (<DATA_PATH>/SRA_upload) by copying them out of the per-cell-set directories.
UPLOAD_FOLDER = os.path.join(database.DATA_PATH, "SRA_upload")

# Create the staging folder on first run.
if not os.path.exists(UPLOAD_FOLDER):
    os.makedirs(UPLOAD_FOLDER)

for read_set in database.READ_SETS:
    
    read_set_name = "AAVomics_" + read_set.name
    # NOTE(review): read_sets_dict is not created in this cell; it is the dict
    # left over from the previous cell, so this cell fails on its own and
    # overwrites that dict's entries when run after it.
    read_sets_dict[read_set_name] = {}
    
    # Only the first sequencing library of each read set is consulted.
    sequencing_library = read_set.sequencing_libraries[0]
    
    # Pick the reads directory by library type: "transcriptome" for
    # Transcriptome libraries, "virus" for everything else.
    if sequencing_library.type == "Transcriptome":
        # NOTE(review): "library_selection" is not read again in the visible
        # part of this cell - looks like a leftover from the metadata cell.
        read_sets_dict[read_set_name]["library_selection"] = "RANDOM"
        reads_dir = os.path.join(database.DATA_PATH, "cell_sets", read_set.sequencing_libraries[0].cell_set.name, "transcriptome", "reads")

    else:
        read_sets_dict[read_set_name]["library_selection"] = "PCR"
        reads_dir = os.path.join(database.DATA_PATH, "cell_sets", read_set.sequencing_libraries[0].cell_set.name, "virus", "reads")
    
    files = os.listdir(reads_dir)
    # NOTE(review): filename_index and files_to_upload are assigned but not
    # used in the visible part of this cell.
    filename_index = 0
    
    files_to_upload = []
    
    for file in files:
        
        # File name with its last four "_"-separated fields removed.
        prefix = "_".join(file.split("_")[0:-4])
        
        # Copy only the files whose prefix is exactly this read set's name;
        # the file keeps its original name inside UPLOAD_FOLDER.
        if prefix == read_set.name:
            shutil.copy(os.path.join(reads_dir, file), os.path.join(UPLOAD_FOLDER, file))