In [None]:
import os
from cellxgene_mods import CxG_API


CxG_API.config() # set env='dev' or 'staging' if working in either of those test environments

**Specify the Collection to upload to**<br>
If a Revision, use the Revision ID, not the Published ID

In [None]:
# ID of the Collection to upload to; for a Revision, use the Revision ID, not the Published ID
collection_id = ''

**List the existing Datasets in the Collection**

In [None]:
# Fetch the Collection once, then index its Datasets by ID:
# `status` maps dataset_id -> processing_status, `titles` maps dataset_id -> title
coll_datasets = CxG_API.get_collection(collection_id)['datasets']
status = dict((entry['dataset_id'], entry['processing_status']) for entry in coll_datasets)
titles = dict((entry['dataset_id'], entry['title']) for entry in coll_datasets)
# Display the ID -> title mapping so the right dataset_id can be picked out
titles

**Set the directory that the files to upload are in**

In [None]:
# Directory holding the files to upload; a leading ~ is expanded to the user's home directory
directory = os.path.expanduser('~/Downloads/')

**List the `_revised.h5ad` and `fragments.tsv.gz` files in that directory**<br>
The curation_qa notebook saves files with a `_revised.h5ad` suffix

In [None]:
# Print only the entries whose names end in one of the two upload-candidate suffixes
for filename in os.listdir(directory):
    if filename.endswith(('_revised.h5ad', 'fragments.tsv.gz')):
        print(filename)

**Fill in Dataset ID and file names to upload**<br>
Use `new` for the _dataset_id_ if adding a Dataset, rather than replacing an existing Dataset\
_fragments_ is optional\
Use `existing` for _anndata_ if adding fragments to an existing Dataset without re-uploading the .h5ad

In [None]:
# One entry per Dataset to upload:
#   dataset_id - ID of a Dataset already in the Collection, or 'new' to add a Dataset
#   anndata    - path to the .h5ad file, or 'existing' to keep the Dataset's current matrix
#   fragments  - optional path to a fragments file
# Paths are built with os.path.join so they stay correct whether or not
# `directory` ends with a path separator.
datasets = [
    {
        'dataset_id': '04a6b46d-138d-4cb0-b5a6-8cb85735590a',
        'anndata': 'existing',
        'fragments': os.path.join(directory, 'first_fragments.tsv.gz')
    },
    {
        'dataset_id': 'new',
        'anndata': os.path.join(directory, 'second_revised.h5ad'),
        'fragments': os.path.join(directory, 'second_fragments.tsv.gz')
    },
    {
        'dataset_id': 'new',
        'anndata': os.path.join(directory, 'third_revised.h5ad')
    }
]

**Confirm the files exist, the Dataset IDs are valid, and the existing Datasets have finished processing**

In [None]:
# Check every entry up front and report all problems found, so they can be fixed
# before any upload starts. Problems are printed, never raised.
all_ids = [d.get('dataset_id') for d in datasets]
problems = []
for index,d in enumerate(datasets):
    # Both keys are read unconditionally below (and by the upload loop); report a
    # missing one here instead of failing with a bare KeyError.
    missing_keys = [key for key in ('dataset_id', 'anndata') if key not in d]
    if missing_keys:
        problems.append(f"Must define {' and '.join(missing_keys)} for datasets[{index}]")
        continue

    # Local files must exist; 'existing' is a keyword, not a path.
    if d['anndata'] != 'existing' and not os.path.exists(d['anndata']):
        problems.append(f"Invalid file: {d['anndata']}")
    if 'fragments' in d and not os.path.exists(d['fragments']):
        problems.append(f"Invalid file: {d['fragments']}")

    # 'existing' only makes sense when fragments are being added to a known Dataset.
    if d['anndata'] == 'existing':
        if 'fragments' not in d:
            problems.append(f"Must define fragments if revising an existing matrix for datasets[{index}]")
        if d['dataset_id'] == 'new':
            problems.append(f"Must define either dataset_id or anndata file to upload for datasets[{index}]")

    # A concrete ID must be unique in this batch, belong to the Collection,
    # and have finished processing.
    if d['dataset_id'] != 'new':
        if all_ids.count(d['dataset_id']) > 1:
            problems.append(f"Repeated dataset: {d['dataset_id']}")
        if d['dataset_id'] not in titles:
            problems.append(f"Invalid dataset: {d['dataset_id']}")
        elif status[d['dataset_id']] != 'SUCCESS':
            problems.append(f"{d['dataset_id']} is processing_status:{status[d['dataset_id']]}, must wait for SUCCESS")

# An explicit confirmation distinguishes a clean run from a cell that was never run.
if problems:
    print('\n'.join(problems))
else:
    print('No problems found')

**Upload each Dataset**

In [None]:
# Push each configured entry to the Collection, one Dataset at a time
for d in datasets:
    # 'new' is a placeholder: create the Dataset and write the returned value back
    # into the entry so every call below addresses the created Dataset
    if d['dataset_id'] == 'new':
        d['dataset_id'] = CxG_API.create_dataset(collection_id)
    target_id = d['dataset_id']

    if d['anndata'] != 'existing':
        # Upload the local .h5ad and start a fresh manifest from the result
        manifest = {'anndata': CxG_API.upload_local_datafile(d['anndata'], collection_id, target_id)}
    else:
        # Start from the Dataset's current manifest (presumably so its matrix is kept - confirm against CxG_API)
        manifest = CxG_API.get_dataset_manifest(collection_id, target_id)

    # Fragments are optional; when given, upload them and add them to the manifest
    if 'fragments' in d:
        manifest['atac_fragment'] = CxG_API.upload_local_datafile(d['fragments'], collection_id, target_id)

    CxG_API.upload_datafiles_from_manifest(manifest, collection_id, target_id)