## LINK GEO and SRA accessions to data portal items

### There is an optional cell that, given a list of replicate set accessions or a search url for them, retrieves the accessions for those sets as well as for their linked experiments, biosamples and files

### To add dbxrefs to items provide as input a 3 column tab separated file:

### column 1 = any ID that can be used to retrieve the db item (uuid, accession, alias)
### column 2 = GEO accession (GSE, GSM, SAMN)
### column 3 = SRA accession (SRA, SRX)

In [None]:
from dcicutils import ff_utils
from functions.notebook_functions import *
from functions.wfr import *

# Credentials passed as the `key` argument to every ff_utils call in the cells below.
# Replace 'keyname' with the name of the key you want to use.
# NOTE(review): get_key is presumably provided by one of the wildcard imports above - confirm.
my_auth = get_key('keyname')

### Optional - For a set of replicates specified by listing them or a search url - gather all the linked items that can get GEO or SRA accessions added - i.e. experiments, biosamples, files

In [None]:
# Replicate set identifiers to inspect; leave the list empty to use search_url instead.
sets_in_scope = ["4DNES2D4WOKP", "4DNES6MT2D6K", "4DNES8R5RDVD", "4DNESLQDCU1A", "4DNESNTQV8XL", "4DNESVIKXE89", "4DNESX5HHBRP", "4DNESXR4J4MS"]

# Search used only when sets_in_scope is empty.
search_url  = '' # /search/?type=ExperimentSetReplicate&experimentset_type=replicate&lab.display_title=Sheng+Zhong%2C+UCSD&status=pre-release'

# Start from an empty list so that the loop below is a no-op (instead of a
# NameError) when neither a list of sets nor a search url has been provided.
esets = []
if sets_in_scope:
    esets = [ff_utils.get_metadata(i, my_auth) for i in sets_in_scope]
elif search_url:
    esets = list(ff_utils.search_metadata(search_url, my_auth))
else:
    print("No replicate sets specified - fill in sets_in_scope or search_url")

# Print the accession of each set followed by the accessions of its linked
# experiments, their biosample and their files.
for eset in esets:
    print(eset.get('accession'))
    # `or []` guards against a missing/None field so the loop does not raise.
    for exp in eset.get('experiments_in_set') or []:
        print(exp.get('accession'))
        bios = exp.get('biosample')
        if bios:
            print(bios.get('accession'))
        for file in exp.get('files') or []:
            print(file.get('accession'))

### Provide the path to a file with the 3 columns as specified above. If there is no value for a column, the column must still be present as an empty string (i.e. keep its tab separator)

In [None]:
# Path to the 3 column, tab separated file: item ID, GEO accession, SRA accession.
acc_file = '/path/to/file/with/ncbi_dbxrefs.txt'

# Maps item ID -> list of dbxref strings ("GEO:..." and/or "SRA:...") to add.
ids2geo_sra = {}

with open(acc_file) as af:
    for line_no, aline in enumerate(af, start=1):
        # Strip only the line terminator: a bare rstrip() would also remove the
        # tabs that mark empty trailing columns. Each field is stripped below.
        info = [col.strip() for col in aline.rstrip('\r\n').split('\t')]
        if not any(info):
            continue  # blank line
        # Pad short rows so a missing trailing column reads as an empty string
        # rather than raising IndexError.
        info += [''] * (3 - len(info))
        iid, geo_acc, sra_acc = info[:3]
        if not iid:
            print(f"line {line_no} skipped - no item ID in column 1")
            continue
        dbxrefs = []
        if geo_acc:
            dbxrefs.append(f"GEO:{geo_acc}")
        if sra_acc:
            dbxrefs.append(f"SRA:{sra_acc}")
        if dbxrefs:
            ids2geo_sra[iid] = dbxrefs

# Show what was parsed so it can be checked before patching.
for iid, dbxrefs in ids2geo_sra.items():
    print(f"{iid}\t{dbxrefs}")

### If the output from the cell above looks as expected and you are ready to add the dbxrefs to the items, set action to **True**; otherwise leave it as **False** for a dry run

In [None]:
# Set to True to actually patch the items; False only reports what would be patched.
action = False

for iid, dbxrefs in ids2geo_sra.items():
    # check to see if there are any existing dbxrefs
    item = ff_utils.get_metadata(iid, key=my_auth)
    existing = item.get('dbxrefs') or []
    # Only add dbxrefs the item does not already have, so that re-running this
    # cell does not patch duplicate entries. dict.fromkeys keeps input order
    # while dropping repeats within the new list itself.
    new_refs = [ref for ref in dict.fromkeys(dbxrefs) if ref not in existing]
    if not new_refs:
        print(f"{iid} already has dbxrefs: {dbxrefs} - nothing to patch")
        continue
    # The patch replaces the whole field, so existing values must be included.
    patchdata = existing + new_refs
    if action:
        res = ff_utils.patch_metadata({'dbxrefs': patchdata}, iid, key=my_auth)
        print(f'item {iid} updated with dbxrefs: {patchdata}')
        print(res.get('status'))
    else:
        print(f"will patch {iid} with dbxrefs: {patchdata}")