# Importing libraries

In [2]:
import requests
import json
import os
import pathlib
import subprocess
import time
from tqdm import tqdm  # Importing the tqdm library

### Start by downloading the manifest

In [3]:
version = '20230830'
url = 'https://allen-brain-cell-atlas.s3-us-west-2.amazonaws.com/releases/%s/manifest.json' % version
manifest = json.loads(requests.get(url).text)
print("version: ", manifest['version'])

version:  20230830


In [4]:
manifest.keys()
print("version:",manifest['version'])
print("resource_uri:",manifest['resource_uri'])

version: 20230830
resource_uri: s3://allen-brain-cell-atlas/


Get the size of the manifest

In [5]:
GB = float(float(1024) ** 3)

for r in manifest['directory_listing'] :    
    r_dict =  manifest['directory_listing'][r]
    for d in r_dict['directories'] :
        d_dict = r_dict['directories'][d]        
        print(d_dict['relative_path'],":",'%0.2f GB' % (d_dict['total_size']/GB))
        

expression_matrices/MERFISH-C57BL6J-638850/20230830 : 14.21 GB
metadata/MERFISH-C57BL6J-638850/20230830 : 1.81 GB
expression_matrices/MERFISH-C57BL6J-638850-sections/20230630 : 14.31 GB
expression_matrices/WMB-10Xv2/20230630 : 104.16 GB
expression_matrices/WMB-10Xv3/20230630 : 176.41 GB
expression_matrices/WMB-10XMulti/20230830 : 0.21 GB
metadata/WMB-10X/20230830 : 2.39 GB
metadata/WMB-taxonomy/20230830 : 0.01 GB
metadata/WMB-neighborhoods/20230830 : 3.00 GB
image_volumes/Allen-CCF-2020/20230630 : 0.37 GB
metadata/Allen-CCF-2020/20230630 : 0.00 GB
image_volumes/MERFISH-C57BL6J-638850-CCF/20230630 : 0.11 GB
metadata/MERFISH-C57BL6J-638850-CCF/20230830 : 2.01 GB
expression_matrices/Zhuang-ABCA-1/20230830 : 3.09 GB
metadata/Zhuang-ABCA-1/20230830 : 1.33 GB
metadata/Zhuang-ABCA-1-CCF/20230830 : 0.21 GB
expression_matrices/Zhuang-ABCA-2/20230830 : 1.30 GB
metadata/Zhuang-ABCA-2/20230830 : 0.57 GB
metadata/Zhuang-ABCA-2-CCF/20230830 : 0.08 GB
expression_matrices/Zhuang-ABCA-3/20230830 : 1.69

Set the download base

In [6]:
download_base = '/shared/Data/abc_download_root'

In [7]:
def download_file( file_dict ) :
    
    print(file_dict['relative_path'],file_dict['size'])
    local_path = os.path.join( download_base, file_dict['relative_path'] )
    local_path = pathlib.Path( local_path )
    remote_path = manifest['resource_uri'] + file_dict['relative_path']

    command = "aws s3 cp --no-sign-request %s %s" % (remote_path, local_path)
    print(command)

    start = time.process_time()
    # Uncomment to download file
    result = subprocess.run(command.split(' '),stdout=subprocess.PIPE)
    print("time taken: ", time.process_time() - start)

## Download individual files from WMB-10Xv2

In [7]:
expression_matrices = manifest['file_listing']['WMB-10Xv2']['expression_matrices']
file_dict = expression_matrices['WMB-10Xv2-TH']['log2']['files']['h5ad']
print('size:',file_dict['size'])
download_file( file_dict )

size: 4038679930
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-log2.h5ad 4038679930
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-log2.h5ad
time taken:  0.09134629800000016


In [8]:
expression_matrices = manifest['file_listing']['WMB-10Xv2']['expression_matrices']
file_dict = expression_matrices['WMB-10Xv2-TH']['raw']['files']['h5ad']
print('size:',file_dict['size'])
download_file( file_dict )

size: 4038679930
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-raw.h5ad 4038679930
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-raw.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-raw.h5ad
time taken:  0.09228066099999999


Download individual files from WMB-10XMulti

In [9]:
datasets = ['WMB-10XMulti']
for d in datasets :
    expression_matrices = manifest['file_listing'][d]['expression_matrices']
    file_dict = expression_matrices[d]['log2']['files']['h5ad']
    print('size:',file_dict['size'])
    download_file( file_dict )

size: 89318511
expression_matrices/WMB-10XMulti/20230830/WMB-10XMulti-log2.h5ad 89318511
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10XMulti/20230830/WMB-10XMulti-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10XMulti/20230830/WMB-10XMulti-log2.h5ad
time taken:  0.0051590729999999585


In [10]:
datasets = ['WMB-10XMulti']
for d in datasets :
    expression_matrices = manifest['file_listing'][d]['expression_matrices']
    file_dict = expression_matrices[d]['raw']['files']['h5ad']
    print('size:',file_dict['size'])
    download_file( file_dict )

size: 132220015
expression_matrices/WMB-10XMulti/20230830/WMB-10XMulti-raw.h5ad 132220015
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10XMulti/20230830/WMB-10XMulti-raw.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10XMulti/20230830/WMB-10XMulti-raw.h5ad
time taken:  0.0059165059999999325


Get the list of files in the WMB-10Xv2 directory

In [16]:
expression_matrices = manifest['file_listing']['WMB-10Xv2']['expression_matrices']
for file_dict in expression_matrices :
    # print('size:',file_dict['size'])
    print( file_dict )

# file_dict = expression_matrices['WMB-10Xv2-TH']['raw']['files']['h5ad']
# print('size:',file_dict['size'])
# download_file( file_dict )

WMB-10Xv2-Isocortex-2
WMB-10Xv2-MB
WMB-10Xv2-TH
WMB-10Xv2-CTXsp
WMB-10Xv2-Isocortex-4
WMB-10Xv2-HY
WMB-10Xv2-OLF
WMB-10Xv2-Isocortex-3
WMB-10Xv2-Isocortex-1
WMB-10Xv2-HPF


In [22]:
for r in manifest['directory_listing'] :
    
    r_dict =  manifest['directory_listing'][r]
    # print(r_dict)
    for d in r_dict['directories'] :
        
        if d != 'image_volumes' :
            continue
        d_dict = r_dict['directories'][d]
        local_path = os.path.join( download_base, d_dict['relative_path'])
        local_path = pathlib.Path( local_path )
        remote_path = manifest['resource_uri'] + d_dict['relative_path']
        
        command = "aws s3 sync --no-sign-request %s %s" % (remote_path, local_path)
        print(command)
        
        start = time.process_time()
        # Uncomment to download directories
        # result = subprocess.run(command.split(),stdout=subprocess.PIPE)
        # print("time taken: ", time.process_time() - start)
  

aws s3 sync --no-sign-request s3://allen-brain-cell-atlas/image_volumes/Allen-CCF-2020/20230630 /shared/Data/abc_download_root/image_volumes/Allen-CCF-2020/20230630
aws s3 sync --no-sign-request s3://allen-brain-cell-atlas/image_volumes/MERFISH-C57BL6J-638850-CCF/20230630 /shared/Data/abc_download_root/image_volumes/MERFISH-C57BL6J-638850-CCF/20230630


## Download individual files from WMB-10Xv2 [log2 file]

First let's see what is in the manifest

In [33]:
for r in manifest['directory_listing'] :    
    r_dict =  manifest['directory_listing'][r]
    for d in r_dict['directories'] :
        if d != 'expression_matrices' :
            continue
        print(d)
        d_dict = r_dict['directories'][d]   
        # if d_dict != 'WMB-10Xv2' :
        #     continue
        print(d_dict)

expression_matrices
{'version': '20230830', 'relative_path': 'expression_matrices/MERFISH-C57BL6J-638850/20230830', 'url': 'https://allen-brain-cell-atlas.s3.us-west-2.amazonaws.com/expression_matrices/MERFISH-C57BL6J-638850/20230830/', 'view_link': 'https://allen-brain-cell-atlas.s3.us-west-2.amazonaws.com/index.html#expression_matrices/MERFISH-C57BL6J-638850/20230830/', 'total_size': 15255179148}
expression_matrices
{'version': '20230630', 'relative_path': 'expression_matrices/MERFISH-C57BL6J-638850-sections/20230630', 'url': 'https://allen-brain-cell-atlas.s3.us-west-2.amazonaws.com/expression_matrices/MERFISH-C57BL6J-638850-sections/20230630/', 'view_link': 'https://allen-brain-cell-atlas.s3.us-west-2.amazonaws.com/index.html#expression_matrices/MERFISH-C57BL6J-638850-sections/20230630/', 'total_size': 15363706998}
expression_matrices
{'version': '20230630', 'relative_path': 'expression_matrices/WMB-10Xv2/20230630', 'url': 'https://allen-brain-cell-atlas.s3.us-west-2.amazonaws.com/

### Download individual files from WMB-10Xv2 [log2 file]

In [None]:
# Initialize the dictionary
expression_matrices = manifest['file_listing']['WMB-10Xv2']['expression_matrices']

# Initialize the tqdm progress bar
categories = list(expression_matrices.keys())
with tqdm(total=len(categories), desc="Downloading Files") as pbar:

    # Iterate over the keys, which represent different categories
    for category in categories:
        
        # Look for 'log2' key in each category dictionary
        if 'log2' in expression_matrices[category]:
            
            # Get the 'files' dictionary under 'log2'
            files_dict = expression_matrices[category]['log2']['files']
            
            # Check if 'h5ad' format files are present
            if 'h5ad' in files_dict:
                file_dict = files_dict['h5ad']
                
                # Prepare the local and remote paths for downloading
                local_path = os.path.join(download_base, file_dict['relative_path'])
                local_path = pathlib.Path(local_path)
                remote_path = manifest['resource_uri'] + file_dict['relative_path']
                
                # Print size and paths before downloading
                print('size:', file_dict['size'])
                print('Local Path:', local_path)
                print('Remote Path:', remote_path)
                
                # Download the file using your existing download_file function
                # Or you could directly insert the AWS S3 command here
                download_file(file_dict)
        
        # Update the progress bar
        pbar.update(1)


Downloading Files:   0%|          | 0/10 [00:00<?, ?it/s]

size: 9444387082
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-2-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-2-log2.h5ad
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-2-log2.h5ad 9444387082
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-2-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-2-log2.h5ad


Downloading Files:  10%|█         | 1/10 [01:55<17:15, 115.00s/it]

time taken:  0.20598851000000007
size: 817433734
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-MB-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-MB-log2.h5ad
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-MB-log2.h5ad 817433734
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-MB-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-MB-log2.h5ad


Downloading Files:  20%|██        | 2/10 [02:07<07:17, 54.68s/it] 

time taken:  0.025496951999999906
size: 4038679930
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-log2.h5ad
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-log2.h5ad 4038679930
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-log2.h5ad


Downloading Files:  30%|███       | 3/10 [03:06<06:36, 56.70s/it]

time taken:  0.095607628
size: 1740441622
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-CTXsp-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-CTXsp-log2.h5ad
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-CTXsp-log2.h5ad 1740441622
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-CTXsp-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-CTXsp-log2.h5ad


Downloading Files:  40%|████      | 4/10 [03:32<04:27, 44.52s/it]

time taken:  0.04063343400000008
size: 8692589466
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-4-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-4-log2.h5ad
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-4-log2.h5ad 8692589466
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-4-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-4-log2.h5ad


### Now download individual files from WMB-10Xv2 [raw file]

In [9]:
# Initialize the dictionary
expression_matrices = manifest['file_listing']['WMB-10Xv2']['expression_matrices']

# Initialize the tqdm progress bar
categories = list(expression_matrices.keys())
with tqdm(total=len(categories), desc="Downloading Files") as pbar:

    # Iterate over the keys, which represent different categories
    for category in categories:
        
        # Look for 'raw' key in each category dictionary
        if 'raw' in expression_matrices[category]:
            
            # Get the 'files' dictionary under 'raw'
            files_dict = expression_matrices[category]['raw']['files']
            
            # Check if 'h5ad' format files are present
            if 'h5ad' in files_dict:
                file_dict = files_dict['h5ad']
                
                # Prepare the local and remote paths for downloading
                local_path = os.path.join(download_base, file_dict['relative_path'])
                local_path = pathlib.Path(local_path)
                remote_path = manifest['resource_uri'] + file_dict['relative_path']
                
                # Print size and paths before downloading
                print('size:', file_dict['size'])
                print('Local Path:', local_path)
                print('Remote Path:', remote_path)
                
                # Download the file using your existing download_file function
                # Or you could directly insert the AWS S3 command here
                download_file(file_dict)
        
        # Update the progress bar
        pbar.update(1)


Downloading Files:   0%|          | 0/10 [00:00<?, ?it/s]

size: 9444387082
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-2-raw.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-2-raw.h5ad
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-2-raw.h5ad 9444387082
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-2-raw.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-2-raw.h5ad


Downloading Files:  10%|█         | 1/10 [01:52<16:48, 112.10s/it]

time taken:  0.20220492899999987
size: 811153174
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-MB-raw.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-MB-raw.h5ad
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-MB-raw.h5ad 811153174
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-MB-raw.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-MB-raw.h5ad


Downloading Files:  20%|██        | 2/10 [02:03<07:03, 52.99s/it] 

time taken:  0.020529559999999947
size: 4038679930
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-raw.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-raw.h5ad
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-raw.h5ad 4038679930
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-raw.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-TH-raw.h5ad


Downloading Files:  30%|███       | 3/10 [03:01<06:25, 55.07s/it]

time taken:  0.0947930530000003
size: 1734161062
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-CTXsp-raw.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-CTXsp-raw.h5ad
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-CTXsp-raw.h5ad 1734161062
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-CTXsp-raw.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-CTXsp-raw.h5ad


Downloading Files:  40%|████      | 4/10 [03:25<04:18, 43.06s/it]

time taken:  0.042439166999999944
size: 8692589466
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-4-raw.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-4-raw.h5ad
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-4-raw.h5ad 8692589466
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-4-raw.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-4-raw.h5ad


Downloading Files:  50%|█████     | 5/10 [05:14<05:33, 66.71s/it]

time taken:  0.1759996639999999
size: 2908443982
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-HY-raw.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-HY-raw.h5ad
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-HY-raw.h5ad 2908443982
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-HY-raw.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-HY-raw.h5ad


Downloading Files:  60%|██████    | 6/10 [05:55<03:52, 58.11s/it]

time taken:  0.06767890900000006
size: 5128120156
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-OLF-raw.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-OLF-raw.h5ad
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-OLF-raw.h5ad 5128120156
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-OLF-raw.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-OLF-raw.h5ad


Downloading Files:  70%|███████   | 7/10 [07:09<03:09, 63.16s/it]

time taken:  0.11671811900000018
size: 8457819034
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-3-raw.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-3-raw.h5ad
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-3-raw.h5ad 8457819034
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-3-raw.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-3-raw.h5ad


Downloading Files:  80%|████████  | 8/10 [08:54<02:32, 76.43s/it]

time taken:  0.18850982699999985
size: 8601133978
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-1-raw.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-1-raw.h5ad
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-1-raw.h5ad 8601133978
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-1-raw.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-Isocortex-1-raw.h5ad


Downloading Files:  90%|█████████ | 9/10 [10:41<01:26, 86.01s/it]

time taken:  0.19585635299999993
size: 6096269724
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-HPF-raw.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-HPF-raw.h5ad
expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-HPF-raw.h5ad 6096269724
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-HPF-raw.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv2/20230630/WMB-10Xv2-HPF-raw.h5ad


Downloading Files: 100%|██████████| 10/10 [12:01<00:00, 72.11s/it]

time taken:  0.13518696200000013





### Download individual files from WMB-10Xv2 [log2 file]

In [10]:
# Initialize the dictionary
expression_matrices = manifest['file_listing']['WMB-10Xv3']['expression_matrices']

# Initialize the tqdm progress bar
categories = list(expression_matrices.keys())
with tqdm(total=len(categories), desc="Downloading Files") as pbar:

    # Iterate over the keys, which represent different categories
    for category in categories:
        
        # Look for 'log2' key in each category dictionary
        if 'log2' in expression_matrices[category]:
            
            # Get the 'files' dictionary under 'log2'
            files_dict = expression_matrices[category]['log2']['files']
            
            # Check if 'h5ad' format files are present
            if 'h5ad' in files_dict:
                file_dict = files_dict['h5ad']
                
                # Prepare the local and remote paths for downloading
                local_path = os.path.join(download_base, file_dict['relative_path'])
                local_path = pathlib.Path(local_path)
                remote_path = manifest['resource_uri'] + file_dict['relative_path']
                
                # Print size and paths before downloading
                print('size:', file_dict['size'])
                print('Local Path:', local_path)
                print('Remote Path:', remote_path)
                
                # Download the file using your existing download_file function
                # Or you could directly insert the AWS S3 command here
                download_file(file_dict)
        
        # Update the progress bar
        pbar.update(1)


Downloading Files:   0%|          | 0/13 [00:00<?, ?it/s]

size: 8356210362
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-Isocortex-2-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-Isocortex-2-log2.h5ad
expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-Isocortex-2-log2.h5ad 8356210362
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-Isocortex-2-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-Isocortex-2-log2.h5ad


Downloading Files:   8%|▊         | 1/13 [01:29<17:59, 89.94s/it]

time taken:  0.154994764
size: 4067049816
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-PAL-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-PAL-log2.h5ad
expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-PAL-log2.h5ad 4067049816
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-PAL-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-PAL-log2.h5ad


Downloading Files:  15%|█▌        | 2/13 [02:27<12:59, 70.84s/it]

time taken:  0.09150522900000002
size: 3277343842
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-CTXsp-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-CTXsp-log2.h5ad
expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-CTXsp-log2.h5ad 3277343842
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-CTXsp-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-CTXsp-log2.h5ad


Downloading Files:  23%|██▎       | 3/13 [03:13<09:57, 59.73s/it]

time taken:  0.0702136659999999
size: 13726487690
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-MB-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-MB-log2.h5ad
expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-MB-log2.h5ad 13726487690
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-MB-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-MB-log2.h5ad


Downloading Files:  31%|███       | 4/13 [05:48<14:34, 97.21s/it]

time taken:  0.29393174600000016
size: 5610691342
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-CB-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-CB-log2.h5ad
expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-CB-log2.h5ad 5610691342
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-CB-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-CB-log2.h5ad


Downloading Files:  38%|███▊      | 5/13 [07:06<12:01, 90.21s/it]

time taken:  0.10694514100000019
size: 11768194128
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-Isocortex-1-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-Isocortex-1-log2.h5ad
expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-Isocortex-1-log2.h5ad 11768194128
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-Isocortex-1-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-Isocortex-1-log2.h5ad


Downloading Files:  46%|████▌     | 6/13 [09:23<12:22, 106.09s/it]

time taken:  0.23650742900000044
size: 3114998442
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-OLF-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-OLF-log2.h5ad
expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-OLF-log2.h5ad 3114998442
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-OLF-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-OLF-log2.h5ad


Downloading Files:  54%|█████▍    | 7/13 [10:06<08:33, 85.60s/it] 

time taken:  0.06603658600000006
size: 7206054638
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-MY-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-MY-log2.h5ad
expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-MY-log2.h5ad 7206054638
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-MY-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-MY-log2.h5ad


Downloading Files:  62%|██████▏   | 8/13 [11:35<07:12, 86.57s/it]

time taken:  0.2007744960000002
size: 5811140682
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-TH-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-TH-log2.h5ad
expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-TH-log2.h5ad 5811140682
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-TH-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-TH-log2.h5ad


Downloading Files:  69%|██████▉   | 9/13 [13:06<05:51, 87.84s/it]

time taken:  0.14923390299999983
size: 7248338584
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-HY-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-HY-log2.h5ad
expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-HY-log2.h5ad 7248338584
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-HY-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-HY-log2.h5ad


Downloading Files:  77%|███████▋  | 10/13 [14:51<04:39, 93.18s/it]

time taken:  0.2323855539999995
size: 11915297204
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-STR-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-STR-log2.h5ad
expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-STR-log2.h5ad 11915297204
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-STR-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-STR-log2.h5ad


Downloading Files:  85%|████████▍ | 11/13 [17:49<03:58, 119.23s/it]

time taken:  0.4460739449999993
size: 5200570200
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-P-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-P-log2.h5ad
expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-P-log2.h5ad 5200570200
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-P-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-P-log2.h5ad


Downloading Files:  92%|█████████▏| 12/13 [19:17<01:49, 109.76s/it]

time taken:  0.21700768500000045
size: 7409633208
Local Path: /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-HPF-log2.h5ad
Remote Path: s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-HPF-log2.h5ad
expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-HPF-log2.h5ad 7409633208
aws s3 cp --no-sign-request s3://allen-brain-cell-atlas/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-HPF-log2.h5ad /shared/Data/abc_download_root/expression_matrices/WMB-10Xv3/20230630/WMB-10Xv3-HPF-log2.h5ad


Downloading Files: 100%|██████████| 13/13 [21:16<00:00, 98.20s/it] 

time taken:  0.28977505599999986



