In [1]:
import boto3
import os

# Disable signing for public access
from botocore import UNSIGNED
from botocore.client import Config

In [2]:
# S3 bucket names, keyed by a short label for the dataset each one holds
buckets = dict(
    single_cell_profiling="sea-ad-single-cell-profiling",
    quantitative_neuropathology="sea-ad-quantitative-neuropathology",
    spatial_transcriptomics="sea-ad-spatial-transcriptomics",
)

In [3]:
# Build the S3 client from a config that turns request signing off
unsigned_config = Config(signature_version=UNSIGNED)
s3 = boto3.client('s3', config=unsigned_config)


In [7]:
import re

# Function to list files in the bucket with optional regex filter
def list_files(bucket_name, regex_pattern=None):
    """Print the key of every object in an S3 bucket, optionally filtered.

    Uses the module-level ``s3`` client.

    Args:
        bucket_name: Name of the S3 bucket to list.
        regex_pattern: Optional regular expression. When given, only keys
            for which ``re.search(regex_pattern, key)`` finds a match are
            printed; when ``None``, every key is printed.

    Returns:
        None. Results are written to stdout.
    """
    print(f"Listing files in bucket: {bucket_name}")

    # A single list_objects_v2 call returns at most 1,000 keys, so walk all
    # result pages instead of reading only the first response.
    paginator = s3.get_paginator('list_objects_v2')
    found_any = False
    for page in paginator.paginate(Bucket=bucket_name):
        # 'Contents' is absent from a page that holds no objects.
        for item in page.get('Contents', []):
            found_any = True
            file_name = item['Key']
            if regex_pattern is None or re.search(regex_pattern, file_name):
                print(file_name)

    # Reported only when the bucket itself is empty, not when the filter
    # rejects every key (same as the original behaviour).
    if not found_any:
        print(f"No files found in bucket {bucket_name}")

# Function to download all files in the bucket to a local folder
def download_bucket(bucket_name, local_dir):
    """Download every object in an S3 bucket into a local directory tree.

    Uses the module-level ``s3`` client. Each object is written to
    ``os.path.join(local_dir, key)``, so the key's prefix structure is
    reproduced as sub-directories under ``local_dir``.

    Args:
        bucket_name: Name of the S3 bucket to download.
        local_dir: Local directory to download into; created if missing.

    Returns:
        None. Progress is written to stdout.
    """
    print(f"Downloading files from bucket: {bucket_name} to {local_dir}")
    os.makedirs(local_dir, exist_ok=True)

    # A single list_objects_v2 call returns at most 1,000 keys, so walk all
    # result pages; otherwise large buckets are only partially downloaded.
    paginator = s3.get_paginator('list_objects_v2')
    found_any = False
    for page in paginator.paginate(Bucket=bucket_name):
        # 'Contents' is absent from a page that holds no objects.
        for obj in page.get('Contents', []):
            found_any = True
            file_name = obj['Key']
            local_file_path = os.path.join(local_dir, file_name)

            # Keys ending in "/" are folder placeholders, not files: the
            # download target would be a directory. Create it and move on.
            if file_name.endswith('/'):
                os.makedirs(local_file_path, exist_ok=True)
                continue

            os.makedirs(os.path.dirname(local_file_path), exist_ok=True)  # Create directories if needed

            print(f"Downloading {file_name}...")
            s3.download_file(bucket_name, file_name, local_file_path)
            print(f"Downloaded {file_name} to {local_file_path}")

    if not found_any:
        print(f"No files found in bucket {bucket_name}")

In [13]:
# Example usage: list the .h5ad files available in each bucket
for bucket in buckets.values():
    # Raw string: "\." in a normal string literal is an invalid escape sequence.
    # list_files uses re.search, so no leading ".*" is needed; "$" restricts
    # the match to keys that actually end in ".h5ad".
    list_files(bucket, regex_pattern=r'\.h5ad$')


Listing files in bucket: sea-ad-single-cell-profiling
MTG/ATACseq/SEAAD_MTG_ATACseq_all-nuclei.2024-02-13.h5ad
MTG/ATACseq/SEAAD_MTG_ATACseq_final-nuclei.2024-02-13.h5ad
MTG/ATACseq/donor_objects/H19.30.002_SEAAD_MTG_ATACseq_final-nuclei.2024-02-13.h5ad
MTG/ATACseq/donor_objects/H19.30.004_SEAAD_MTG_ATACseq_final-nuclei.2024-02-13.h5ad
MTG/ATACseq/donor_objects/H19.33.004_SEAAD_MTG_ATACseq_final-nuclei.2024-02-13.h5ad
MTG/ATACseq/donor_objects/H20.33.001_SEAAD_MTG_ATACseq_final-nuclei.2024-02-13.h5ad
MTG/ATACseq/donor_objects/H20.33.002_SEAAD_MTG_ATACseq_final-nuclei.2024-02-13.h5ad
MTG/ATACseq/donor_objects/H20.33.004_SEAAD_MTG_ATACseq_final-nuclei.2024-02-13.h5ad
MTG/ATACseq/donor_objects/H20.33.005_SEAAD_MTG_ATACseq_final-nuclei.2024-02-13.h5ad
MTG/ATACseq/donor_objects/H20.33.008_SEAAD_MTG_ATACseq_final-nuclei.2024-02-13.h5ad
MTG/ATACseq/donor_objects/H20.33.011_SEAAD_MTG_ATACseq_final-nuclei.2024-02-13.h5ad
MTG/ATACseq/donor_objects/H20.33.012_SEAAD_MTG_ATACseq_final-nuclei.2024-0

In [None]:
# Example usage: download each bucket into a local folder named after its key in `buckets`
for key, bucket in buckets.items():
    destination = f"./{key}"
    download_bucket(bucket, destination)