In [6]:
import os

import boto3
from botocore import UNSIGNED
from botocore.config import Config

import fsspec

In [4]:
def get_s3_links(bucket_name, prefix='', s3_client=None):
    """Return the unique S3 "folder" URIs holding objects under a prefix.

    Every object key in ``bucket_name`` that starts with ``prefix`` is listed,
    reduced to its parent path (the part of the key before the last ``/``),
    and turned into a link of the form ``s3://<bucket_name>/<parent>/``.

    Parameters
    ----------
    bucket_name : str
        Name of the bucket to list.
    prefix : str, optional
        Only keys starting with this prefix are listed. The default ``''``
        lists the whole bucket.
    s3_client : optional
        Client object providing ``get_paginator('list_objects_v2')``. When
        omitted, the module-level ``s3`` client is used, so that client must
        have been created before this function is called.

    Returns
    -------
    list of str
        De-duplicated links in sorted order, so repeated runs give the same
        result. Objects stored at the bucket root map to
        ``s3://<bucket_name>/``.
    """
    client = s3 if s3_client is None else s3_client

    # Listing responses are paged, so walk every page via the paginator.
    paginator = client.get_paginator('list_objects_v2')

    unique_links = set()
    for page in paginator.paginate(Bucket=bucket_name, Prefix=prefix):
        # A page without matching objects has no 'Contents' entry.
        for obj in page.get('Contents', []):
            # S3 keys always use '/' as the delimiter, so split on it directly
            # instead of going through the OS-dependent os.path helpers.
            parent = obj['Key'].rpartition('/')[0]
            if parent:
                unique_links.add(f"s3://{bucket_name}/{parent}/")
            else:
                # Root-level object: avoid emitting "s3://bucket//".
                unique_links.add(f"s3://{bucket_name}/")

    return sorted(unique_links)

In [7]:
# Create an S3 client that sends unsigned (anonymous) requests
s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))

# Specify your bucket name
bucket_name = 'opera-pst-rs-pop1'

# Request arguments for the bucket; built from bucket_name after it is defined
# (the previous version referenced an undefined name `bucket`)
kwargs = {'Bucket': bucket_name}

# Specify the prefix if you want to filter objects within a folder or with a specific prefix
prefix = 'products/DSWx_HLS/OPERA_L3_DSWx-HLS_T11SQA_'

# Get the list of S3 links
s3_links = get_s3_links(bucket_name, prefix)

In [8]:
# Save the collected links to a manifest, one link per line
manifest_file = 'T11SQA_manifest.txt'

# Build the full text first, then write it in a single call
manifest_text = "".join(link + "\n" for link in s3_links)
with open(manifest_file, 'w') as manifest:
    manifest.write(manifest_text)