In [1]:
import boto3

def get_prefix_size(bucket_name, prefix):
    """
    Calculates the total size of objects in an S3 prefix.

    Pages through ``list_objects_v2`` for every key that starts with
    ``prefix`` and adds up the ``Size`` reported for each object.

    Args:
      bucket_name: The name of the S3 bucket.
      prefix: The prefix to search.

    Returns:
      The total size of the objects in bytes (0 when no object matches
      the prefix).
    """
    s3 = boto3.client('s3')
    paginator = s3.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=bucket_name, Prefix=prefix)

    total_size = 0
    for page in pages:
        # 'Contents' can be absent from a page (e.g. nothing matches the
        # prefix). Default to an empty list: a bare .get('Contents') would
        # return None and iterating it raises TypeError.
        total_size += sum(obj['Size'] for obj in page.get('Contents', []))
    return total_size

# Example usage: measure everything stored under
# s3://gulf-waves/data/raw/historic_smd/
bucket_name, prefix = 'gulf-waves', 'data/raw/historic_smd/'
total_size = get_prefix_size(bucket_name, prefix)
print(f"Total size of objects in prefix '{prefix}': {total_size} bytes")

Total size of objects in prefix 'data/raw/historic_smd/': 2836092722 bytes


In [2]:
import boto3

def get_prefix_size(bucket_name, prefix):
    """
    Calculates the total size of objects in an S3 prefix and returns it in human-readable format.

    Pages through ``list_objects_v2`` for every key that starts with
    ``prefix``, adds up the ``Size`` reported for each object, and formats
    the sum with one decimal place using 1024-based units.

    Args:
      bucket_name: The name of the S3 bucket.
      prefix: The prefix to search.

    Returns:
      The total size of the objects in human-readable format (e.g., "1.2 KB", "3.5 MB", "2.1 GB").
      A prefix with no matching objects yields "0.0 B".
    """
    s3 = boto3.client('s3')
    paginator = s3.get_paginator('list_objects_v2')
    pages = paginator.paginate(Bucket=bucket_name, Prefix=prefix)

    total_bytes = 0
    for page in pages:
        # 'Contents' can be absent from a page (e.g. nothing matches the
        # prefix). Default to an empty list: a bare .get('Contents') would
        # return None and iterating it raises TypeError.
        total_bytes += sum(obj['Size'] for obj in page.get('Contents', []))

    # Scale on a separate float so the exact byte count is left untouched.
    size = float(total_bytes)
    for unit in ('B', 'KB', 'MB', 'GB', 'TB'):
        if size < 1024.0:
            return f"{size:.1f} {unit}"
        size /= 1024.0

    # Anything still >= 1024 after the TB step is reported in petabytes.
    return f"{size:.1f} PB"

# Example usage: report the size of the prefix in human-readable form
bucket_name, prefix = 'gulf-waves', 'data/raw/historic_smd/'
total_size = get_prefix_size(bucket_name, prefix)
print(f"Total size of objects in prefix '{prefix}': {total_size}")

Total size of objects in prefix 'data/raw/historic_smd/': 2.6 GB
