In [None]:
import io
import tensorflow as tf

In [None]:
import boto3
from botocore.exceptions import NoCredentialsError

from botocore.exceptions import ClientError

# Optional explicit credentials. Leave these blank to let boto3 resolve
# credentials and region through its default chain (environment variables,
# shared config/credentials files, ...) rather than storing secrets in the
# notebook.
aws_access_key_id = ''
aws_secret_access_key = ''
region_name = ''  # e.g., us-west-1

# Create a session. Blank placeholders are passed as None so boto3 treats
# them as "not provided"; in particular an empty string is not a usable
# region name.
session = boto3.Session(
    aws_access_key_id=aws_access_key_id or None,
    aws_secret_access_key=aws_secret_access_key or None,
    region_name=region_name or None,
)

# Then create an S3 client using this session
s3_client = session.client('s3')

# Smoke test: listing buckets confirms the credentials are usable.
try:
    response = s3_client.list_buckets()
    print(response)
except NoCredentialsError:
    print("Credentials are not available or invalid.")
except ClientError as err:
    # Credentials that were found but rejected by AWS are reported as a
    # ClientError, not as NoCredentialsError.
    print(f"Credentials are not available or invalid. ({err})")

In [None]:
def _bytes_feature(value):
    """Wrap a single value in a ``tf.train.Feature`` backed by a one-element ``BytesList``.

    Args:
        value: The payload to store. If it is an instance of the type that
            ``tf.constant`` returns, it is first converted with ``.numpy()``.

    Returns:
        A ``tf.train.Feature`` whose ``bytes_list`` holds ``[value]``.
    """
    tensor_type = type(tf.constant(0))
    payload = value
    if isinstance(payload, tensor_type):
        # Unwrap the tensor so the BytesList receives its underlying value.
        payload = payload.numpy()
    wrapped = tf.train.BytesList(value=[payload])
    return tf.train.Feature(bytes_list=wrapped)

def _convert_to_example(image, mask):
    """Build a ``tf.train.Example`` carrying one image and its mask.

    Args:
        image: Payload stored under the ``'image'`` feature key.
        mask: Payload stored under the ``'mask'`` feature key.

    Returns:
        A ``tf.train.Example`` with two bytes features, ``'image'`` and ``'mask'``,
        each produced by ``_bytes_feature``.
    """
    feature_map = {
        'image': _bytes_feature(image),
        'mask': _bytes_feature(mask),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))


In [None]:
def list_s3_objects(s3_client, bucket_name, prefix):
    """Collect the keys of every object listed under ``prefix`` in a bucket.

    Uses the client's ``list_objects_v2`` paginator, so all result pages are
    walked rather than only the first response.

    Args:
        s3_client: Client object exposing ``get_paginator``.
        bucket_name: Passed to the paginator as ``Bucket``.
        prefix: Passed to the paginator as ``Prefix``.

    Returns:
        A list of keys in listing order. Keys ending in ``'/'`` ("folder"
        placeholders) are left out; pages without ``'Contents'`` add nothing.
    """
    pages = s3_client.get_paginator('list_objects_v2').paginate(
        Bucket=bucket_name,
        Prefix=prefix,
    )
    return [
        entry['Key']
        for page in pages
        for entry in page.get('Contents', ())
        if not entry['Key'].endswith('/')  # skip 'folder' keys
    ]


In [None]:
# --- Input location in S3 (fill in before running) ---
bucket_name = ''   # bucket that is listed for images and masks
image_prefix = ''  # key prefix used to list the image objects
mask_prefix = ''   # key prefix used to list the mask objects

# --- TFRecord output (fill in before running) ---
local_tfrecord_file = ''  # local path handed to the TFRecord writer
s3_tfrecord_key = ''      # presumably the destination key for the TFRecord in S3

# List the object keys under each prefix: images first, then masks.
image_keys = list_s3_objects(
    s3_client=s3_client, bucket_name=bucket_name, prefix=image_prefix
)
mask_keys = list_s3_objects(
    s3_client=s3_client, bucket_name=bucket_name, prefix=mask_prefix
)

In [None]:
def write_tfrecords_to_s3(image_keys, mask_keys, bucket_name, s3_client, tfrecord_file_name, s3_tfrecord_key):
    """Pack paired S3 images and masks into one TFRecord file and upload it to S3.

    Each image/mask pair is downloaded from ``bucket_name``, stored as the raw
    object bytes in a ``tf.train.Example`` (via ``_convert_to_example``) and
    appended to a local TFRecord file. After the file has been closed it is
    uploaded to ``bucket_name`` under ``s3_tfrecord_key``.

    Args:
        image_keys: Iterable of S3 keys of the images.
        mask_keys: Iterable of S3 keys of the masks; paired with
            ``image_keys`` by position.
        bucket_name: Bucket the objects are read from and the TFRecord is
            uploaded to.
        s3_client: S3 client providing ``get_object`` and ``upload_file``.
        tfrecord_file_name: Local path of the TFRecord file to write.
        s3_tfrecord_key: Destination key for the upload. When empty, the
            upload is skipped and only the local file is produced.

    Raises:
        ValueError: If the number of image keys and mask keys differ.
    """
    image_keys = list(image_keys)
    mask_keys = list(mask_keys)
    # zip() would silently drop the unpaired tail, producing a dataset that
    # is missing samples; fail loudly before any I/O instead.
    if len(image_keys) != len(mask_keys):
        raise ValueError(
            f'Number of image keys ({len(image_keys)}) does not match '
            f'number of mask keys ({len(mask_keys)}).'
        )

    with tf.io.TFRecordWriter(tfrecord_file_name) as writer:
        for image_key, mask_key in zip(image_keys, mask_keys):
            image_content = s3_client.get_object(Bucket=bucket_name, Key=image_key)['Body'].read()
            mask_content = s3_client.get_object(Bucket=bucket_name, Key=mask_key)['Body'].read()

            # The downloaded bytes are stored as-is (no decoding or re-encoding).
            example = _convert_to_example(image_content, mask_content)
            writer.write(example.SerializeToString())
            print(f'Written file: {image_key}')
            print(f'Written file: {mask_key}')

    # Upload only after the writer has been closed so the file on disk is complete.
    if s3_tfrecord_key:
        s3_client.upload_file(tfrecord_file_name, bucket_name, s3_tfrecord_key)
        print(f'Uploaded {tfrecord_file_name} to s3://{bucket_name}/{s3_tfrecord_key}')
            

# Build the TFRecord from the listed image/mask keys.
write_tfrecords_to_s3(
    image_keys=image_keys,
    mask_keys=mask_keys,
    bucket_name=bucket_name,
    s3_client=s3_client,
    tfrecord_file_name=local_tfrecord_file,
    s3_tfrecord_key=s3_tfrecord_key,
)