**Imports**

In [None]:
import os
import zipfile
import boto3
from botocore.client import Config

**Retrieve MinIO credentials, initialize the client, list all buckets, and check whether the target bucket contains objects**

In [None]:
# Connection settings are read from environment variables so that no
# credentials are hard-coded in the notebook.
minio_endpoint = os.getenv('AWS_S3_ENDPOINT')
access_key = os.getenv('AWS_ACCESS_KEY_ID')
secret_key = os.getenv('AWS_SECRET_ACCESS_KEY')
bucket_name = os.getenv('AWS_S3_BUCKET')
region = os.getenv('AWS_DEFAULT_REGION')

s3_client = boto3.client(
    's3',
    endpoint_url=minio_endpoint,
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key,
    # Pass the configured region explicitly; when it is None boto3 falls back
    # to its own region resolution.
    region_name=region,
    # Signature Version 4 request signing.
    config=Config(signature_version='s3v4')
)

# Listing buckets doubles as a connectivity / credentials check.
buckets = s3_client.list_buckets()
print("Buckets:")
for bucket in buckets['Buckets']:
    print(f"  {bucket['Name']}")

# Fail with an actionable message instead of an opaque parameter-validation
# error from list_objects_v2 when the target bucket was never configured.
if not bucket_name:
    raise RuntimeError(
        "Environment variable AWS_S3_BUCKET is not set; "
        "cannot inspect or download from the target bucket."
    )

# 'Contents' is only present in the response when the bucket holds objects.
objects = s3_client.list_objects_v2(Bucket=bucket_name)
if 'Contents' in objects:
    print("Objects found in bucket")
else:
    print("  No objects found.")

**Download all the training data**

In [None]:
# Key prefix ("folder") to mirror locally, and the directory it is mirrored into.
folder_to_download = "train/"
local_download_path = os.getcwd()

# The paginator follows continuation tokens internally, so every page of the
# listing is visited without hand-written token bookkeeping.
paginator = s3_client.get_paginator('list_objects_v2')

for page in paginator.paginate(Bucket=bucket_name, Prefix=folder_to_download):
    # A page carries no 'Contents' key when nothing matches the prefix.
    for obj in page.get('Contents', []):
        object_key = obj['Key']

        # Keys ending in "/" are folder placeholders (e.g. "train/"); their
        # local target would be a directory path, which download_file cannot
        # write to, so skip them. Real files below them still create the
        # directory tree via makedirs.
        if object_key.endswith('/'):
            continue

        # Recreate the object's key hierarchy under the download directory.
        local_file_path = os.path.join(local_download_path, object_key)
        os.makedirs(os.path.dirname(local_file_path), exist_ok=True)

        s3_client.download_file(bucket_name, object_key, local_file_path)
        print(f"Downloaded {object_key} to {local_file_path}")

**Zip the training data for the next pipeline node**

In [None]:
def zip_directory(folder_path, zip_filename):
    """Compress every file under ``folder_path`` into a ZIP archive.

    Member names are stored relative to ``folder_path``, so the archive holds
    the folder's contents rather than the folder itself. Only files are
    written; empty directories are not recorded.

    Args:
        folder_path: Directory that is walked recursively for files.
        zip_filename: Path of the archive to create. It is opened in ``'w'``
            mode, so an existing file at that path is overwritten.
    """
    # The archive file is created before the walk begins. If it lives inside
    # folder_path the walk would find it and try to zip it into itself, so
    # remember its location and skip it.
    archive_path = os.path.abspath(zip_filename)

    with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                if os.path.abspath(file_path) == archive_path:
                    continue
                zipf.write(file_path, os.path.relpath(file_path, folder_path))

# Source directory to archive and the path of the resulting ZIP file, both
# relative to the current working directory.
folder_path = './train'
zip_filename = './images_zipped.zip'

zip_directory(folder_path=folder_path, zip_filename=zip_filename)