## Trusted Zone

In [1]:
# Importing useful dependencies
import os

import boto3

In [2]:
# Set up the S3 client for MinIO (MinIO implements the Amazon S3 API).
# Connection settings are read from the environment so that real credentials
# are never committed with the notebook; each fallback is the value that was
# previously hardcoded here, so behaviour is unchanged when nothing is set.
#   MINIO_ENDPOINT   -> URL of the MinIO API
#   MINIO_ACCESS_KEY -> user name
#   MINIO_SECRET_KEY -> password
s3 = boto3.client(
    "s3",
    endpoint_url=os.environ.get("MINIO_ENDPOINT", "http://127.0.0.1:9000"),  # MinIO API endpoint
    aws_access_key_id=os.environ.get("MINIO_ACCESS_KEY", "minioadmin"),  # User name
    aws_secret_access_key=os.environ.get("MINIO_SECRET_KEY", "minioadmin"),  # Password
)

In [3]:
# Copies all objects from a bucket (under a prefix) into another bucket,
# creating the destination bucket if it doesn't exist.
def replicate_bucket(src_bucket, dest_bucket, src_prefix="", client=None):
    """Copy every object under ``src_prefix`` in ``src_bucket`` into ``dest_bucket``.

    The destination bucket is created when it is not listed by the client.
    ``src_prefix`` is removed from the front of each key, so objects land in
    the destination without the source "folder". Zero-byte keys ending in
    "/" (folder placeholders) are not copied.

    Args:
        src_bucket: Name of the bucket to read from.
        dest_bucket: Name of the bucket to write to.
        src_prefix: Only keys starting with this prefix are copied. Defaults
            to "" (the whole bucket, keys unchanged).
        client: S3 client to use. Defaults to the notebook-level ``s3``.

    Returns:
        None. One "Copied: ..." line is printed per copied object.
    """
    if client is None:
        client = s3

    # Create destination bucket if it doesn't exist
    existing_names = {b["Name"] for b in client.list_buckets()["Buckets"]}
    if dest_bucket not in existing_names:
        client.create_bucket(Bucket=dest_bucket)

    # The listing is paginated: objects arrive in pages, not all at once.
    paginator = client.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=src_bucket, Prefix=src_prefix):
        for obj in page.get("Contents", []):
            key = obj["Key"]

            if obj["Size"] == 0 and key.endswith("/"):  # skip the folder itself
                continue

            # Remove the prefix part from the key. When the prefix was given
            # without its trailing "/", ("images" instead of "images/") the
            # remainder would start with "/", so drop that separator too.
            new_key = key[len(src_prefix):]
            if src_prefix:
                new_key = new_key.lstrip("/")

            # A key identical to the prefix leaves nothing to name the copy;
            # an empty Key would make copy_object fail and abort the whole run.
            if not new_key:
                print(f"Skipped: {key} (empty destination key)")
                continue

            # Copy object without top-level folder
            copy_source = {"Bucket": src_bucket, "Key": key}
            client.copy_object(Bucket=dest_bucket, Key=new_key, CopySource=copy_source)

            print(f"Copied: {key} -> {new_key}")

In [4]:
# Mirror the whole Formatted Zone into the Trusted Zone
# (an empty prefix selects every object and keeps the keys unchanged).
replicate_bucket(
    src_bucket="formatted-zone",
    dest_bucket="trusted-zone",
    src_prefix="",
)

Copied: images/image_1760786279860.png -> images/image_1760786279860.png
Copied: images/image_1760786279932.png -> images/image_1760786279932.png
Copied: images/image_1760786279989.png -> images/image_1760786279989.png
Copied: images/image_1760786280056.png -> images/image_1760786280056.png
Copied: images/image_1760786280131.png -> images/image_1760786280131.png
Copied: images/image_1760786280218.png -> images/image_1760786280218.png
Copied: images/image_1760786280278.png -> images/image_1760786280278.png
Copied: images/image_1760786280334.png -> images/image_1760786280334.png
Copied: images/image_1760786280394.png -> images/image_1760786280394.png
Copied: images/image_1760786280458.png -> images/image_1760786280458.png
Copied: images/image_1760786280522.png -> images/image_1760786280522.png
Copied: images/image_1760786280581.png -> images/image_1760786280581.png
Copied: images/image_1760786280636.png -> images/image_1760786280636.png
Copied: images/image_1760786280702.png -> images/im

In [5]:
# At this point every zone should hold the same number of files.
# Print the per-bucket counts so they can be compared.

def _is_countable(key):
    """Return True when ``key`` names a real file.

    Folder placeholders (keys ending in "/") are excluded, and so is any key
    with a path component starting with "_" or ".". Checking every component,
    not just the start of the whole key, also excludes nested metadata/system
    entries such as "images/.DS_Store".
    """
    if key.endswith("/"):
        return False
    return not any(part.startswith(("_", ".")) for part in key.split("/"))


# One paginator serves all buckets; each paginate() call starts a new listing.
object_pages = s3.get_paginator("list_objects_v2")

# Count valid files in each bucket
for bucket in s3.list_buckets()["Buckets"]:
    bucket_name = bucket["Name"]

    # Pages of an empty bucket carry no "Contents" entry, hence the default.
    count = sum(
        _is_countable(obj["Key"])
        for page in object_pages.paginate(Bucket=bucket_name)
        for obj in page.get("Contents", [])
    )

    print(f"Bucket: {bucket_name}, Files: {count}")

Bucket: formatted-zone, Files: 2018
Bucket: landing-zone, Files: 2018
Bucket: trusted-zone, Files: 2018
