## Persistent Landing

In [1]:
# Importing useful dependencies
import time
import boto3

In [2]:
# Setup S3 client for MinIO (MinIO implements Amazon S3 API)
s3 = boto3.client(
    "s3",
    endpoint_url="http://127.0.0.1:9000", # MinIO API endpoint
    aws_access_key_id="minioadmin", # User name
    aws_secret_access_key="minioadmin", # Password
)

In [3]:
# Create three sub-buckets inside persistent-landing, one per format
s3.put_object(Bucket="landing-zone", Key="persistent-landing/texts/") # Sub-bucket Text
s3.put_object(Bucket="landing-zone", Key="persistent-landing/images/") # Sub-bucket Image
s3.put_object(Bucket="landing-zone", Key="persistent-landing/videos/") # Sub-bucket Video

{'ResponseMetadata': {'RequestId': '186F91CA31896D38',
  'HostId': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'accept-ranges': 'bytes',
   'content-length': '0',
   'etag': '"d41d8cd98f00b204e9800998ecf8427e"',
   'server': 'MinIO',
   'strict-transport-security': 'max-age=31536000; includeSubDomains',
   'vary': 'Origin, Accept-Encoding',
   'x-amz-checksum-crc32': 'AAAAAA==',
   'x-amz-checksum-type': 'FULL_OBJECT',
   'x-amz-id-2': 'dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8',
   'x-amz-request-id': '186F91CA31896D38',
   'x-content-type-options': 'nosniff',
   'x-ratelimit-limit': '1901',
   'x-ratelimit-remaining': '1901',
   'x-xss-protection': '1; mode=block',
   'date': 'Sat, 18 Oct 2025 11:17:56 GMT'},
  'RetryAttempts': 0},
 'ETag': '"d41d8cd98f00b204e9800998ecf8427e"',
 'ChecksumCRC32': 'AAAAAA==',
 'ChecksumType': 'FULL_OBJECT'}

In [4]:
# This function checks an object's ContentType (via head_object) and classifies it as "texts", "images", or "videos"
# based on whether the type starts with text/, image/, or video/.
def classify_object_by_head(client, bucket, key):
    # ask S3 for ContentType
    head = client.head_object(Bucket=bucket, Key=key)
    ct = head.get("ContentType", "")
    if ct.startswith("text/"):
        return "texts"
    elif ct.startswith("image/"):
        return "images"
    elif ct.startswith("video/"):
        return "videos"

In [5]:
# This function moves all files from the source_prefix folder to the dest_prefix folder,
# classifying each file as text, image, or video (based on ContentType), renaming it with a timestamped filename (ingestion time),
# copying it to the appropriate subfolder (texts/, images/, videos/), and then deleting the original files in the source_prefix.
def move_files(client, bucket, source_prefix="temporal-landing/", dest_prefix="persistent-landing/"):
    
    paginator = client.get_paginator("list_objects_v2") # It returns objects in pages and not all at once.

    for page in paginator.paginate(Bucket=bucket, Prefix=source_prefix):
        for obj in page.get("Contents", []):
            
            src_key = obj["Key"]

            if obj['Size'] == 0 and src_key.endswith("/"):
                continue

            # classify
            category = classify_object_by_head(client, bucket, src_key)

            # get file extension
            ext = src_key.split('.')[-1].split('?')[0]

            # new filename = timestamp + original extension
            ts = int(time.time() * 1000)  # milliseconds
            new_filename = f"{category[0:-1]}_{ts}.{ext}"

            # build destination key
            dest_key = f"{dest_prefix}{category}/{new_filename}"

            # copy then delete
            client.copy_object(Bucket=bucket, CopySource={"Bucket": bucket, "Key": src_key}, Key=dest_key)
            client.delete_object(Bucket=bucket, Key=src_key)

            print(f"Moved: {src_key} -> {dest_key}")

In [6]:
# Moving files from Temporal Landing to Persistent Landing and removing temporal files
move_files(s3, "landing-zone", "temporal-landing/", "persistent-landing/")

Moved: temporal-landing/image_1.jpg -> persistent-landing/images/image_1760786279860.jpg
Moved: temporal-landing/image_10.jpg -> persistent-landing/images/image_1760786279932.jpg
Moved: temporal-landing/image_100.jpg -> persistent-landing/images/image_1760786279989.jpg
Moved: temporal-landing/image_1000.jpg -> persistent-landing/images/image_1760786280056.jpg
Moved: temporal-landing/image_101.jpg -> persistent-landing/images/image_1760786280131.jpg
Moved: temporal-landing/image_102.jpg -> persistent-landing/images/image_1760786280218.jpg
Moved: temporal-landing/image_103.jpg -> persistent-landing/images/image_1760786280278.jpg
Moved: temporal-landing/image_104.jpg -> persistent-landing/images/image_1760786280334.jpg
Moved: temporal-landing/image_105.jpg -> persistent-landing/images/image_1760786280394.jpg
Moved: temporal-landing/image_106.jpg -> persistent-landing/images/image_1760786280458.jpg
Moved: temporal-landing/image_107.jpg -> persistent-landing/images/image_1760786280522.jpg
M