In [10]:
import json
import os
from pprint import pprint

import boto3
import urllib3
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment so the
# os.getenv() lookups below can see them.
load_dotenv()

# Optional path to a CA bundle able to validate the endpoint's certificate.
# When S3_CA_BUNDLE is unset or empty, certificate verification stays disabled,
# which is exactly what this cell did before.
ca_bundle = os.getenv("S3_CA_BUNDLE")
verify_tls = ca_bundle if ca_bundle else False

if verify_tls is False:
    # SECURITY: with verification off, the credentials below travel over an
    # unauthenticated TLS connection. Only silence urllib3's warning in that
    # case; when a CA bundle is configured, warnings stay visible.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Create S3 client
# Note: os.getenv() returns None for a variable that is not set.
s3 = boto3.client(
    "s3",
    endpoint_url="https://swfs-s3.lab.s-miras.com/",
    aws_access_key_id=os.getenv("S3_AWS_ACCESS_KEY_ID"),
    aws_secret_access_key=os.getenv("S3_AWS_SECRET_ACCESS_KEY"),
    verify=verify_tls,  # False, or a path to a CA bundle file
)

# List buckets
response = s3.list_buckets()
pprint(response)

{'Buckets': [{'CreationDate': datetime.datetime(2025, 10, 3, 20, 44, 43, tzinfo=tzutc()),
              'Name': 'airflow'},
             {'CreationDate': datetime.datetime(2025, 10, 3, 20, 49, 45, tzinfo=tzutc()),
              'Name': 'data-archive'},
             {'CreationDate': datetime.datetime(2025, 10, 3, 20, 49, 45, tzinfo=tzutc()),
              'Name': 'data-in-processing'},
             {'CreationDate': datetime.datetime(2025, 10, 3, 20, 49, 45, tzinfo=tzutc()),
              'Name': 'data-marts'},
             {'CreationDate': datetime.datetime(2025, 10, 3, 20, 49, 59, tzinfo=tzutc()),
              'Name': 'data-metadata'},
             {'CreationDate': datetime.datetime(2025, 10, 3, 20, 49, 45, tzinfo=tzutc()),
              'Name': 'data-normalized'},
             {'CreationDate': datetime.datetime(2025, 10, 3, 20, 49, 45, tzinfo=tzutc()),
              'Name': 'data-raw'},
             {'CreationDate': datetime.datetime(2025, 10, 3, 20, 49, 45, tzinfo=tzutc()),
        

In [11]:
# Name of the bucket that every following cell operates on (create, list,
# upload, download, lifecycle policy).
bucket_name = "airflow"

In [12]:
# Create bucket
# Both "already exists" error types are handled the same way:
# BucketAlreadyExists and BucketAlreadyOwnedByYou (the bucket is already owned
# by the caller). Previously the second one fell through to the generic
# handler and was reported as a creation error.
try:
    s3.create_bucket(Bucket=bucket_name)
    pprint(f"Bucket '{bucket_name}' created successfully")
except (
    s3.exceptions.BucketAlreadyExists,
    s3.exceptions.BucketAlreadyOwnedByYou,
):
    pprint(f"Bucket '{bucket_name}' already exists")
except Exception as e:
    # Anything else (permissions, connectivity, invalid name, ...) is reported
    # rather than raised so the notebook keeps running.
    pprint(f"Error creating bucket: {e}")

"Bucket 'airflow' already exists"


In [13]:
# Issue a single list_objects_v2 call for the bucket and dump the raw reply.
listing_args = {"Bucket": bucket_name}
response = s3.list_objects_v2(**listing_args)
pprint(response)

{'Contents': [{'ETag': '"b10a8db164e0754105b7a99be72e3fe5"',
               'Key': 'test.txt',
               'LastModified': datetime.datetime(2025, 10, 6, 13, 34, 42, tzinfo=tzutc()),
               'Owner': {'ID': ''},
               'Size': 11,
               'StorageClass': 'STANDARD'},
              {'ETag': '"8856507a65a1b2f6ca15050b252b8f34"',
               'Key': 'xcoms/test_dag/manual__2025-10-03T21:21:17.938363+00:00/pass_pandas_df_in_xcom/c59df39d-c638-4ec4-91f4-a69d868e6127.gz',
               'LastModified': datetime.datetime(2025, 10, 3, 21, 36, 54, tzinfo=tzutc()),
               'Owner': {'ID': ''},
               'Size': 1257,
               'StorageClass': 'STANDARD'},
              {'ETag': '"434695db4935399f4b80e29874acfd02"',
               'Key': 'xcoms/test_dag/manual__2025-10-03T21:21:17.938363+00:00/pass_polars_df_in_xcom/4ceab817-3cc6-42a7-8582-440b90b22635.gz',
               'LastModified': datetime.datetime(2025, 10, 3, 21, 36, 55, tzinfo=tzutc()),
      

In [14]:
# Upload a small object; the call's return value is the cell's displayed output.
s3.put_object(
    Bucket=bucket_name,
    Key="test.txt",
    Body=b"Hello World",
)

{'ResponseMetadata': {'RequestId': '1759757691613424393',
  'HostId': '',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'accept-ranges': 'bytes',
   'content-length': '0',
   'date': 'Mon, 06 Oct 2025 13:34:51 GMT',
   'etag': '"b10a8db164e0754105b7a99be72e3fe5"',
   'server': 'SeaweedFS 30GB 3.93',
   'x-amz-request-id': '1759757691613424393'},
  'RetryAttempts': 0},
 'ETag': '"b10a8db164e0754105b7a99be72e3fe5"'}

In [15]:
# Fetch the object back and show its raw bytes.
response = s3.get_object(Bucket=bucket_name, Key="test.txt")
payload = response["Body"].read()  # reads the whole body into memory
pprint(payload)

b'Hello World'


In [16]:
# Lifecycle policy: one enabled rule that expires objects after 3 days.
# NOTE(review): the empty prefix makes the rule apply to every object in
# bucket_name, including anything already stored there - confirm that is intended.
expire_after_3_days = {
    "ID": "DeleteAfter3Days",
    "Status": "Enabled",
    "Filter": {"Prefix": ""},  # empty prefix: apply to all objects
    "Expiration": {"Days": 3},
}
lifecycle_config = {"Rules": [expire_after_3_days]}

try:
    s3.put_bucket_lifecycle_configuration(
        Bucket=bucket_name,
        LifecycleConfiguration=lifecycle_config,
    )
    print("Lifecycle policy applied successfully")
except Exception as err:
    # Report the failure instead of raising so the notebook keeps running.
    print(f"Error applying lifecycle policy: {err}")

Lifecycle policy applied successfully


In [17]:
# Verify lifecycle policy
# (json is imported with the other modules in the notebook's first cell.)
try:
    response = s3.get_bucket_lifecycle_configuration(Bucket=bucket_name)
    print("Current lifecycle configuration:")
    # .get(): a reply without a "Rules" key is shown as an empty list instead
    # of surfacing as a confusing KeyError message.
    # default=str: rules may contain values json cannot encode (for example an
    # Expiration "Date" is returned as a datetime); stringify them rather than
    # failing and misreporting a serialization problem as a retrieval error.
    print(json.dumps(response.get("Rules", []), indent=2, default=str))
except Exception as e:
    # Report the failure instead of raising so the notebook keeps running.
    print(f"Error getting lifecycle policy: {e}")

Current lifecycle configuration:
[
  {
    "Expiration": {
      "Days": 3
    },
    "Prefix": "",
    "Status": "Enabled"
  }
]
