In [19]:
import boto3
import os
from dotenv import load_dotenv
from botocore.exceptions import NoCredentialsError, ClientError
import sys
# Make the project's ../src directory importable so the local modules
# (s3_manager, config_loader) imported below can be resolved.
# The membership guard keeps this cell idempotent: re-running it does not
# pile up duplicate sys.path entries.
sys_path = r'../src'
if sys_path not in sys.path:
    sys.path.append(sys_path)

from s3_manager import S3_Downloader
from config_loader import read_yaml_file

In [28]:
# Load environment variables from the project's .env file into the process environment.
# NOTE(review): presumably this supplies the AWS credentials used by the S3 connection — confirm.
load_dotenv(r'../config/config.env')

# Read the default YAML configuration; later cells index it as config['s3'].
config = read_yaml_file(r'../config/default_config.yaml')

In [29]:
# Create the S3 client via the project's S3_Downloader helper; it is passed to
# preprocess_bucket_prefix further down as client_obj.
client_obj = S3_Downloader.establish_connection_s3()

✅ S3 client created successfully...!!


In [27]:
# Display the loaded configuration to eyeball the bucket/prefix map and the other settings.
config

{'s3': {'prod-completed': ['2025-05-30/', '2025-05-29/']},
 'sync': {'loc_download': './downloads', 'mode': 'incremental'},
 'filters': {'include_extensions': ['.csv', '.xlsx'],
  'exclude_files': ['skip_this_file.csv']},
 'logging': {'enable': True, 'log_file': 's3_download.log'}}

In [30]:
# Run the packaged S3_Downloader.preprocess_bucket_prefix on the {bucket: [prefixes]} map.
# NOTE(review): this call passes no S3 client, unlike the notebook-local
# preprocess_bucket_prefix(s3_client, bucket_prefix_map) defined below — confirm which
# version is the intended one.
bucket_prefix_pair = S3_Downloader.preprocess_bucket_prefix(config['s3'])


In [31]:
def preprocess_bucket_prefix(s3_client: "botocore.client.BaseClient", bucket_prefix_map: dict[str, list[str]]) -> list[tuple[str, str]]:
    """
    Convert a {bucket: [prefixes]} mapping into a validated list of (bucket, prefix) pairs.

    Each bucket is probed with ``head_bucket``; each prefix is probed with a
    one-key ``list_objects_v2`` call. Buckets and prefixes that fail the probe
    are logged and skipped, so the function never raises ``ClientError`` itself.

    Args:
        s3_client: An S3 client exposing ``head_bucket`` and ``list_objects_v2``
            (e.g. the object returned by ``boto3.client('s3')``).
        bucket_prefix_map (Dict[str, List[str]]): A dictionary mapping bucket names
            to lists of prefixes. An empty or ``None`` mapping yields ``[]``.

    Returns:
        List[Tuple[str, str]]: The (bucket, prefix) tuples that passed validation,
            in input order. A bucket whose prefix list is empty/``None`` is returned
            once as ``(bucket, '*')``.

    Note:
        ``'*'`` is only a sentinel meaning "no prefix configured". S3 matches
        prefixes literally and has no wildcard syntax, so consumers must translate
        ``'*'`` themselves (e.g. to an empty prefix) before listing objects.
    """
    # Local import: the notebook's import cell does not provide a logger, and the
    # original code referenced an undefined `logger` (NameError on every log path).
    import logging

    logger = logging.getLogger(__name__)

    def _error_message(err: Exception) -> str:
        # ClientError carries the service message under response['Error']['Message'];
        # fall back to str(err) if that structure is missing.
        details = getattr(err, 'response', None) or {}
        return details.get('Error', {}).get('Message') or str(err)

    valid_bucket_prefixes: list[tuple[str, str]] = []

    # Nothing configured (e.g. an empty `s3:` section in the YAML) -> nothing to validate.
    if not bucket_prefix_map:
        return valid_bucket_prefixes

    for bucket, prefixes in bucket_prefix_map.items():
        try:
            # Check that the bucket exists and is accessible with the current credentials.
            s3_client.head_bucket(Bucket=bucket)
        except ClientError as e:
            logger.error(f"❌ Bucket '{bucket}' is not accessible: {_error_message(e)}")
            continue

        if not prefixes:
            # No prefixes configured for this bucket: emit the '*' sentinel.
            valid_bucket_prefixes.append((bucket, '*'))
            continue

        for prefix in prefixes:
            try:
                # MaxKeys=1 keeps the probe cheap: one key is enough to prove the prefix is non-empty.
                response = s3_client.list_objects_v2(Bucket=bucket, Prefix=prefix, MaxKeys=1)
            except ClientError as e:
                logger.error(f"❌ Failed to validate prefix '{prefix}' in bucket '{bucket}': {_error_message(e)}")
                continue

            if response.get('Contents'):
                valid_bucket_prefixes.append((bucket, prefix))
            else:
                logger.warning(f"⚠️ No objects found for prefix '{prefix}' in bucket '{bucket}'. Skipping.")

    return valid_bucket_prefixes

In [32]:
# Exercise the notebook-local validator with the live client; the returned list is
# displayed as the cell output rather than stored in a variable.
preprocess_bucket_prefix(client_obj, config['s3'])

[('prod-completed', '2025-05-30/'), ('prod-completed', '2025-03-01/')]

In [3]:
# NOTE(review): repeats the connection cell near the top of the notebook and carries an
# older execution count (In [3]); likely a leftover — consider removing it so the
# notebook runs cleanly top to bottom.
client_obj = S3_Downloader.establish_connection_s3()


✅ S3 client created successfully...!!


<botocore.client.S3 at 0x2899d4e2870>