In [None]:
%run ./Global_Configurations

In [None]:
%run ./Layer_Utilities_NB

In [None]:
%run ./Extraction_NB

In [None]:
%run ./Transformation_NB

In [None]:
class ETLPipeline:
    """
    Coordinates a single extract -> transform -> load run.

    The extractor and transformer are injected by the caller. The Bronze-layer
    read and the S3 write go through ``LayerUtils``; the S3 destination is taken
    from the global names ``bucket_name`` and ``bucket_path``, which are not
    defined in this cell (presumably supplied by one of the ``%run`` notebooks
    -- confirm).
    """

    def __init__(self, extractor, transformer):
        """
        Stores the two collaborators the pipeline delegates to.

        Args:
            extractor: Object whose ``extract_data()`` method is invoked as the
                first pipeline stage (a ``DataExtractor`` in this notebook).
            transformer: Object whose ``process_dataframe(df)`` method is applied
                to the data read back from the Bronze layer.
        """
        self.transformer = transformer
        self.extractor = extractor

    def run_pipeline(self):
        """
        Executes every stage once, in order, printing a progress line before each.

        Stages:
            1. ``self.extractor.extract_data()`` -- per the progress message this
               pulls from DynamoDB; it is expected to land the data in the
               Bronze layer.
            2. ``LayerUtils.read_from_bronze_layer()`` to load that data back.
            3. ``self.transformer.process_dataframe(...)`` on the loaded data.
            4. ``LayerUtils.write_to_s3(...)`` with the transformed data.

        Returns:
            None. No exception handling is done here, so an error raised by any
            stage propagates to the caller and the remaining stages are skipped.
        """
        # NOTE(review): an earlier revision had a disabled call to
        # LayerUtils.create_etl_tracker_table() at this point -- confirm whether
        # the tracker table has to exist before a run.
        print("\nStarting ETL pipeline...")

        self._extract_to_bronze()
        raw_frame = self._load_bronze()
        curated_frame = self._transform(raw_frame)
        self._publish(curated_frame)

        print("\nETL pipeline completed successfully!")
        # Trailing blank line to separate this run's output from what follows.
        print()

    def _extract_to_bronze(self):
        """Stage 1: announce and trigger the extractor."""
        print("\nExtracting data from DynamoDB...")
        self.extractor.extract_data()

    def _load_bronze(self):
        """Stage 2: announce and return whatever LayerUtils reads from the Bronze layer."""
        print("\nReading data from the Bronze layer...")
        return LayerUtils.read_from_bronze_layer()

    def _transform(self, raw_frame):
        """Stage 3: announce, run the transformer, and print the resulting columns."""
        print("\nApplying transformations...")
        curated_frame = self.transformer.process_dataframe(raw_frame)
        # Column listing is printed as a quick visual check of the output schema.
        print(curated_frame.columns)
        return curated_frame

    def _publish(self, curated_frame):
        """Stage 4: announce and hand the transformed data to LayerUtils for the S3 write."""
        print("\nWriting transformed data to S3...")
        LayerUtils.write_to_s3(curated_frame, bucket_name, bucket_path)

In [None]:
# Wire up the pipeline. Every argument below is a name that is not defined in
# this notebook cell (presumably provided by the notebooks pulled in via %run).
data_transformer = DataTransformer()
data_extractor = DataExtractor(
    dynamo_table_name,
    bronze_layer_path,
    aws_access_key,
    aws_secret_access_key,
    region_name,
)
etl_pipeline = ETLPipeline(data_extractor, data_transformer)

In [None]:
# Kick off one full run: extract, read back from the Bronze layer, transform,
# then write to S3. Progress is reported via print(); run_pipeline() has no
# return statement, so the cell itself displays no result value.
etl_pipeline.run_pipeline()