[Reference](https://amsayed.medium.com/coding-data-pipeline-design-patterns-in-python-44a705f0af9e)

In [1]:
class DataPipelineFacade:
    """Single entry point that hides the extract / transform / load plumbing.

    The facade holds three collaborators and routes an ETL request to the
    right one based on a source-type string:

    * ``db_client``   -- must provide ``extract_data(**config)`` and
      ``load_data(data)``.
    * ``api_client``  -- must provide ``get_data(**config)``.
    * ``file_system`` -- must provide ``load_csv(**config)``.
    """

    def __init__(self, db_client, api_client, file_system):
        self.db_client = db_client
        self.api_client = api_client
        self.file_system = file_system

    def extract_transform_load(self, source_type, source_config, transformations):
        """Extract from the chosen source, transform, and load into the database.

        Args:
            source_type: One of ``'database'``, ``'api'`` or ``'file'``.
            source_config: Mapping unpacked as keyword arguments into the
                selected client's extraction method.
            transformations: Iterable of callables forwarded to
                :meth:`apply_transformations` (``None`` means "no changes").

        Raises:
            ValueError: If ``source_type`` is not one of the supported values.
        """
        if source_type == 'database':
            data = self.db_client.extract_data(**source_config)
        elif source_type == 'api':
            data = self.api_client.get_data(**source_config)
        elif source_type == 'file':
            data = self.file_system.load_csv(**source_config)
        else:
            raise ValueError("Invalid source type")

        transformed_data = self.apply_transformations(data, transformations)
        # The database client is always the load target, whatever the source.
        self.db_client.load_data(transformed_data)

    def apply_transformations(self, data, transformations):
        """Run ``data`` through each transformation in order.

        Args:
            data: The extracted payload; its type is opaque to the facade.
            transformations: Iterable of one-argument callables. Each receives
                the output of the previous one. ``None`` or an empty iterable
                leaves ``data`` untouched.

        Returns:
            The output of the last transformation, or ``data`` itself when
            there are no transformations.
        """
        transformed_data = data
        for transform in transformations or ():
            transformed_data = transform(transformed_data)
        return transformed_data

In [2]:
class XMLAdapter:
    """Adapter that exposes an XML document through a ``get_data()`` call.

    The adapter stores the raw XML at construction time and parses it lazily
    when :meth:`get_data` is called.
    """

    def __init__(self, xml_data):
        # NOTE(review): assumed to be an XML document as ``str`` or ``bytes``
        # -- confirm against the callers.
        self.xml_data = xml_data

    def get_data(self):
        """Parse the stored XML into a list of flat record dictionaries.

        Every direct child of the document root is treated as one record.
        A record's dictionary contains the element's attributes plus one
        entry per sub-element (``tag -> text``); sub-elements win when a tag
        collides with an attribute name.

        Returns:
            list[dict]: One dictionary per record, in document order. An
            empty root yields an empty list.

        Raises:
            xml.etree.ElementTree.ParseError: If the XML is malformed.
        """
        # Imported locally so this block does not rely on a file-level import.
        # NOTE: ElementTree is not hardened against malicious XML; switch to a
        # hardened parser if the input can come from untrusted sources.
        import xml.etree.ElementTree as ET

        root = ET.fromstring(self.xml_data)
        adapted_data = []
        for record in root:
            row = dict(record.attrib)
            row.update((field.tag, field.text) for field in record)
            adapted_data.append(row)
        return adapted_data

In [3]:
def logging_decorator(func):
    """Wrap ``func`` so that each call is announced on stdout.

    A line is printed before the call and another after it returns. If
    ``func`` raises, the exception propagates and the completion line is not
    printed.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same call signature that returns whatever
        ``func`` returns. Thanks to ``functools.wraps`` the wrapper keeps
        the original ``__name__``, ``__doc__`` and ``__wrapped__``.
    """
    # Imported locally so this block does not rely on a file-level import.
    import functools

    @functools.wraps(func)
    def inner(*args, **kwargs):
        print(f"Calling function: {func.__name__}")
        result = func(*args, **kwargs)
        print("Function completed.")
        return result

    return inner

@logging_decorator
def processing_step(data):
    """Example pipeline step; call logging comes from ``logging_decorator``.

    Args:
        data: The payload to process.

    Returns:
        The processed payload. No processing logic exists yet, so the input
        is currently returned unchanged.
    """
    # Placeholder: bind the result name so the function no longer raises
    # NameError; replace this with the real processing logic.
    processed_data = data
    return processed_data

In [4]:
def logging_decorator(func):
    """Return a wrapper around ``func`` that prints before and after each call.

    The completion message is printed only when ``func`` returns normally;
    exceptions raised by ``func`` propagate unchanged.

    Args:
        func: The callable to wrap.

    Returns:
        A callable that forwards all positional and keyword arguments to
        ``func`` and returns its result. ``functools.wraps`` copies the
        wrapped function's metadata (``__name__``, ``__doc__``, ...) onto it.
    """
    # Imported locally so this block does not rely on a file-level import.
    import functools

    @functools.wraps(func)
    def inner(*args, **kwargs):
        print(f"Calling function: {func.__name__}")
        result = func(*args, **kwargs)
        print("Function completed.")
        return result

    return inner

@logging_decorator
def processing_step(data):
    """Example pipeline step wrapped by ``logging_decorator``.

    Args:
        data: The payload to process.

    Returns:
        The processed payload. Until real logic is added, this is the input
        itself.
    """
    # Placeholder pass-through: the name must be bound before it is returned,
    # otherwise every call fails with NameError.
    processed_data = data
    return processed_data

In [8]:
class CSVFileIterator:
    """Iterate over the rows of a CSV file one at a time.

    Rows are read lazily through ``csv.reader``. Blank lines are skipped
    rather than ending the iteration. The underlying file is closed as soon
    as the rows are exhausted, when reading fails, or when :meth:`close` is
    called (directly or by leaving a ``with`` block).
    """

    def __init__(self, filename):
        """Open ``filename`` and prepare a CSV reader over it.

        Args:
            filename: Path of the CSV file to read.

        Raises:
            OSError: If the file cannot be opened.
        """
        # Imported locally so this block does not rely on a file-level import.
        import csv

        # newline="" lets the csv module handle line endings itself, so
        # newlines embedded in quoted fields are read correctly.
        self.file = open(filename, newline="")
        self.reader = csv.reader(self.file)

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next non-empty row as a list of strings.

        Raises:
            StopIteration: When no rows remain; the file is closed first.
        """
        # Once exhausted (or closed) keep signalling exhaustion instead of
        # failing with "I/O operation on closed file".
        if self.file.closed:
            raise StopIteration
        try:
            for row in self.reader:
                # csv.reader yields [] for a blank line; that is not the end
                # of the data, so skip it and keep reading.
                if row:
                    return row
        except Exception:
            self.close()
            raise
        self.close()
        raise StopIteration

    def close(self):
        """Close the underlying file; safe to call more than once."""
        self.file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

# Usage
for row in CSVFileIterator('large_data.csv'):
    # Process each row individually
    pass  # placeholder body: a comment alone is not a valid loop body

In [7]:
class ProcessingStep:
    """One link in a chain of handlers.

    Each step either processes the data itself or forwards it to its
    successor. Subclasses are expected to override :meth:`can_handle` and
    :meth:`process`; the base versions do nothing and return ``None``, so a
    plain ``ProcessingStep`` always defers to its successor.
    """

    def __init__(self, successor=None):
        self.successor = successor

    def handle(self, data):
        """Process ``data`` here if possible, otherwise pass it down the chain.

        Returns:
            Whatever the first accepting step's ``process`` returns.

        Raises:
            Exception: If this step declines and there is no successor.
        """
        if self.can_handle(data):
            return self.process(data)

        next_step = self.successor
        if not next_step:
            raise Exception("No suitable handler found")
        return next_step.handle(data)

    def can_handle(self, data):
        """Tell whether this step accepts ``data`` (base: ``None``, i.e. no)."""
        return None

    def process(self, data):
        """Do the actual work on ``data`` (base: no-op returning ``None``)."""
        return None

In [9]:
class DataCleaningStrategy:
    """Interface for interchangeable data-cleaning algorithms."""

    def clean_data(self, data):
        """Clean ``data``; concrete strategies must override this method.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

class BasicCleaning(DataCleaningStrategy):
    """Cleaning strategy intended for the basic cleaning rules."""

    def clean_data(self, data):
        """Return ``data`` as received; the basic rules are not written yet."""
        # ... Basic cleaning logic
        return data

class AdvancedCleaning(DataCleaningStrategy):
    """Cleaning strategy intended for the more complex cleaning rules."""

    def clean_data(self, data):
        """Return ``data`` as received; the complex rules are not written yet."""
        # ...  Complex cleaning logic
        return data

class DataPipeline:
    """Pipeline whose cleaning behaviour is supplied by a strategy object.

    Args:
        cleaning_strategy: Any object exposing ``clean_data(data)``.
    """

    def __init__(self, cleaning_strategy):
        self.cleaning_strategy = cleaning_strategy

    def process_data(self, data):
        """Clean ``data`` with the configured strategy and return the result.

        Args:
            data: The payload handed to the strategy's ``clean_data``.

        Returns:
            The value returned by the strategy. Previously the cleaned data
            was computed and then discarded.
        """
        cleaned_data = self.cleaning_strategy.clean_data(data)
        # ... Further processing
        return cleaned_data