---

### 6. **Custom Retry Count**

**Description:**
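Write a **parameterized decorator** `retry(times=3)` that allows the retry count to be passed dynamically. (The solution below names this parameter `max_attempts` and also accepts an initial `delay` and a `backoff` multiplier.)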

---

In [1]:
import time
from functools import wraps

def retry(max_attempts=3, delay=1, backoff=2, *, exceptions=(Exception,)):
    """Build a decorator that re-runs a function when it raises.

    Args:
        max_attempts: Total number of calls allowed, the first try included.
            Must be at least 1.
        delay: Seconds to sleep before the second attempt.
        backoff: Factor the sleep time is multiplied by after each failed
            attempt.
        exceptions: Exception class, or tuple of classes, that trigger a
            retry. Any other exception propagates immediately. The default
            ``(Exception,)`` retries on every ordinary exception.

    Returns:
        A decorator. The wrapper it produces returns the wrapped function's
        result from the first attempt that succeeds, and re-raises the last
        exception once ``max_attempts`` calls have all failed.

    Raises:
        ValueError: If ``max_attempts`` is less than 1. Without this check
            the wrapper would return ``None`` without ever calling the
            wrapped function.
    """
    if max_attempts < 1:
        raise ValueError(f"max_attempts must be at least 1, got {max_attempts}")

    def decorator(func):
        @wraps(func)  # Preserves function metadata (e.g., __name__, docstring)
        def wrapper(*args, **kwargs):
            current_delay = delay
            for attempt in range(1, max_attempts + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions as e:
                    if attempt == max_attempts:
                        # Out of attempts: surface the original exception
                        # with its traceback intact.
                        raise
                    print(f" Attempt {attempt}/{max_attempts} failed: {e}. Retrying in {current_delay}s...")
                    time.sleep(current_delay)
                    current_delay *= backoff
        return wrapper
    return decorator


@retry(max_attempts=3, delay=1, backoff=2)
def connect_to_database():
    """Simulate a flaky database connection.

    Returns:
        The success message when the random draw is 0.7 or higher.

    Raises:
        ConnectionError: When the random draw falls below 0.7
            (about 70% of calls).
    """
    import random

    if random.random() >= 0.7:  # the remaining ~30% of draws succeed
        return "Connected successfully!"
    raise ConnectionError("Database connection failed!")


# Test
print(connect_to_database())

 Attempt 1/3 failed: Database connection failed!. Retrying in 1s...
 Attempt 2/3 failed: Database connection failed!. Retrying in 2s...


ConnectionError: Database connection failed!

---

### 7. **Conditional Logger**

**Description:**
Build a `log_if(debug=True)` decorator that **logs only if debug is True** — helpful in turning logging on/off in pipelines.

---

In [2]:
import functools

def log_if(debug=True):
    """Create a decorator that prints call/return traces when ``debug`` is truthy.

    Args:
        debug: When truthy, each call prints the function name with its
            positional and keyword arguments, then the returned value.
            When falsy, the wrapped function runs without any output.

    Returns:
        A decorator whose wrapper passes the wrapped function's return
        value through unchanged.
    """
    def decorator(func):
        def wrapper(*args, **kwargs):
            # Quiet path: just delegate.
            if not debug:
                return func(*args, **kwargs)

            print(f"[LOG] Calling {func.__name__} with args={args}, kwargs={kwargs}")
            result = func(*args, **kwargs)
            print(f"[LOG] {func.__name__} returned {result}")
            return result

        # Copy __name__, docstring, etc. from the wrapped function.
        return functools.wraps(func)(wrapper)

    return decorator

In [4]:
@log_if(debug=True)
def add(a, b):
    """Return ``a + b``; decorated with logging switched on."""
    total = a + b
    return total


@log_if(debug=False)
def subtract(a, b):
    """Return ``a - b``; decorated with logging switched off."""
    difference = a - b
    return difference


add(3, 5)
subtract(5, 2)

[LOG] Calling add with args=(3, 5), kwargs={}
[LOG] add returned 8


3

---

### 8. **Schema Validator**

**Description:**
Write a `validate_schema(required_fields)` decorator that checks if all required fields exist in a dictionary input to an ETL step.

---

In [3]:
from functools import wraps
from typing import List, Dict, Any

def validate_schema(required_fields: List[str]):
    """Create a decorator that checks the first argument for required keys.

    Args:
        required_fields: Names that must be present in the first positional
            argument of the decorated function.

    Returns:
        A decorator. Its wrapper calls the decorated function with the
        original arguments when nothing is missing.

    Raises:
        ValueError: From the wrapper, listing every absent field in the
            order given by ``required_fields``.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(input_data: Dict[str, Any], *args, **kwargs):
            missing_fields = [name for name in required_fields if name not in input_data]
            # Happy path first: nothing missing, hand over to the real step.
            if not missing_fields:
                return func(input_data, *args, **kwargs)
            raise ValueError(f"Missing required fields: {missing_fields}")
        return wrapper
    return decorator

In [4]:
@validate_schema(required_fields=["id", "name", "email"])
def process_user_data(user: dict):
    """Print a one-line summary (name and ID) of a user record.

    The ``validate_schema`` decorator requires ``id``, ``name`` and
    ``email`` to be present before this body runs.
    """
    print(f"Processing user: {user['name']} (ID: {user['id']})")


# Sample records: one complete, one missing "email"
user_ok = {"id": 1, "name": "Alice", "email": "alice@example.com"}
user_bad = {"id": 2, "name": "Bob"}

# Valid input (no error)
process_user_data(user_ok)  # Output: "Processing user: Alice (ID: 1)"

# Invalid input (raises error)
process_user_data(user_bad)  # Raises: ValueError: Missing required fields: ['email']

Processing user: Alice (ID: 1)


ValueError: Missing required fields: ['email']

---

### 9. **Field Sanitizer**

**Description:**
Write a decorator `sanitize_fields(fields_to_strip)` that **strips leading and trailing whitespace** from the specified fields of an input dictionary, without modifying the caller's original dictionary.

---

In [5]:
from functools import wraps
from typing import List, Dict, Any

def sanitize_fields(fields_to_strip: List[str]):
    """Create a decorator that trims whitespace from selected string fields.

    Args:
        fields_to_strip: Keys whose values should have leading and trailing
            whitespace removed. Keys that are absent, or whose values are
            not strings, are left alone.

    Returns:
        A decorator. Its wrapper passes a shallow copy of the first
        positional argument, with the listed fields stripped, to the
        decorated function and returns that function's result.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(input_data: Dict[str, Any], *args, **kwargs):
            # Work on a shallow copy so the caller's dict is left untouched.
            cleaned = input_data.copy()
            for name in fields_to_strip:
                if name not in cleaned:
                    continue
                value = cleaned[name]
                if isinstance(value, str):
                    cleaned[name] = value.strip()
            return func(cleaned, *args, **kwargs)
        return wrapper
    return decorator

In [6]:
@sanitize_fields(fields_to_strip=["name", "email", "notes"])
def process_data(data: dict):
    """Print the record as received from the ``sanitize_fields`` wrapper."""
    print(f"Processed: {data}")


# Input with whitespace
raw_data = {
    "name": "  Alice  ",
    "email": " alice@example.com ",
    "age": 25,
    "notes": "  pending   ",
}

process_data(raw_data)

Processed: {'name': 'Alice', 'email': 'alice@example.com', 'age': 25, 'notes': 'pending'}


---

### 10. **ETL Step Metadata**

**Description:**
Write a decorator `add_metadata(pipeline_name)` that adds `pipeline_name` and `timestamp` to the output of a decorated ETL function.

---

In [8]:
from functools import wraps
from datetime import datetime
from typing import Any, Dict

def add_metadata(pipeline_name: str):
    """Create a decorator that wraps a function's output in a metadata envelope.

    Args:
        pipeline_name: Value stored under the ``"pipeline"`` key of every
            envelope produced by the decorated function.

    Returns:
        A decorator. Its wrapper returns a dict with three keys:
        ``"pipeline"`` (the given name), ``"timestamp"`` (UTC time in
        ISO-8601 format with a trailing ``"Z"``, taken after the decorated
        function returns) and ``"data"`` (the decorated function's original
        return value).
    """
    # Local import: the surrounding file only imports `datetime` itself.
    from datetime import timezone

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs) -> Dict[str, Any]:
            result = func(*args, **kwargs)

            # datetime.utcnow() is deprecated since Python 3.12. Take an
            # aware UTC time instead, then drop tzinfo so isoformat() does
            # not emit "+00:00" in front of the explicit "Z" suffix.
            now_utc = datetime.now(timezone.utc).replace(tzinfo=None)

            return {
                "pipeline": pipeline_name,
                "timestamp": now_utc.isoformat() + "Z",  # ISO-8601 with UTC
                "data": result,  # Preserve original output
            }
        return wrapper
    return decorator

In [9]:
@add_metadata(pipeline_name="user_ingestion")
def extract_users(source: str) -> list:
    """Return a fixed list of two sample user records.

    ``source`` is accepted but not used by this stub.
    """
    users = [
        {"id": 1, "name": "Alice"},
        {"id": 2, "name": "Bob"},
    ]
    return users


output = extract_users("database")
print(output)

{'pipeline': 'user_ingestion', 'timestamp': '2025-05-03T19:26:34.578337Z', 'data': [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}]}
