In [2]:
from datetime import datetime, timedelta
from airflow.models import DAG
from airflow.operators.python import PythonOperator
from airflow.operators.empty import EmptyOperator
import graphviz
from IPython.display import display, Markdown
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plot defaults: seaborn's "whitegrid" theme first, then a
# wide, short default figure size applied on top of it.
sns.set(style="whitegrid")
plt.rcParams.update({"figure.figsize": (10, 4)})

# Enhanced mock function with visualization
def mock_postgres_etl(**kwargs):
    """Simulate a PostgreSQL ETL run and render visual feedback inline.

    Displays a "connected" banner, draws a bar chart of hard-coded mock
    record counts for two tables, and reports when the mock run finished.
    No database is contacted.

    Args:
        **kwargs: Accepted so the function can be called with arbitrary
            keyword arguments; none of them are read.

    Returns:
        str: ``"ETL completed at HH:MM:SS"`` built from ``datetime.now()``.
    """
    display(Markdown("### 🟢 Connected to PostgreSQL"))

    # Hard-coded stand-ins for per-table record counts.
    table_names = ["bridge_customer_address", "dim_customer"]
    record_counts = [12500, 8400]

    _figure, axes = plt.subplots()
    sns.barplot(x=table_names, y=record_counts, palette="Blues_d", ax=axes)
    axes.set_title("📊 Mock Record Counts by Table")
    plt.show()

    finished_at = datetime.now().strftime("%H:%M:%S")
    return f"ETL completed at {finished_at}"

# Create DAG with documentation
with DAG(
    dag_id="mock_postgres_etl",
    schedule="@daily",
    start_date=datetime(2024, 1, 1),
    catchup=False,
    # Defaults applied to every task in this DAG: one retry, five minutes later.
    default_args={
        "retries": 1,
        "retry_delay": timedelta(minutes=5),
    },
    # Built from flush-left lines on purpose: a line indented by four or more
    # spaces right after the heading is parsed by Markdown as an indented code
    # block, which would show the "Purpose" line as preformatted text.
    doc_md=(
        "### Postgres Mock ETL Pipeline\n"
        "\n"
        "**Purpose**: Prototype database operations before production deployment\n"
    ),
) as dag:

    # No-op marker for the beginning of the pipeline.
    start = EmptyOperator(task_id="start", doc="Pipeline trigger")

    # The only task that does work: runs the mock ETL callable.
    postgres_task = PythonOperator(
        task_id="postgres_etl",
        python_callable=mock_postgres_etl,
        doc="Mock database extraction and transformation",
    )

    # No-op marker for the end of the pipeline.
    end = EmptyOperator(task_id="end", doc="Pipeline completion")

    # Linear dependency chain: start -> postgres_etl -> end
    start >> postgres_task >> end

# Enhanced Visualization
# Render the DAG as a Graphviz digraph: one rectangle per task, one arrow per
# direct upstream -> downstream dependency.
display(Markdown("## 🛠️ DAG Structure"))
dot = graphviz.Digraph()
for task in dag.tasks:
    # EmptyOperator marker tasks are light blue; all other tasks are light green.
    fill = "lightblue" if isinstance(task, EmptyOperator) else "lightgreen"
    dot.node(task.task_id, shape="rectangle", style="filled", fillcolor=fill)

# The DAG object has no `edges` attribute; dependencies are stored on each
# task as the ids of its direct downstream tasks. Sorting keeps the edge
# order (and therefore the rendered layout) stable between runs.
for task in dag.tasks:
    for downstream_id in sorted(task.downstream_task_ids):
        dot.edge(task.task_id, downstream_id)
display(dot)

# Interactive Testing
# Invoke the operator's execute() directly with a minimal hand-built context,
# then show whatever the callable returned.
display(Markdown("## 🧪 Test Execution"))
test_context = {"execution_date": datetime.now()}
test_result = postgres_task.execute(context=test_context)
display(Markdown(f"**Task Output**: `{test_result}`"))

# DAG Documentation
# Summarise the DAG's id, schedule, start date and task ids as a Markdown list.
display(Markdown("## 📝 DAG Metadata"))
task_ids = [t.task_id for t in dag.tasks]
metadata_md = f"""
- **DAG ID**: `{dag.dag_id}`
- **Schedule**: `{dag.schedule_interval}`
- **Start Date**: `{dag.start_date}`
- **Tasks**: `{task_ids}`
"""
display(Markdown(metadata_md))

<DAG: mock_postgres_etl>