# Week 3 — Workflow Orchestration with Prefect
**Goal:** Orchestrate a tiny ETL with retries, logging, and parameters.

## 0) Setup

In [1]:
# !pip -q install prefect pandas
import pandas as pd, io, os
try:
    from prefect import flow, task, get_run_logger
    from prefect.task_runners import ConcurrentTaskRunner
except Exception:
    print("If in Colab, run: !pip install prefect")
    raise

## 1) Tasks

In [2]:
@task(retries=2, retry_delay_seconds=2)
def extract() -> pd.DataFrame:
    """Produce the sample orders table, with a simulated flaky source.

    Roughly one call in five raises ``RuntimeError`` before any data is
    read, so that the ``retries=2`` / ``retry_delay_seconds=2`` settings on
    the task decorator have something to recover from.

    Returns:
        The embedded CSV parsed into a DataFrame with columns
        ``order_id``, ``customer``, ``country``, ``amount`` and ``ts``.

    Raises:
        RuntimeError: when the simulated transient failure fires.
    """
    from random import random as draw

    log = get_run_logger()
    log.info("Extracting...")

    # Simulated network flakiness: fail on ~20% of attempts.
    simulated_outage = draw() < 0.2
    if simulated_outage:
        log.warning("Transient failure; retrying...")
        raise RuntimeError("network hiccup")

    # Inline stand-in for a remote source; row 1003 has a missing amount.
    raw_orders = '''order_id,customer,country,amount,ts
1001,Alice,US,120.50,2024-06-01
1002,Bob,UK,85.00,2024-06-02
1003,Chandra,IN,NaN,2024-06-02
1004,Diego,US,43.20,2024-06-03
1005,Eva,DE,69.99,2024-06-03
'''
    buffer = io.StringIO(raw_orders)
    return pd.read_csv(buffer)

@task
def transform(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with a fully numeric, gap-free ``amount`` column.

    Values in ``amount`` that cannot be parsed as numbers are coerced to NaN,
    and every NaN is then replaced by the median of the successfully parsed
    amounts. The input frame is left untouched.

    Args:
        df: Orders table containing an ``amount`` column.

    Returns:
        A new DataFrame with the cleaned ``amount`` column.
    """
    logger = get_run_logger()
    logger.info("Transforming...")
    out = df.copy()
    # Coerce first, then take the median of the *coerced* values. Taking the
    # median of the raw column fails when it contains non-numeric text
    # (object dtype), which is exactly the case errors='coerce' is meant to
    # tolerate.
    amount = pd.to_numeric(out['amount'], errors='coerce')
    out['amount'] = amount.fillna(amount.median())
    return out

@task
def load(df: pd.DataFrame, dest_path: str = "warehouse/orders_clean.csv") -> str:
    """Write ``df`` to ``dest_path`` as CSV (without the index) and return the path.

    The parent directory of ``dest_path`` is created if it does not exist.

    Args:
        df: Table to persist.
        dest_path: Destination CSV file; may be a bare filename.

    Returns:
        ``dest_path``, unchanged.
    """
    logger = get_run_logger()
    parent_dir = os.path.dirname(dest_path)
    # A bare filename has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create a directory when one is named.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    df.to_csv(dest_path, index=False)
    logger.info(f"Wrote {len(df)} rows to {dest_path}")
    return dest_path

## 2) Flow

In [3]:
@flow(name="mini-etl", task_runner=ConcurrentTaskRunner())
def etl_flow(dest_path: str = "warehouse/orders_clean.csv") -> str:
    """Run extract -> transform -> load and return what ``load`` returns.

    Args:
        dest_path: Where the cleaned orders CSV is written; forwarded to
            ``load``.

    Returns:
        The result of the ``load`` task (the destination path).
    """
    # Each stage consumes the previous stage's result, so the chain is
    # expressed as a single composed call.
    return load(transform(extract()), dest_path)

# Run the flow once with the default destination and keep its return value.
result = etl_flow()
# Bare expression so the notebook displays the value as the cell output.
result

'warehouse/orders_clean.csv'