[Reference](https://blog.dataengineerthings.org/duckdb-and-cloud-data-warehouses-how-to-choose-for-your-project-54c7576496ae)

In [1]:
import duckdb

# 1. Define the data sources.
# Local CSV given as a relative path; read_csv_auto() queries the file in
# place, without a separate load step.
LOCAL_SALES_CSV = 'data/local_sales_2023.csv'

# Public Parquet file read directly over HTTPS (simulating S3/GCS).
REMOTE_CATALOG_PARQUET = 'https://blobs.duckdb.org/train_services.parquet'

# 2. Run a single SQL query that joins the local CSV with the remote Parquet.
# The paths are module-level constants, so f-string interpolation is fine
# here; never interpolate untrusted values into SQL this way.
#
# ORDER BY is required for a meaningful LIMIT: without it the five groups
# returned are arbitrary and may change between runs. Ordering by count
# (descending) yields the top five, with the station name as a tie-break.
#
# NOTE(review): the join compares sales.item_id with a station *name* column;
# confirm these are the intended keys for the two datasets.
# NOTE(review): COUNT(sales.*) is presumably meant as one per-group row count
# (what COUNT(*) gives on an inner join) -- confirm how DuckDB treats it.
result = duckdb.sql(f"""
    SELECT
        catalog.pickup_station_name,
        COUNT(sales.*) AS total_sales_count
    FROM
        read_csv_auto('{LOCAL_SALES_CSV}') AS sales
    JOIN
        read_parquet('{REMOTE_CATALOG_PARQUET}') AS catalog
    ON
        sales.item_id = catalog.destination_station_name
    GROUP BY
        1
    ORDER BY
        total_sales_count DESC,
        catalog.pickup_station_name
    LIMIT 5;
""")

# 3. Show the result.
# to_df() converts the DuckDB relation into a pandas DataFrame;
# DataFrame.to_markdown() additionally needs the optional `tabulate` package.
print("\n--- DuckDB Result (you can change the results to Python Dataframe) ---")
print(result.to_df().to_markdown(index=False))