In [None]:
import duckdb

# Load the `sql` IPython extension; it provides the %sql / %%sql magics used
# throughout the rest of this notebook.
%load_ext sql

# Open a DuckDB connection. No path is passed, so this uses DuckDB's default
# (in-memory) database. `conn` is reused later to register Python UDFs.
conn = duckdb.connect()

# Hand the existing connection to the SQL magic under the alias "duckdb", so
# SQL cells and the Python code below operate on the same connection.
%sql conn --alias duckdb
# Load the database stored in the ../../data/nps directory (path is relative
# to the notebook's working directory).
%sql IMPORT DATABASE '../../data/nps';

DuckDB supports Python user-defined functions (UDFs), so if you'd like to use a Python function in SQL on a dataset, you can!

In [None]:
import duckdb
from duckdb.typing import *
from faker import Faker


# Lazily-built Faker instance shared by every random_emoji() call.
_emoji_faker = None


def random_emoji():
    """Return one random emoji produced by Faker.

    The function takes no arguments so that it can be registered as a
    zero-argument SQL function. The Faker instance is created on the first
    call and reused afterwards: when the function is used as a UDF it is
    invoked repeatedly, and building a fresh Faker each time is wasted work.

    Returns:
        Whatever ``Faker.emoji()`` returns (an emoji string).
    """
    global _emoji_faker
    if _emoji_faker is None:
        fake = Faker()
        # NOTE(review): add_provider is usually given a provider class or
        # instance; confirm that passing the string "emoji" is intended.
        fake.add_provider("emoji")
        _emoji_faker = fake
    return _emoji_faker.emoji()


# DuckDB refuses to register a second function under an existing name, so drop
# any registration left over from an earlier run of this cell. This keeps the
# cell re-runnable on a live kernel.
try:
    conn.remove_function("random_emoji")
except duckdb.Error:
    # Nothing registered under this name yet (first run on this connection).
    pass

# Expose random_emoji to SQL: no parameters, VARCHAR result.
# side_effects=True tells DuckDB the function may return a different value on
# every call, so its result must not be treated as a constant.
conn.create_function("random_emoji", random_emoji, [], "VARCHAR", side_effects=True)

In [None]:
%%sql
-- Pair each park name with an emoji produced by the random_emoji() Python UDF
SELECT
    parks.fullname,
    random_emoji() AS 👋
FROM nps_public_data.parks AS parks
LIMIT 10;
    

In [None]:
import pyarrow.compute as pc


def swap_case(x):
    """Return ``x`` with the case of every letter inverted.

    Delegates to ``pyarrow.compute.utf8_swapcase``, so ``x`` is expected to be
    Arrow string data (the whole column chunk, not a single value).
    """
    swapped = pc.utf8_swapcase(x)
    return swapped


# DuckDB refuses to register a second function under an existing name, so drop
# any registration left over from an earlier run of this cell. This keeps the
# cell re-runnable on a live kernel.
try:
    conn.remove_function("swap_case")
except duckdb.Error:
    # Nothing registered under this name yet (first run on this connection).
    pass

# swap_case is built on pyarrow.compute, so to register it we must define its
# type to be 'arrow' (one VARCHAR argument in, VARCHAR out).
conn.create_function("swap_case", swap_case, ["VARCHAR"], "VARCHAR", type="arrow")

In [None]:
%%sql
SELECT
    swap_case(fullname) as pARK_nAME
FROM nps_public_data.parks
LIMIT 10

DuckDB also supports _lambdas_, which operate on every item in a list. Take for example `list_transform`, which can [act on every element in a list](https://duckdb.org/docs/sql/functions/lambda.html#transform).

In [None]:
%%sql
-- Which parks are fully or partially in Utah?
WITH park_states AS (
    SELECT
        fullname,
        states AS states_string,
        -- Split the comma-separated states value into a list of strings
        split(states, ',') ::string[] AS states_list
    FROM nps_public_data.parks p
    )
SELECT
    fullname,
    states_list,
    -- Transform lists of strings to lowercase
    list_transform(states_list, x -> lower(x)) as lower_states_list,
    -- You can even combine lambdas with UDFs; this column gets its own alias
    -- so it does not collide with lower_states_list above
    list_transform(states_list, x -> swap_case(x)) as swapped_states_list,
FROM park_states
WHERE list_contains(states_list, 'UT')
LIMIT 5

Other functions like `list_reduce` or `list_filter` can be helpful, too.

In [None]:
%%sql
-- Parks that lie in Utah and in at least one other state
WITH park_states AS (
    SELECT
        fullname,
        states AS states_string,
        -- Comma-separated states value turned into a list of strings
        split(states, ',')::VARCHAR[] AS states_list
    FROM nps_public_data.parks
)
SELECT
    fullname,
    states_string,
    states_list,
    -- Fold the list back into a single ', '-separated string 🙃
    list_reduce(states_list, (x, y) -> concat(x, ', ', y)) AS new_states_string,
    -- Keep every list entry except 'UT'
    list_filter(states_list, x -> x <> 'UT') AS states_list_no_ut,
FROM park_states
WHERE len(states_list) > 1
  AND list_contains(states_list, 'UT')
LIMIT 5