In [None]:
import duckdb

# Load SQL extension
# (registers the `%sql` / `%%sql` magics that every query cell below relies on)
%load_ext sql

# Initialize 🦆 DuckDB connection
# No database path is passed; per the DuckDB Python API documentation this
# opens an in-memory database, so the data has to be re-imported on every
# fresh kernel.
conn = duckdb.connect()

# Import database
# First hand the existing connection object to the SQL magic, registered under
# the alias "duckdb".
%sql conn --alias duckdb
# Then load the exported database stored at this path. The path is relative,
# so it is resolved against the kernel's working directory.
# NOTE(review): confirm the notebook is launched from the directory this
# relative path assumes.
%sql IMPORT DATABASE '../../data/nps';

Build a query to fetch all the parks in Utah and order the results by the park name.

In [None]:
%%sql
-- Parks whose `states` text contains the Utah code, sorted by park name.
-- LIMIT 5 caps the displayed output at five rows.
SELECT
    parks.name,
    parks.states
FROM nps_public_data.parks AS parks
WHERE parks.states ILIKE '%UT%'
ORDER BY parks.name
LIMIT 5;

Build a query to fetch all the National Parks that cross state boundaries. 

Hint: `parks.states` is a string representation of a list, e.g. `UT,CA,NC`. The `parks` table includes parks that aren't National Parks.

In [None]:
%%sql
-- National Parks that span more than one state. `states` is a comma-separated
-- string, so a comma in it means at least two states are listed.
SELECT
    parks.name,
    parks.states
FROM nps_public_data.parks AS parks
WHERE parks.designation ILIKE '%National Park%'
    AND parks.states LIKE '%,%'

For all national parks, return the `states` column as a `LIST` type with each element as a state.

In [None]:
%%sql
-- Turn the comma-separated `states` text into a list of state codes for every
-- row whose designation contains National Park (case-insensitive match).
SELECT
    parks.name,
    parks.states,
    CAST(split(parks.states, ',') AS string[]) AS states_list
FROM nps_public_data.parks AS parks
WHERE parks.designation ILIKE '%National Park%'

Which parks are in either Montana or Wyoming?

In [None]:
%%sql
-- National Parks located in Montana or Wyoming (at least one of the two).
SELECT
    parks.name,
    parks.states,
    CAST(split(parks.states, ',') AS string[]) AS states_list
FROM nps_public_data.parks AS parks
WHERE parks.designation ILIKE '%National Park%'
    -- states_list is the alias defined in the SELECT list above
    AND list_has_any(states_list, ['MT', 'WY'])

What about _both_ Montana _and_ Wyoming?

In [None]:
%%sql
-- National Parks whose state list contains both Montana and Wyoming.
SELECT
    parks.name,
    parks.states,
    CAST(split(parks.states, ',') AS string[]) AS states_list
FROM nps_public_data.parks AS parks
WHERE parks.designation ILIKE '%National Park%'
    -- states_list is the alias defined in the SELECT list above
    AND list_has_all(states_list, ['MT', 'WY'])

Which park is in the greatest number of states?

In [None]:
%%sql
-- Count the states listed for each park and show the widest-spanning first.
-- NOTE(review): this cell filters with a case-sensitive prefix match, while
-- most other cells use a case-insensitive substring match. Confirm which
-- filter is intended.
SELECT
    parks.name,
    parks.states,
    CAST(split(parks.states, ',') AS string[]) AS states_list,
    length(states_list) AS num_states
FROM nps_public_data.parks AS parks
WHERE parks.designation LIKE 'National Park%'
ORDER BY num_states DESC

Now, how many parks are in each "group" of state border-crossings?

Hint: we're grouping by the _number_ of states.

In [None]:
%%sql
-- Step 1: one row per National Park with the number of states it touches.
-- Step 2: count how many parks fall into each number-of-states group.
WITH park_state_counts AS (
    SELECT
        parks.name,
        parks.states,
        CAST(split(parks.states, ',') AS string[]) AS states_list,
        length(states_list) AS num_states
    FROM nps_public_data.parks AS parks
    WHERE parks.designation ILIKE '%National Park%'
)
SELECT
    num_states,
    COUNT(*) AS num_parks
FROM park_state_counts
GROUP BY num_states
ORDER BY num_states

What's the percentage share of the total? Hint: window functions might be helpful.

In [None]:
%%sql
-- Share of National Parks in each number-of-states group.
-- Step 1: one row per National Park with the number of states it touches.
-- Step 2: count parks per group and express each count as a percentage of
--         the total number of parks.
WITH park_state_counts AS (
    SELECT
        name,
        states,
        split(states, ',') ::string[] AS states_list,
        length(states_list) AS num_states
    FROM nps_public_data.parks p
    WHERE p.designation ILIKE '%National Park%'
)
SELECT
    num_states,
    COUNT(*) AS num_parks,
    -- SUM(COUNT(*)) OVER () is the grand total across all groups. Multiplying
    -- by 100 before rounding yields an actual percentage and keeps two
    -- decimals of precision. Rounding the raw fraction would collapse small
    -- groups to 0.01 or 0.02.
    ROUND(100.0 * COUNT(*) / SUM(COUNT(*)) OVER (), 2) AS pct_parks
FROM park_state_counts
GROUP BY num_states
ORDER BY num_states

Write a query that returns the _largest_ campground (by total number of campsites) in each park. As a bonus, do this _without_ using a CTE (filter in the same query).

In [None]:
%%sql
-- For each National Park keep the campground (or tied campgrounds) with the
-- most sites, counting first-come-first-serve plus reservable sites.
-- QUALIFY filters on the window rank inside the same SELECT, so no CTE is needed.
SELECT
    p.fullname AS park_name,
    c.name AS campground_name,
    c.numberofsitesfirstcomefirstserve + c.numberofsitesreservable AS num_campsites,
    RANK() OVER (
        PARTITION BY park_name
        ORDER BY c.numberofsitesfirstcomefirstserve + c.numberofsitesreservable DESC
    ) AS park_campsites_rank
FROM nps_public_data.campgrounds AS c
INNER JOIN nps_public_data.parks AS p
    ON c.parkcode = p.parkcode
WHERE p.designation LIKE 'National Park%'
QUALIFY park_campsites_rank = 1
-- the rank is always 1 after QUALIFY, so it is not needed as a sort key
ORDER BY num_campsites DESC, park_name
LIMIT 12;

Say you'll be in California this spring and have time for three National Park visits. How many combinations of national parks can you visit? Can you return the combinations in a list ordered by the name of the first park?

In [None]:
%%sql
-- Every ordered itinerary of three distinct California National Parks.
-- Visit order matters here, so each set of three parks appears once per ordering.
WITH california_parks AS (
    SELECT
        *
    FROM nps_public_data.parks p
    WHERE states ILIKE '%CA%'
        AND p.designation ILIKE '%National Park%'
)
SELECT
    cp.name AS first_visit,
    cp2.name AS second_visit,
    cp3.name AS third_visit,
    -- Many rows share the same first park. The second and third parks act as
    -- tie-breakers so the numbering is the same on every run.
    ROW_NUMBER() OVER (
        ORDER BY cp.fullName ASC, cp2.fullName ASC, cp3.fullName ASC
    ) AS rn
FROM california_parks cp
CROSS JOIN california_parks cp2
CROSS JOIN california_parks cp3
-- the three visits must be pairwise different parks
WHERE cp.fullName != cp2.fullName
    AND cp.fullName != cp3.fullName
    AND cp2.fullName != cp3.fullName
ORDER BY rn ASC

For our final exercise, find the combinations in alphabetical order; that is, the first letter of each visit occurs in the order of the alphabet, e.g. `[C]hannel Islands, [D]eath Valley, [J]oshua Tree` would satisfy that condition.

In [None]:
%%sql
-- Three-park California itineraries whose park names are in strictly
-- ascending alphabetical order (compared case-insensitively).
WITH california_parks AS (
    SELECT
        *
    FROM nps_public_data.parks p
    WHERE states ILIKE '%CA%'
        AND p.designation ILIKE '%National Park%'
), visit_orders AS (
    -- every ordered triple of pairwise different parks
    SELECT
        cp.name AS first_visit,
        cp2.name AS second_visit,
        cp3.name AS third_visit
    FROM california_parks cp
    CROSS JOIN california_parks cp2
    CROSS JOIN california_parks cp3
    WHERE cp.fullName != cp2.fullName
        AND cp.fullName != cp3.fullName
        AND cp2.fullName != cp3.fullName
)
SELECT
    *,
    (lower(first_visit) < lower(second_visit))
        AND (lower(second_visit) < lower(third_visit)) AS is_ascending,
    -- The second and third visits break ties between rows that share a first
    -- park, so the numbering is the same on every run.
    ROW_NUMBER() OVER (
        ORDER BY first_visit ASC, second_visit ASC, third_visit ASC
    ) AS rn
FROM visit_orders
-- is_ascending is the alias defined in the SELECT list above
WHERE is_ascending
-- without an explicit sort the row order of the result is not guaranteed
ORDER BY rn ASC
