In [1]:
import duckdb

# Load the SQL magic extension (it provides the %sql / %%sql magics used by
# every query cell below) and configure how many result rows it displays.
# NOTE(review): displaylimit = 0 is presumably meant to disable the row cap —
# confirm against the installed SQL magic version.
%load_ext sql
%config SqlMagic.displaylimit = 0

# Initialize 🦆 DuckDB connection (no database path is passed to connect()).
conn = duckdb.connect()

# Register the connection with the SQL magic under the alias "duckdb", then
# import the exported database found at ../../data/nps. The path is relative,
# so it is resolved against the kernel's current working directory.
%sql conn --alias duckdb
%sql IMPORT DATABASE '../../data/nps';

Deploy Streamlit apps for free on Ploomber Cloud! Learn more: https://ploomber.io/s/signup


Count
40


The Basics

In [2]:
%%sql
SELECT
    -- Grouping uses the raw `states` value exactly as stored, so a value that
    -- lists more than one state forms its own group.
    p.states,
    COUNT(*) AS num_parks
FROM nps_public_data.parks AS p
WHERE p.designation = 'National Park'
GROUP BY p.states
ORDER BY num_parks DESC
LIMIT 20

states,num_parks
CA,5
UT,5
FL,3
CO,3
WA,3
AZ,3
TX,2
NM,2
SD,2
AK,2


In [3]:
%%sql
WITH park_states AS (
    -- One row per (park, state): the `states` string is split on ',' and the
    -- resulting list is unnested into separate rows.
    SELECT
        p.fullname,
        UNNEST(SPLIT(p.states, ',')::string[]) AS state
    FROM nps_public_data.parks AS p
    WHERE p.designation = 'National Park'
)
SELECT
    ps.state,
    COUNT(*) AS num_parks
FROM park_states AS ps
GROUP BY ps.state
-- Highest counts first; ties are broken alphabetically by state.
ORDER BY num_parks DESC, ps.state
LIMIT 10

state,num_parks
CA,6
UT,5
AZ,3
CO,3
FL,3
WA,3
AK,2
HI,2
MT,2
NM,2


How do we find the campgrounds with the least and most sites using aggregations?

In [4]:
%%sql
WITH park_campgrounds AS (
    -- Campgrounds located in National Parks, with their total site count
    -- (first-come-first-serve + reservable). GROUP BY over every selected
    -- column is used here purely to collapse duplicate rows.
    SELECT
        c.name AS campground_name,
        p.fullname AS park_name,
        c.numberofsitesfirstcomefirstserve + c.numberofsitesreservable AS total_sites
    FROM nps_public_data.campgrounds AS c
    INNER JOIN nps_public_data.parks AS p
        ON c.parkcode = p.parkcode
        AND p.designation = 'National Park'
    GROUP BY 1, 2, 3
), min_max_sites AS (
    -- Single-row CTE holding the smallest and largest total site counts.
    -- Campgrounds reporting 0 sites are excluded so that "least" is not
    -- trivially every campground with no sites recorded.
    SELECT
        MIN(pc.total_sites) AS min_sites,
        MAX(pc.total_sites) AS max_sites
    FROM park_campgrounds AS pc
    WHERE pc.total_sites > 0
)
SELECT
    pc.campground_name,
    pc.total_sites AS num_sites,
    -- The join below only keeps rows equal to min_sites or max_sites, so
    -- anything that is not the minimum must be the maximum.
    CASE pc.total_sites WHEN mms.min_sites THEN 'least' ELSE 'most' END AS sites_rank
FROM park_campgrounds AS pc
INNER JOIN min_max_sites AS mms
    ON (pc.total_sites = mms.min_sites OR pc.total_sites = mms.max_sites)
ORDER BY num_sites, pc.campground_name

campgroud_name,num_sites,sites_rank
Dunbar Group Site,1,least
Frijole Horse Corral Campground,1,least
Group Campsite,1,least
Hay Bay Campground,1,least
Pickerel Cove Campground,1,least
Roundup Group Horse Camp,1,least
Upper Goodell Creek Group Campsite,1,least
Morefield Campground,534,most


What about the parks? Which parks have the least and most campsites in total?

In [5]:
%%sql
WITH park_campgrounds AS (
    -- Campgrounds located in National Parks, with their total site count
    -- (first-come-first-serve + reservable). GROUP BY over every selected
    -- column is used here purely to collapse duplicate rows.
    SELECT
        c.name AS campground_name,
        p.fullname AS park_name,
        c.numberofsitesfirstcomefirstserve + c.numberofsitesreservable AS total_sites
    FROM nps_public_data.campgrounds AS c
    INNER JOIN nps_public_data.parks AS p
        ON c.parkcode = p.parkcode
        AND p.designation = 'National Park'
    GROUP BY 1, 2, 3
), park_sites AS (
    -- Total sites per park. No ORDER BY here: the ordering of a CTE is not
    -- what determines the final row order; the outer query sorts explicitly.
    SELECT
        pc.park_name,
        SUM(pc.total_sites) AS num_sites
    FROM park_campgrounds AS pc
    GROUP BY pc.park_name
), min_max_sites AS (
    -- Single-row CTE holding the smallest and largest per-park totals.
    SELECT
        MIN(ps.num_sites) AS min_sites,
        MAX(ps.num_sites) AS max_sites
    FROM park_sites AS ps
)
SELECT
    ps.*,
    -- The join below only keeps rows equal to min_sites or max_sites, so
    -- anything that is not the minimum must be the maximum.
    CASE ps.num_sites WHEN mms.min_sites THEN 'least' ELSE 'most' END AS sites_rank
FROM park_sites AS ps
INNER JOIN min_max_sites AS mms
    ON (ps.num_sites = mms.min_sites OR ps.num_sites = mms.max_sites)
ORDER BY ps.num_sites DESC

park_name,num_sites,sites_rank
Yellowstone National Park,2149,most
Dry Tortugas National Park,8,least


Other basic aggregations

In [7]:
%%sql
SELECT
    p.fullname AS park_name,
    -- Number of distinct campground names in the park
    COUNT(DISTINCT c.name) AS num_campgrounds,
    -- Averages are taken over the park's campground rows, rounded to two
    -- decimals — exercise for the reader: what is this returning?
    ROUND(AVG(c.numberofsitesreservable), 2) AS avg_sites_reservable,
    ROUND(AVG(c.numberofsitesfirstcomefirstserve), 2) AS avg_sites_fcfs,
    ROUND(AVG(c.numberofsitesreservable + c.numberofsitesfirstcomefirstserve), 2) AS avg_total_sites
FROM nps_public_data.campgrounds AS c
INNER JOIN nps_public_data.parks AS p
    ON c.parkcode = p.parkcode
WHERE p.designation = 'National Park'
GROUP BY p.fullname
ORDER BY num_campgrounds DESC
LIMIT 10
-- Read more about aggregates here: https://duckdb.org/docs/sql/aggregates

park_name,num_campgrounds,avg_sites_reservable,avg_sites_fcfs,avg_total_sites
Isle Royale National Park,36,1.19,5.58,6.78
Yosemite National Park,14,75.64,21.64,97.29
Great Smoky Mountains National Park,13,72.54,0.0,72.54
Guadalupe Mountains National Park,13,8.46,0.0,8.46
Olympic National Park,13,36.85,23.31,60.15
Glacier National Park,13,64.38,13.31,77.69
Death Valley National Park,12,11.33,52.08,63.42
Yellowstone National Park,12,179.08,0.0,179.08
North Cascades National Park,11,26.64,7.82,34.45
Joshua Tree National Park,9,47.0,8.56,55.56


In [10]:
%%sql
SELECT
    -- Names of the campgrounds attached to Yellowstone National Park
    c.name
FROM nps_public_data.campgrounds AS c
INNER JOIN nps_public_data.parks AS p
    ON c.parkcode = p.parkcode
-- For an inner join, filtering in WHERE is equivalent to filtering in ON.
WHERE p.designation = 'National Park'
    AND p.fullname = 'Yellowstone National Park'

name
Norris Campground
Pebble Creek Campground
Tower Fall Campground
Bridge Bay Campground
Canyon Campground
Fishing Bridge RV Park
Grant Village Campground
Indian Creek Campground
Lewis Lake Campground
Madison Campground
