From 1fca0beff61664e4aa136c2601be1594e75abdee Mon Sep 17 00:00:00 2001 From: Bohu Date: Mon, 20 Oct 2025 20:05:45 +0800 Subject: [PATCH] docs: refresh window value function examples (#2856) --- .../08-window-functions/first-value.md | 137 ++++++++++++---- .../08-window-functions/last-value.md | 146 +++++++++++++----- .../08-window-functions/nth-value.md | 103 ++++++------ 3 files changed, 267 insertions(+), 119 deletions(-) diff --git a/docs/en/sql-reference/20-sql-functions/08-window-functions/first-value.md b/docs/en/sql-reference/20-sql-functions/08-window-functions/first-value.md index 543c43099f..ecbead412e 100644 --- a/docs/en/sql-reference/20-sql-functions/08-window-functions/first-value.md +++ b/docs/en/sql-reference/20-sql-functions/08-window-functions/first-value.md @@ -16,7 +16,7 @@ See also: ## Syntax ```sql -FIRST_VALUE(expression) +FIRST_VALUE(expression) [ { RESPECT | IGNORE } NULLS ] OVER ( [ PARTITION BY partition_expression ] ORDER BY sort_expression [ ASC | DESC ] @@ -25,50 +25,123 @@ OVER ( ``` **Arguments:** -- `expression`: Required. The column or expression to return the first value from -- `PARTITION BY`: Optional. Divides rows into partitions -- `ORDER BY`: Required. Determines the ordering within the window -- `window_frame`: Optional. Defines the window frame (default: RANGE UNBOUNDED PRECEDING) +- `expression`: Required. The column or expression to return the first value from. +- `PARTITION BY`: Optional. Divides rows into partitions. +- `ORDER BY`: Required. Determines the ordering within the window. +- `window_frame`: Optional. Defines the window frame. The default is `RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW`. **Notes:** -- Returns the first value in the ordered window frame -- Supports `IGNORE NULLS` and `RESPECT NULLS` options -- Useful for finding the earliest/lowest value in each group +- Returns the first value in the ordered window frame. +- Supports `IGNORE NULLS` to skip null values and `RESPECT NULLS` to keep the default behaviour. +- Specify an explicit window frame (for example, `ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW`) when you need row-based semantics instead of the default range frame. +- Useful for finding the earliest or lowest value in each group or time window. ## Examples ```sql --- Create sample data -CREATE TABLE scores ( - student VARCHAR(20), - score INT +-- Sample order data +CREATE OR REPLACE TABLE orders_window_demo ( + customer VARCHAR, + order_id INT, + order_time TIMESTAMP, + amount INT, + sales_rep VARCHAR ); -INSERT INTO scores VALUES - ('Alice', 95), - ('Bob', 87), - ('Charlie', 82), - ('David', 78), - ('Eve', 92); +INSERT INTO orders_window_demo VALUES + ('Alice', 1001, to_timestamp('2024-05-01 09:00:00'), 120, 'Erin'), + ('Alice', 1002, to_timestamp('2024-05-01 11:00:00'), 135, NULL), + ('Alice', 1003, to_timestamp('2024-05-02 14:30:00'), 125, 'Glen'), + ('Bob', 1004, to_timestamp('2024-05-01 08:30:00'), 90, NULL), + ('Bob', 1005, to_timestamp('2024-05-01 20:15:00'), 105, 'Kai'), + ('Bob', 1006, to_timestamp('2024-05-03 10:00:00'), 95, NULL), + ('Carol', 1007, to_timestamp('2024-05-04 09:45:00'), 80, 'Lily'); ``` -**Get the highest score (first value when ordered by score DESC):** +**Example 1. First purchase per customer** ```sql -SELECT student, score, - FIRST_VALUE(score) OVER (ORDER BY score DESC) AS highest_score, - FIRST_VALUE(student) OVER (ORDER BY score DESC) AS top_student -FROM scores -ORDER BY score DESC; +SELECT customer, + order_id, + order_time, + amount, + FIRST_VALUE(amount) OVER ( + PARTITION BY customer + ORDER BY order_time + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) AS first_order_amount +FROM orders_window_demo +ORDER BY customer, order_time; ``` Result: ``` -student | score | highest_score | top_student ---------+-------+---------------+------------ -Alice | 95 | 95 | Alice -Eve | 92 | 95 | Alice -Bob | 87 | 95 | Alice -Charlie | 82 | 95 | Alice -David | 78 | 95 | Alice -``` \ No newline at end of file +customer | order_id | order_time | amount | first_order_amount +---------+----------+----------------------+--------+-------------------- +Alice | 1001 | 2024-05-01 09:00:00 | 120 | 120 +Alice | 1002 | 2024-05-01 11:00:00 | 135 | 120 +Alice | 1003 | 2024-05-02 14:30:00 | 125 | 120 +Bob | 1004 | 2024-05-01 08:30:00 | 90 | 90 +Bob | 1005 | 2024-05-01 20:15:00 | 105 | 90 +Bob | 1006 | 2024-05-03 10:00:00 | 95 | 90 +Carol | 1007 | 2024-05-04 09:45:00 | 80 | 80 +``` + +**Example 2. First order in the trailing 24 hours** + +```sql +SELECT customer, + order_id, + order_time, + FIRST_VALUE(order_id) OVER ( + PARTITION BY customer + ORDER BY order_time + RANGE BETWEEN INTERVAL 1 DAY PRECEDING AND CURRENT ROW + ) AS first_order_in_24h +FROM orders_window_demo +ORDER BY customer, order_time; +``` + +Result: +``` +customer | order_id | order_time | first_order_in_24h +---------+----------+----------------------+-------------------- +Alice | 1001 | 2024-05-01 09:00:00 | 1001 +Alice | 1002 | 2024-05-01 11:00:00 | 1001 +Alice | 1003 | 2024-05-02 14:30:00 | 1003 +Bob | 1004 | 2024-05-01 08:30:00 | 1004 +Bob | 1005 | 2024-05-01 20:15:00 | 1004 +Bob | 1006 | 2024-05-03 10:00:00 | 1006 +Carol | 1007 | 2024-05-04 09:45:00 | 1007 +``` + +**Example 3. Skip nulls to find the first named sales rep** + +```sql +SELECT customer, + order_id, + sales_rep, + FIRST_VALUE(sales_rep) RESPECT NULLS OVER ( + PARTITION BY customer + ORDER BY order_time + ) AS first_rep_respect, + FIRST_VALUE(sales_rep) IGNORE NULLS OVER ( + PARTITION BY customer + ORDER BY order_time + ) AS first_rep_ignore +FROM orders_window_demo +ORDER BY customer, order_id; +``` + +Result: +``` +customer | order_id | sales_rep | first_rep_respect | first_rep_ignore +---------+----------+-----------+-------------------+------------------ +Alice | 1001 | Erin | Erin | Erin +Alice | 1002 | NULL | Erin | Erin +Alice | 1003 | Glen | Erin | Erin +Bob | 1004 | NULL | NULL | NULL +Bob | 1005 | Kai | NULL | Kai +Bob | 1006 | NULL | NULL | Kai +Carol | 1007 | Lily | Lily | Lily +``` diff --git a/docs/en/sql-reference/20-sql-functions/08-window-functions/last-value.md b/docs/en/sql-reference/20-sql-functions/08-window-functions/last-value.md index 00045e39a5..4f46d3722b 100644 --- a/docs/en/sql-reference/20-sql-functions/08-window-functions/last-value.md +++ b/docs/en/sql-reference/20-sql-functions/08-window-functions/last-value.md @@ -16,7 +16,7 @@ See also: ## Syntax ```sql -LAST_VALUE(expression) +LAST_VALUE(expression) [ { RESPECT | IGNORE } NULLS ] OVER ( [ PARTITION BY partition_expression ] ORDER BY sort_expression [ ASC | DESC ] @@ -25,57 +25,125 @@ OVER ( ``` **Arguments:** -- `expression`: Required. The column or expression to return the last value from -- `PARTITION BY`: Optional. Divides rows into partitions -- `ORDER BY`: Required. Determines the ordering within the window -- `window_frame`: Optional. Defines the window frame (default: RANGE UNBOUNDED PRECEDING) +- `expression`: Required. The column or expression to return the last value from. +- `PARTITION BY`: Optional. Divides rows into partitions. +- `ORDER BY`: Required. Determines the ordering within the window. +- `window_frame`: Optional. Defines the window frame. The default is `RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW`. **Notes:** -- Returns the last value in the ordered window frame -- Supports `IGNORE NULLS` and `RESPECT NULLS` options -- Often requires explicit window frame to get expected results -- Useful for finding the latest/highest value in each group +- Returns the last value in the ordered window frame. +- Supports `IGNORE NULLS` to skip null values and `RESPECT NULLS` to keep the default behaviour. +- Use a frame that ends after the current row (for example, `ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING`) when you need the true last row of a partition. +- Useful for finding the latest value in each group, or the most recent value inside a look-ahead window. ## Examples ```sql --- Create sample data -CREATE TABLE scores ( - student VARCHAR(20), - score INT +-- Sample order data +CREATE OR REPLACE TABLE orders_window_demo ( + customer VARCHAR, + order_id INT, + order_time TIMESTAMP, + amount INT, + sales_rep VARCHAR ); -INSERT INTO scores VALUES - ('Alice', 95), - ('Bob', 87), - ('Charlie', 82), - ('David', 78), - ('Eve', 92); +INSERT INTO orders_window_demo VALUES + ('Alice', 1001, to_timestamp('2024-05-01 09:00:00'), 120, 'Erin'), + ('Alice', 1002, to_timestamp('2024-05-01 11:00:00'), 135, NULL), + ('Alice', 1003, to_timestamp('2024-05-02 14:30:00'), 125, 'Glen'), + ('Bob', 1004, to_timestamp('2024-05-01 08:30:00'), 90, NULL), + ('Bob', 1005, to_timestamp('2024-05-01 20:15:00'), 105, 'Kai'), + ('Bob', 1006, to_timestamp('2024-05-03 10:00:00'), 95, NULL), + ('Carol', 1007, to_timestamp('2024-05-04 09:45:00'), 80, 'Lily'); ``` -**Get the lowest score (last value when ordered by score DESC):** +**Example 1. Latest order in each customer partition** ```sql -SELECT student, score, - LAST_VALUE(score) OVER ( - ORDER BY score DESC - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ) AS lowest_score, - LAST_VALUE(student) OVER ( - ORDER BY score DESC - ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING - ) AS lowest_student -FROM scores -ORDER BY score DESC; +SELECT customer, + order_id, + order_time, + LAST_VALUE(order_id) OVER ( + PARTITION BY customer + ORDER BY order_time + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + ) AS last_order_for_customer +FROM orders_window_demo +ORDER BY customer, order_time; ``` Result: ``` -student | score | lowest_score | lowest_student ---------+-------+--------------+--------------- -Alice | 95 | 78 | David -Eve | 92 | 78 | David -Bob | 87 | 78 | David -Charlie | 82 | 78 | David -David | 78 | 78 | David -``` \ No newline at end of file +customer | order_id | order_time | last_order_for_customer +---------+----------+----------------------+------------------------- +Alice | 1001 | 2024-05-01 09:00:00 | 1003 +Alice | 1002 | 2024-05-01 11:00:00 | 1003 +Alice | 1003 | 2024-05-02 14:30:00 | 1003 +Bob | 1004 | 2024-05-01 08:30:00 | 1006 +Bob | 1005 | 2024-05-01 20:15:00 | 1006 +Bob | 1006 | 2024-05-03 10:00:00 | 1006 +Carol | 1007 | 2024-05-04 09:45:00 | 1007 +``` + +**Example 2. Peek 12 hours ahead within each customer** + +```sql +SELECT customer, + order_id, + order_time, + amount, + LAST_VALUE(amount) OVER ( + PARTITION BY customer + ORDER BY order_time + RANGE BETWEEN CURRENT ROW AND INTERVAL 12 HOUR FOLLOWING + ) AS last_amount_next_12h +FROM orders_window_demo +ORDER BY customer, order_time; +``` + +Result: +``` +customer | order_id | order_time | amount | last_amount_next_12h +---------+----------+----------------------+--------+---------------------- +Alice | 1001 | 2024-05-01 09:00:00 | 120 | 135 +Alice | 1002 | 2024-05-01 11:00:00 | 135 | 135 +Alice | 1003 | 2024-05-02 14:30:00 | 125 | 125 +Bob | 1004 | 2024-05-01 08:30:00 | 90 | 105 +Bob | 1005 | 2024-05-01 20:15:00 | 105 | 105 +Bob | 1006 | 2024-05-03 10:00:00 | 95 | 95 +Carol | 1007 | 2024-05-04 09:45:00 | 80 | 80 +``` + +**Example 3. Skip nulls when scanning forward for the last sales rep** + +```sql +SELECT customer, + order_id, + sales_rep, + LAST_VALUE(sales_rep) RESPECT NULLS OVER ( + PARTITION BY customer + ORDER BY order_time + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + ) AS last_rep_respect, + LAST_VALUE(sales_rep) IGNORE NULLS OVER ( + PARTITION BY customer + ORDER BY order_time + ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING + ) AS last_rep_ignore +FROM orders_window_demo +ORDER BY customer, order_id; +``` + +Result: +``` +customer | order_id | sales_rep | last_rep_respect | last_rep_ignore +---------+----------+-----------+------------------+----------------- +Alice | 1001 | Erin | Glen | Glen +Alice | 1002 | NULL | Glen | Glen +Alice | 1003 | Glen | Glen | Glen +Bob | 1004 | NULL | NULL | Kai +Bob | 1005 | Kai | NULL | Kai +Bob | 1006 | NULL | NULL | Kai +Carol | 1007 | Lily | Lily | Lily +``` diff --git a/docs/en/sql-reference/20-sql-functions/08-window-functions/nth-value.md b/docs/en/sql-reference/20-sql-functions/08-window-functions/nth-value.md index 68af57af6c..026f822213 100644 --- a/docs/en/sql-reference/20-sql-functions/08-window-functions/nth-value.md +++ b/docs/en/sql-reference/20-sql-functions/08-window-functions/nth-value.md @@ -20,7 +20,7 @@ NTH_VALUE( expression, n ) -[ { IGNORE | RESPECT } NULLS ] +[ { RESPECT | IGNORE } NULLS ] OVER ( [ PARTITION BY partition_expression ] ORDER BY order_expression @@ -29,67 +29,74 @@ OVER ( ``` **Arguments:** -- `expression`: The column or expression to evaluate -- `n`: Position number (1-based index) of the value to return -- `IGNORE NULLS`: Optional. When specified, NULL values are skipped when counting positions -- `RESPECT NULLS`: Default behavior. NULL values are included when counting positions +- `expression`: Required. The column or expression to evaluate. +- `n`: Required. Position number (1-based) of the value to return. +- `IGNORE NULLS`: Optional. Skips null values when counting positions. +- `RESPECT NULLS`: Optional. Keeps null values when counting positions (default). +- `window_frame`: Optional. Defines the window frame. The default is `RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW`. **Notes:** -- Position counting starts from 1 (not 0) -- Returns NULL if the specified position doesn't exist in the window frame -- For window frame syntax, see [Window Frame Syntax](index.md#window-frame-syntax) +- `n` must be a positive integer; `n = 1` is equivalent to `FIRST_VALUE`. +- Returns `NULL` if the specified position does not exist in the frame. +- Combine with `ROWS BETWEEN ...` to control whether the position is evaluated over the whole partition or the rows seen so far. +- For window frame syntax, see [Window Frame Syntax](index.md#window-frame-syntax). ## Examples ```sql --- Create sample data -CREATE TABLE scores ( - student VARCHAR(20), - score INT +-- Sample order data +CREATE OR REPLACE TABLE orders_window_demo ( + customer VARCHAR, + order_id INT, + order_time TIMESTAMP, + amount INT, + sales_rep VARCHAR ); -INSERT INTO scores VALUES - ('Alice', 85), - ('Bob', 90), - ('Charlie', 78), - ('David', 92), - ('Eve', 88); +INSERT INTO orders_window_demo VALUES + ('Alice', 1001, to_timestamp('2024-05-01 09:00:00'), 120, 'Erin'), + ('Alice', 1002, to_timestamp('2024-05-01 11:00:00'), 135, NULL), + ('Alice', 1003, to_timestamp('2024-05-02 14:30:00'), 125, 'Glen'), + ('Bob', 1004, to_timestamp('2024-05-01 08:30:00'), 90, NULL), + ('Bob', 1005, to_timestamp('2024-05-01 20:15:00'), 105, 'Kai'), + ('Bob', 1006, to_timestamp('2024-05-03 10:00:00'), 95, NULL), + ('Carol', 1007, to_timestamp('2024-05-04 09:45:00'), 80, 'Lily'); ``` -**Get the 2nd highest score student:** +**Find the second order and illustrate null-handling for the second sales rep:** ```sql -SELECT student, score, - NTH_VALUE(student, 2) OVER (ORDER BY score DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS second_highest_student -FROM scores; +SELECT customer, + order_id, + order_time, + NTH_VALUE(order_id, 2) OVER ( + PARTITION BY customer + ORDER BY order_time + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) AS second_order_so_far, + NTH_VALUE(sales_rep, 2) RESPECT NULLS OVER ( + PARTITION BY customer + ORDER BY order_time + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) AS second_rep_respect, + NTH_VALUE(sales_rep, 2) IGNORE NULLS OVER ( + PARTITION BY customer + ORDER BY order_time + ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW + ) AS second_rep_ignore +FROM orders_window_demo +ORDER BY customer, order_time; ``` Result: ``` -student | score | second_highest_student ----------+-------+----------------------- -David | 92 | Bob -Bob | 90 | Bob -Eve | 88 | Bob -Alice | 85 | Bob -Charlie | 78 | Bob +customer | order_id | order_time | second_order_so_far | second_rep_respect | second_rep_ignore +---------+----------+----------------------+---------------------+--------------------+------------------- +Alice | 1001 | 2024-05-01 09:00:00 | NULL | NULL | NULL +Alice | 1002 | 2024-05-01 11:00:00 | 1002 | NULL | NULL +Alice | 1003 | 2024-05-02 14:30:00 | 1002 | NULL | Glen +Bob | 1004 | 2024-05-01 08:30:00 | NULL | NULL | NULL +Bob | 1005 | 2024-05-01 20:15:00 | 1005 | Kai | Kai +Bob | 1006 | 2024-05-03 10:00:00 | 1005 | Kai | Kai +Carol | 1007 | 2024-05-04 09:45:00 | NULL | NULL | NULL ``` - -**Get the 3rd highest score student:** - -```sql -SELECT student, score, - NTH_VALUE(student, 3) OVER (ORDER BY score DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS third_highest_student -FROM scores; -``` - -Result: -``` -student | score | third_highest_student ----------+-------+---------------------- -David | 92 | Eve -Bob | 90 | Eve -Eve | 88 | Eve -Alice | 85 | Eve -Charlie | 78 | Eve -``` \ No newline at end of file