WINDOW FUNCTION They perform row-wise calculations over a window(set of rows) related to the current row.
Syntax:
<function_name>() OVER ( PARTITION BY ORDER BY )
TYPES OF FUNCTION
-
Aggregate functions
-
Ranking Functions
-
Value Functions(Analytic functions
-
Aggregate window functions SUM(),AVG(),COUNT(),MIN(),MAX()
-
Ranking Functions ROW_NUMBER() – unique sequential number RANK() – same rank for ties, gaps in ranking DENSE_RANK() – same rank for ties, no gaps
-
Value Functions(Analytic functions LAG() – previous row value LEAD() – next row value FIRST_VALUE() – first value in window LAST_VALUE() – last value in window
SQL EXAMPLE QUERIES
use coss
CREATE TABLE Sales ( SaleID INT IDENTITY(1,1) PRIMARY KEY, SaleDate DATE, CustomerID INT, Amount DECIMAL(10,2) );
-- Insert sample data INSERT INTO Sales (SaleDate, CustomerID, Amount) VALUES ('2023-01-01', 101, 500.00), ('2023-01-02', 102, 700.00), ('2023-01-03', 101, 200.00), ('2023-01-04', 103, 900.00), ('2023-01-05', 102, 300.00), ('2023-01-06', 101, 1000.00);
select * from Sales
VALUE ADDED FUNCTONS EXAMPLE QUERIES
Lead() lets us to look at the value of the next row,which gets the next rows amount based on saledate, it is useful for comparing current vs next sale */ SELECT SaleID,SaleDate,CustomerID,Amount, LEAD(Amount) OVER (ORDER BY SaleDate) AS NextSaleAmount FROM Sales;
/* LAG() retrieves the previous row's value,lag(amount) gets the previous row's amount based on the saledate, and it is useful for finding differences or trends */ SELECT SaleID,SaleDate,CustomerID,Amount, LAG(Amount) OVER (ORDER BY SaleDate) AS PrevSaleAmount FROM Sales;
/* first_value() returns the first value in a result set or partition, always show the first amount from the ordered result set, and Partitioning can be used to get the first sale per customerid */
SELECT SaleID,SaleDate, CustomerID, Amount, FIRST_VALUE(Amount) OVER (ORDER BY SaleDate) AS FirstSaleAmount FROM Sales; --with partition SELECT SaleID,SaleDate,CustomerID,Amount, FIRST_VALUE(Amount) OVER (PARTITION BY CustomerID ORDER BY SaleDate) AS FirstSalePerCustomer FROM Sales;
/*
Last_value() returns the last value , but we need to spcify the frame correcctly, otherwise it only
considers the current row as the last,the frame ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
ensures the entire partition is considered.
*/
SELECT SELECT *from NORTHWIND.dbo.Products
WHERE SupplierID = (
SaleID,SaleDate,CustomerID, Amount,
LAST_VALUE(Amount) OVER (ORDER BY SaleDate
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS LastSaleAmount
FROM Sales;
-- Combined with all the above functions LEAD(),LAG(),First_value(), LAST_VALUE() SELECT SaleID,SaleDate,CustomerID,Amount, LAG(Amount) OVER (PARTITION BY CustomerID ORDER BY SaleDate) AS PrevSale, LEAD(Amount) OVER (PARTITION BY CustomerID ORDER BY SaleDate) AS NextSale, FIRST_VALUE(Amount) OVER (PARTITION BY CustomerID ORDER BY SaleDate) AS FirstSale, LAST_VALUE(Amount) OVER (PARTITION BY CustomerID ORDER BY SaleDate ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS LastSale FROM Sales;
/*Row_NUMBER(): Assigns a unique numbers to each row, no ties considered, each row gets a unique sequence numbers */ SELECT SaleID, Amount, ROW_NUMBER() OVER (ORDER BY Amount DESC) AS RowNum FROM Sales;
--RANK(): Tied Rows get the same rank, next is skipped SELECT SaleID,Amount,RANK() OVER (ORDER BY Amount DESC) AS RankNum FROM Sales;
--Dense_Rank():tied rows get the same rank, but the next rank is not skipped
SELECT SaleID,Amount, DENSE_RANK() OVER (ORDER BY Amount DESC) AS DenseRankNum FROM Sales;
--ntile(n): Divides the rows into n groups (tiles),rows are divided into equal groups,useful for quartiles, percentiles, etc.
SELECT SaleID, Amount, NTILE(2) OVER (ORDER BY Amount DESC) AS TileNum FROM Sales;
--ROW_NUMBER() � Order numbers by customer select * from NORTHWIND.dbo.Orders --solution SELECT CustomerID,OrderID,OrderDate, ROW_NUMBER() OVER (PARTITION BY CustomerID ORDER BY OrderDate) AS OrderRank FROM NORTHWIND.dbo.Orders;
/* the above query Assigns a row number to each customer's orders based on date.*/
--RANK() vs DENSE_RANK() � Employees by number of orders select * from NORTHWIND.dbo.Orders --solution SELECT EmployeeID, COUNT(OrderID) AS TotalOrders, RANK() OVER (ORDER BY COUNT(OrderID) DESC) AS RankWithGap, DENSE_RANK() OVER (ORDER BY COUNT(OrderID) DESC) AS RankWithoutGap FROM NORTHWIND.dbo.Orders GROUP BY EmployeeID;
select * from northwind.dbo.Orders
SELECT unitprice, --COUNT(OrderID) AS TotalOrders, RANK() OVER (ORDER BY unitprice DESC) AS RankWithGap from NORTHWIND.dbo.[Order Details]
--dense_rank()
SELECT unitprice, --COUNT(OrderID) AS TotalOrders, dense_RANK() OVER (ORDER BY unitprice DESC) AS RankWithnoGap from NORTHWIND.dbo.[Order Details]
/* the above query Shows difference between RANK() and DENSE_RANK() when there are ties.*/
--LAG() and LEAD() Compare customer orders select * from NORTHWIND.dbo.Orders --solution SELECT CustomerID,OrderID, OrderDate, LAG(OrderDate) OVER (PARTITION BY CustomerID ORDER BY OrderDate) AS PreviousOrderDate, LEAD(OrderDate) OVER (PARTITION BY CustomerID ORDER BY OrderDate) AS NextOrderDate FROM NORTHWIND.dbo.Orders; /* the above Tracks a customers previous and next orders.*/
--SUM() as a window function Running total of order amounts select * from NORTHWIND.dbo.Orders --solution SELECT CustomerID, OrderDate, OrderID,freight, SUM(Freight) OVER (PARTITION BY CustomerID ORDER BY OrderDate) AS RunningFreight FROM NORTHWIND.dbo. Orders;
--NTILE(4) Segment customers into quartiles by order count select * from NORTHWIND.dbo.Orders --solution WITH CustomerOrderCount AS ( SELECT CustomerID, COUNT(OrderID) AS TotalOrders FROM NORTHWIND.dbo. Orders GROUP BY CustomerID ) SELECT , NTILE(4) OVER (ORDER BY TotalOrders DESC) AS Quartile FROM CustomerOrderCount; / the above query Categorizes customers into 4 groups based on order count.*/
/* Function Use Case ROW_NUMBER() Numbering rows in a partition RANK() Ranking with gaps DENSE_RANK() Ranking without gaps LAG() / LEAD() Compare previous/next values SUM() / AVG() OVER Running totals, rolling averages NTILE(n) Bucketing rows (quartiles,deciles, etc.) */