-
Notifications
You must be signed in to change notification settings - Fork 19
/
21_WindowFunctions_Solutions.sql
294 lines (233 loc) · 8.5 KB
/
21_WindowFunctions_Solutions.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
/******************************************************************************
Course videos: https://www.red-gate.com/hub/university/courses/t-sql/tsql-for-beginners
Course scripts: https://litknd.github.io/TSQLBeginners
Ranking, Numbering, and Running Totals with Windowing Functions
SOLUTION FILE
*****************************************************************************/
/* ✋🏻 Doorstop ✋🏻 */
RAISERROR(N'Did you mean to run the whole thing?', 20, 1) WITH LOG;
GO
USE WideWorldImporters;
GO
/* Q1
Using the Sales.CustomerTransactions and Sales.Customers tables
joined on CustomerID
Select four columns:
CustomerName
TransactionDate
TransactionAmount
RunningTotalByDate, defined as:
A running total of the TransactionAmount
for that CustomerID
by TransactionDate Ascending
Return rows only for CustomerID 1057
The query should return 36 rows
*/
SELECT c.CustomerName,
ct.TransactionDate,
ct.TransactionAmount,
SUM(ct.TransactionAmount) OVER (PARTITION BY ct.CustomerID ORDER BY ct.TransactionDate ASC) AS RunningTotalByDate
FROM Sales.CustomerTransactions AS ct
JOIN Sales.Customers AS c
ON ct.CustomerID = c.CustomerID
WHERE c.CustomerID = 1057;
--Since we are returning this for only one customer, do we need the "partition by"?
SELECT c.CustomerName,
ct.TransactionDate,
ct.TransactionAmount,
SUM(ct.TransactionAmount) OVER (ORDER BY ct.TransactionDate ASC) AS RunningTotalByDate
FROM Sales.CustomerTransactions AS ct
JOIN Sales.Customers AS c
ON ct.CustomerID = c.CustomerID
WHERE c.CustomerID = 1057;
GO
/* Q2
Using the Sales.CustomerTransactions and Sales.Customers tables
joined on CustomerID
Select four columns:
TransactionRank, defined as:
The DENSE_RANK of the row,
based on TransactionAmount
(highest TransactionAmount in the whole table = #1, etc)
CustomerName
TransactionDate
TransactionAmount
Return only the TOP 10 ranked TransactionAmounts for all time
Make sure the query allows for more than 10 rows to come back if there are ties
*/
--We can *try* this...
SELECT DENSE_RANK() OVER (ORDER BY ct.TransactionAmount DESC) AS TransactionRank,
c.CustomerName,
ct.TransactionDate,
ct.TransactionAmount
FROM Sales.CustomerTransactions AS ct
JOIN Sales.Customers AS c
ON ct.CustomerID = c.CustomerID
WHERE DENSE_RANK() OVER (ORDER BY ct.TransactionAmount DESC) <= 10;
GO
--But...
--Msg 4108, Level 15, State 1, Line 85
--Windowed functions can only appear in the SELECT or ORDER BY clauses.
--The wording in this problem could be misunderstood (easily, words are hard)
--If we want to include ranks 1-10, then we do not want to use TOP with TIES
--TOP WITH TIES handles ties *for last place*
--But what if we had a lot of ties for rank #2, for example? We might not get many ranks
SELECT TOP 10 WITH TIES
DENSE_RANK() OVER (ORDER BY ct.TransactionAmount DESC) AS TransactionRank,
c.CustomerName,
ct.TransactionDate,
ct.TransactionAmount
FROM Sales.CustomerTransactions AS ct
JOIN Sales.Customers AS c
ON ct.CustomerID = c.CustomerID
ORDER BY TransactionRank ASC;
--This will show us ranks 1-10, no matter where the ties may be
SELECT *
FROM
(
SELECT DENSE_RANK() OVER (ORDER BY ct.TransactionAmount DESC) AS TransactionRank,
c.CustomerName,
ct.TransactionDate,
ct.TransactionAmount
FROM Sales.CustomerTransactions AS ct
JOIN Sales.Customers AS c
ON ct.CustomerID = c.CustomerID
) AS t
WHERE t.TransactionRank <= 10
ORDER BY t.TransactionRank;
--You could also write this with a CTE
WITH t
AS (SELECT DENSE_RANK() OVER (ORDER BY ct.TransactionAmount DESC) AS TransactionRank,
c.CustomerName,
ct.TransactionDate,
ct.TransactionAmount
FROM Sales.CustomerTransactions AS ct
JOIN Sales.Customers AS c
ON ct.CustomerID = c.CustomerID)
SELECT *
FROM t
WHERE t.TransactionRank <= 10
ORDER BY TransactionRank;
/* Q3
This is similar to the previous question, however...
You need to return an additional column, TransactionYear, and...
You are returning the top 10 ranked transactions for *each* year
which has rows in the CustomerTransactions table
Using the Sales.CustomerTransactions and Sales.Customers tables
joined on CustomerID
Select five columns:
TransactionYear, defined as:
The year of the TransactionDate (use the built in YEAR() function)
TransactionRank, defined as:
The DENSE_RANK of the row for the year of that TransactionDate,
based on TransactionAmount
(highest TransactionAmount in the whole table = #1, etc)
CustomerName
TransactionDate
TransactionAmount
Return only the TOP 10 ranked TransactionAmounts for each year
Make sure the query allows for more than 10 rows to come back for each year if there are ties
*/
SELECT *
FROM
(
SELECT YEAR(ct.TransactionDate) AS TransactionYear,
DENSE_RANK() OVER (PARTITION BY YEAR(ct.TransactionDate)
ORDER BY ct.TransactionAmount DESC
) AS TransactionRank,
c.CustomerName,
ct.TransactionDate,
ct.TransactionAmount
FROM Sales.CustomerTransactions AS ct
JOIN Sales.Customers AS c
ON ct.CustomerID = c.CustomerID
) AS t
WHERE t.TransactionRank <= 10
ORDER BY t.TransactionYear,
t.TransactionRank;
GO
--this could also be written with a CTE
with t as (
SELECT YEAR(ct.TransactionDate) AS TransactionYear,
DENSE_RANK() OVER (PARTITION BY YEAR(ct.TransactionDate)
ORDER BY ct.TransactionAmount DESC
) AS TransactionRank,
c.CustomerName,
ct.TransactionDate,
ct.TransactionAmount
FROM Sales.CustomerTransactions AS ct
JOIN Sales.Customers AS c
ON ct.CustomerID = c.CustomerID
)
SELECT *
FROM t
WHERE t.TransactionRank <= 10
ORDER BY t.TransactionYear,
t.TransactionRank;
GO
/* Q4
Using the Sales.CustomerTransactions and Sales.Customers tables
joined on CustomerID
Select six columns:
CustomerName
Median, defined as...
The median AmountExcludingTax for that CustomerID
Use PERCENTILE_CONT(0.5) to calculate the median
Average, defined as...
The average AmountExcludingTax for that CustomerID
Maxmimum, defined as
The maximum AmountExcludingTax for that CustomerID
Minimum, defined as
The minimum AmountExcludingTax for that CustomerID
TransactionCount, defined as
The count of rows for that CustomerID
Return only one row per CustomerID
Order the results by the Median (as defined above) descending
The query should return 263 rows
*/
SELECT DISTINCT
c.CustomerName,
PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY ct.AmountExcludingTax) OVER (PARTITION BY ct.CustomerID) AS [Median],
AVG(ct.AmountExcludingTax) OVER (PARTITION BY ct.CustomerID) AS [Average],
MAX(ct.AmountExcludingTax) OVER (PARTITION BY ct.CustomerID) AS [Maximum],
MIN(ct.AmountExcludingTax) OVER (PARTITION BY ct.CustomerID) AS [Minimum],
COUNT(ct.CustomerID) OVER (PARTITION BY ct.CustomerID) AS [TransactionCount]
FROM Sales.CustomerTransactions AS ct
JOIN Sales.Customers AS c
ON ct.CustomerID = c.CustomerID
ORDER BY Median DESC;
GO
/* Q5
Table change!
Using the Sales.Customers and Sales.Invoices tables
joined on CustomerID
Select three columns:
RankByInvoiceCount, defined as...
The RANK() of the customer based on the count of invoices
Highest number of invoices should have RankByInvoiceCount = 1
CustomerName
InvoiceCount, defined as...
The count of invoices for that CustomerName
Return only rows where RankByInvoiceCount < 11
Order the results by RankByInvoiceCount
*/
--USE RANK to rank customers by invoice count
--I defend my use of SELECT * in this particular scenario
--(But feel free too argue with me! )
WITH invoicecounts
AS (SELECT c.CustomerName,
COUNT(*) AS InvoiceCount
FROM Sales.Customers AS c
JOIN Sales.Invoices AS si
ON c.CustomerID = si.CustomerID
GROUP BY c.CustomerName),
rankbyinvoicecounts
AS (SELECT RANK() OVER (ORDER BY ic.InvoiceCount DESC) AS RankByInvoiceCount,
ic.CustomerName,
ic.InvoiceCount
FROM invoicecounts AS ic)
SELECT *
FROM rankbyinvoicecounts
WHERE RankByInvoiceCount < 11
ORDER BY RankByInvoiceCount;
GO