-
Notifications
You must be signed in to change notification settings - Fork 0
/
window_functions.sql
213 lines (145 loc) · 6.11 KB
/
window_functions.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
create table bookings(
booking_id int,
name varchar,
host_id int,
host_name varchar,
neighbourhood_group varchar(50),
neighbourhood varchar(50),
latitude decimal(7,5),
longitude decimal(7,5),
room_type varchar(50),
price int,
minimum_nights int,
number_of_reviews int,
last_review date,
reviews_per_month decimal(4,2),
calculated_host_listings_count int,
availability_365 int,
primary key(booking_id)
)
drop table bookings;
-- sql shell command to copy dtaa from csv file
ny_airbnb=# \COPY bookings FROM 'C:\Users\Pranav\OneDrive\Documents\AB_NYC_2019.csv' delimiter ',' CSV HEADER ENCODING 'utf-8'
COPY 48895
select * from bookings
select max(len)
from(
select length(name) as len
from bookings
) as subquery;
-- over()
select booking_id, name, host_name, neighbourhood, price,
avg(price) over(),
min(price) over(),
max(price) over()
from bookings;
select booking_id, name, host_name, neighbourhood, price,
round(price - avg(price) over()) as diff_from_avg_price,
round(price - max(price) over()) as diff_from_max_price
from bookings;
select max(price) from bookings;
select booking_id, name, host_name, neighbourhood_group, price, round(avg(price) over()) as avg_price,
round(price / avg(price) over() * 100) || '%' as percent_avg_price,
round(ceil(price) / max(price) over() * 100) || '%' as percent_max_price
from bookings;
-- partition by
select booking_id, name, host_name, neighbourhood_group, round(avg(price) over()) as avg_price,
round(avg(price) over(partition by neighbourhood_group)) as avg_price_of_ngh_group
from bookings;
select * from bookings
select booking_id, name, host_name, neighbourhood_group, round(avg(price) over()) as avg_price,
round(avg(price) over(partition by neighbourhood_group)) as avg_price_of_ngh_group,
round(avg(price) over(partition by neighbourhood_group, neighbourhood)) as avg_price_of_ngh
from bookings;
select booking_id, name, neighbourhood_group, neighbourhood, price, round(avg(price) over()) as avg_price,
price - round(avg(price) over(partition by neighbourhood_group)) as delta_price_ngh_group,
price - round(avg(price) over(partition by neighbourhood_group, neighbourhood)) as delta_price_of_ngh
from bookings;
--row_number
select booking_id, name, neighbourhood_group, neighbourhood, price,
row_number() over(order by price desc) as overall_price_rank
from bookings
select booking_id, name, neighbourhood_group, neighbourhood, price,
row_number() over(order by price desc) as overall_price_rank,
row_number() over(partition by neighbourhood_group order by price desc) as ngh_group_price_rank
from bookings
select booking_id, name, neighbourhood_group, neighbourhood, price,
row_number() over(order by price desc),
row_number() over(partition by neighbourhood_group order by price desc) as ngh_group_price_rank,
row_number() over(partition by neighbourhood_group, neighbourhood order by price desc) as ngh_price_rank
from bookings
--top 3
with cte as (
select booking_id, name, neighbourhood_group, neighbourhood, price,
row_number() over(partition by neighbourhood_group, neighbourhood order by price desc) as ngh_price_rank
from bookings
)
select * from cte where ngh_price_rank <= 3
select booking_id, name, neighbourhood_group, neighbourhood, price,
row_number() over(partition by neighbourhood_group, neighbourhood order by price desc) as ngh_price_rank,
case
when row_number() over(partition by neighbourhood_group, neighbourhood order by price desc) <=3 then 'yes'
else 'no'
end as is_top_3
from bookings
-- rank
select booking_id, name, neighbourhood_group, price,
row_number() over(order by price desc) as overall_price_rank,
rank() over(order by price desc) as overall_price_rank_with_rank
from bookings
-- dense rank
select booking_id, name, neighbourhood_group, price,
row_number() over(order by price desc) as overall_price_rank,
rank() over(order by price desc) as overall_price_rank_with_rank,
dense_rank() over(order by price desc) as overall_price_with_dense_rank
from bookings
select distinct(neighbourhood_group), count(1)
from bookings
group by neighbourhood_group
select neighbourhood_group, max(price)
from bookings
group by neighbourhood_group
with cte as(
select booking_id, name, neighbourhood_group, price,
dense_rank() over(partition by neighbourhood_group order by price desc) as price_rank
from bookings
)
select * from cte
where price_rank <=3
-- lag and lead
select * from bookings
select booking_id, name, neighbourhood_group, price,
lag(price) over(partition by neighbourhood_group order by price desc) as lagging_price,
lead(price) over(partition by neighbourhood_group order by price desc) as leading_price
from bookings
select booking_id, name, neighbourhood_group, price,
lag(last_review,3) over(partition by neighbourhood_group order by price desc) as previous_last_review_date_record,
last_review,
lead(last_review,3) over(partition by neighbourhood_group order by price desc) as next_last_review_date_record
from bookings
with cte as (
select booking_id, name, neighbourhood_group,
rank() over(partition by neighbourhood_group order by price desc) as ngh_group_price_rank,
lag(price) over(partition by neighbourhood_group order by price desc) as lagging_prev_price,
price,
lead(price) over(partition by neighbourhood_group order by price desc) as leading_next_price
from bookings
)
select * from cte
where ngh_group_price_rank <=5
-- rolling sum
select booking_id, name, neighbourhood_group, price,
sum(price) over(order by last_review range between unbounded preceding and current row) as rolling_sum
from bookings
-- rolling sum over 7 days
select booking_id, name, neighbourhood_group, price, last_review,
sum(price) over(order by last_review range between interval '7 days' preceding and current row) as rolling_sum_7days
from bookings
-- rolling sum over 7 rows
select booking_id, name, neighbourhood_group, price, last_review,
sum(price) over(order by price desc rows between 7 preceding and current row) as rolling_sum_7rows
from bookings
-- moving average 10 rows
select booking_id, name, neighbourhood_group, price, last_review,
round(avg(price) over(order by price desc rows between current row and 9 following)) as moving_avg_10rows
from bookings