-
Notifications
You must be signed in to change notification settings - Fork 106
/
92571f94989a_add_trending_views.py
289 lines (270 loc) · 10.5 KB
/
92571f94989a_add_trending_views.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
"""add trending views
Revision ID: 92571f94989a
Revises: 65ec7a3171c7
Create Date: 2021-09-22 16:27:23.281441
"""
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
revision = "92571f94989a"
down_revision = "65ec7a3171c7"
branch_labels = None
depends_on = None
def upgrade():
connection = op.get_bind()
connection.execute(
"""
DROP MATERIALIZED VIEW IF EXISTS aggregate_interval_plays;
CREATE MATERIALIZED VIEW aggregate_interval_plays as
SELECT
tracks.track_id as track_id,
tracks.genre as genre,
tracks.created_at as created_at,
COALESCE (week_listen_counts.count, 0) as week_listen_counts,
COALESCE (month_listen_counts.count, 0) as month_listen_counts,
COALESCE (year_listen_counts.count, 0) as year_listen_counts
FROM
tracks
LEFT OUTER JOIN (
SELECT
plays.play_item_id as play_item_id,
count(plays.id) as count
FROM
plays
WHERE
plays.created_at > (now() - interval '1 week')
GROUP BY plays.play_item_id
) as week_listen_counts ON week_listen_counts.play_item_id = tracks.track_id
LEFT OUTER JOIN (
SELECT
plays.play_item_id as play_item_id,
count(plays.id) as count
FROM
plays
WHERE
plays.created_at > (now() - interval '1 month')
GROUP BY plays.play_item_id
) as month_listen_counts ON month_listen_counts.play_item_id = tracks.track_id
LEFT OUTER JOIN (
SELECT
plays.play_item_id as play_item_id,
count(plays.id) as count
FROM
plays
WHERE
plays.created_at > (now() - interval '1 year')
GROUP BY plays.play_item_id
) as year_listen_counts ON year_listen_counts.play_item_id = tracks.track_id
WHERE
tracks.is_current is True AND
tracks.is_delete is False AND
tracks.is_unlisted is False AND
tracks.stem_of is Null;
CREATE INDEX interval_play_track_id_idx ON aggregate_interval_plays (track_id);
CREATE INDEX interval_play_week_count_idx ON aggregate_interval_plays (week_listen_counts);
CREATE INDEX interval_play_month_count_idx ON aggregate_interval_plays (month_listen_counts);
CREATE INDEX interval_play_year_count_idx ON aggregate_interval_plays (year_listen_counts);
---
DROP MATERIALIZED VIEW IF EXISTS trending_params;
CREATE MATERIALIZED VIEW trending_params as
SELECT
t.track_id as track_id,
t.genre as genre,
t.owner_id as owner_id,
ap.play_count as play_count,
au.follower_count as owner_follower_count,
COALESCE (aggregate_track.repost_count, 0) as repost_count,
COALESCE (aggregate_track.save_count, 0) as save_count,
COALESCE (repost_week.repost_count, 0) as repost_week_count,
COALESCE (repost_month.repost_count, 0) as repost_month_count,
COALESCE (repost_year.repost_count, 0) as repost_year_count,
COALESCE (save_week.repost_count, 0) as save_week_count,
COALESCE (save_month.repost_count, 0) as save_month_count,
COALESCE (save_year.repost_count, 0) as save_year_count,
COALESCE (karma.karma, 0) as karma
FROM
tracks t
-- join on subquery for aggregate play count
LEFT OUTER JOIN (
SELECT
ap.count as play_count,
ap.play_item_id as play_item_id
FROM
aggregate_plays ap
) as ap ON ap.play_item_id = t.track_id
-- join on subquery for aggregate user
LEFT OUTER JOIN (
SELECT
au.user_id as user_id,
au.follower_count as follower_count
FROM
aggregate_user au
) as au ON au.user_id = t.owner_id
-- join on subquery for aggregate track
LEFT OUTER JOIN (
SELECT
aggregate_track.track_id as track_id,
aggregate_track.repost_count as repost_count,
aggregate_track.save_count as save_count
FROM
aggregate_track
) as aggregate_track ON aggregate_track.track_id = t.track_id
-- -- join on subquery for reposts by year
LEFT OUTER JOIN (
SELECT
r.repost_item_id as track_id,
count(r.repost_item_id) as repost_count
FROM
reposts r
WHERE
r.is_current is True AND
r.repost_type = 'track' AND
r.is_delete is False AND
r.created_at > (now() - interval '1 year')
GROUP BY r.repost_item_id
) repost_year ON repost_year.track_id = t.track_id
-- -- join on subquery for reposts by month
LEFT OUTER JOIN (
SELECT
r.repost_item_id as track_id,
count(r.repost_item_id) as repost_count
FROM
reposts r
WHERE
r.is_current is True AND
r.repost_type = 'track' AND
r.is_delete is False AND
r.created_at > (now() - interval '1 month')
GROUP BY r.repost_item_id
) repost_month ON repost_month.track_id = t.track_id
-- -- join on subquery for reposts by week
LEFT OUTER JOIN (
SELECT
r.repost_item_id as track_id,
count(r.repost_item_id) as repost_count
FROM
reposts r
WHERE
r.is_current is True AND
r.repost_type = 'track' AND
r.is_delete is False AND
r.created_at > (now() - interval '1 week')
GROUP BY r.repost_item_id
) repost_week ON repost_week.track_id = t.track_id
-- -- join on subquery for saves by year
LEFT OUTER JOIN (
SELECT
r.save_item_id as track_id,
count(r.save_item_id) as repost_count
FROM
saves r
WHERE
r.is_current is True AND
r.save_type = 'track' AND
r.is_delete is False AND
r.created_at > (now() - interval '1 year')
GROUP BY r.save_item_id
) save_year ON save_year.track_id = t.track_id
-- -- join on subquery for saves by month
LEFT OUTER JOIN (
SELECT
r.save_item_id as track_id,
count(r.save_item_id) as repost_count
FROM
saves r
WHERE
r.is_current is True AND
r.save_type = 'track' AND
r.is_delete is False AND
r.created_at > (now() - interval '1 month')
GROUP BY r.save_item_id
) save_month ON save_month.track_id = t.track_id
-- -- join on subquery for saves by week
LEFT OUTER JOIN (
SELECT
r.save_item_id as track_id,
count(r.save_item_id) as repost_count
FROM
saves r
WHERE
r.is_current is True AND
r.save_type = 'track' AND
r.is_delete is False AND
r.created_at > (now() - interval '1 week')
GROUP BY r.save_item_id
) save_week ON save_week.track_id = t.track_id
LEFT OUTER JOIN (
SELECT
save_and_reposts.item_id as track_id,
sum(au.follower_count) as karma
FROM
(
select
r_and_s.user_id,
r_and_s.item_id
from
(select
user_id,
repost_item_id as item_id
from
reposts
where
is_delete is false AND
is_current is true AND
repost_type = 'track'
union all
select
user_id,
save_item_id as item_id
from
saves
where
is_delete is false AND
is_current is true AND
save_type = 'track'
) r_and_s
join
users
on r_and_s.user_id = users.user_id
where
users.cover_photo is not null AND
users.profile_picture is not null AND
users.bio is not null
) save_and_reposts
JOIN
aggregate_user au
ON
save_and_reposts.user_id = au.user_id
GROUP BY save_and_reposts.item_id
) karma ON karma.track_id = t.track_id
WHERE
t.is_current is True AND
t.is_delete is False AND
t.is_unlisted is False AND
t.stem_of is Null;
CREATE INDEX trending_params_track_id_idx ON trending_params (track_id);
"""
)
# ### commands auto generated by Alembic - please adjust! ###
op.create_table(
"track_trending_scores",
sa.Column("track_id", sa.Integer(), nullable=False, index=True),
sa.Column("type", sa.String(), index=True),
sa.Column("genre", sa.String(), index=True),
sa.Column("version", sa.String(), nullable=False),
sa.Column("time_range", sa.String(), nullable=True),
sa.Column("score", sa.Float(), nullable=False, index=True),
sa.Column("created_at", sa.DateTime(), nullable=False, default=sa.func.now()),
sa.PrimaryKeyConstraint("track_id", "type", "version", "time_range"),
)
def downgrade():
connection = op.get_bind()
connection.execute(
"""
DROP INDEX IF EXISTS interval_play_track_id_idx;
DROP MATERIALIZED VIEW aggregate_interval_plays;
DROP INDEX IF EXISTS trending_params_track_id_idx;
DROP MATERIALIZED VIEW trending_params;
"""
)
op.drop_table("track_trending_scores")