Skip to content

Commit c0abb4e

Browse files
author
Jose Alberto Calvo Vargas
committed
Add use case
1 parent e85e44d commit c0abb4e

16 files changed

+543
-0
lines changed
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
TOKEN "tracker" APPEND

DESCRIPTION >
    Landing data source for raw analytics events.
    Each row holds the event timestamp, session id, action name, version and a JSON payload string.
    Rows are dropped 60 days after their timestamp (see ENGINE_TTL).

SCHEMA >
    `timestamp`  DateTime               `json:$.timestamp`  DEFAULT now(),
    `session_id` String                 `json:$.session_id` DEFAULT '',
    `action`     LowCardinality(String) `json:$.action`     DEFAULT 'None',
    `version`    LowCardinality(String) `json:$.version`    DEFAULT '1.0',
    `payload`    String                 `json:$.payload`    DEFAULT '{}'

ENGINE "MergeTree"
ENGINE_PARTITION_KEY "toYYYYMM(timestamp)"
ENGINE_SORTING_KEY "timestamp"
ENGINE_TTL "timestamp + toIntervalDay(60)"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
2+
DESCRIPTION >
    Daily aggregate keyed by date, device, browser, location and pathname.
    `visits` and `hits` are stored as aggregate states; read them with uniqMerge / countMerge.

SCHEMA >
    `date`     Date,
    `device`   String,
    `browser`  String,
    `location` String,
    `pathname` String,
    `visits`   AggregateFunction(uniq, String),
    `hits`     AggregateFunction(count)

ENGINE "AggregatingMergeTree"
ENGINE_PARTITION_KEY "toYYYYMM(date)"
ENGINE_SORTING_KEY "date, device, browser, location, pathname"
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
2+
DESCRIPTION >
    Daily aggregate keyed by date and session_id.
    Device, browser and location keep any value seen for the session; first_hit / latest_hit keep
    the min / max timestamp. `hits` is an aggregate state; read it with countMerge.

SCHEMA >
    `date`       Date,
    `session_id` String,
    `device`     SimpleAggregateFunction(any, String),
    `browser`    SimpleAggregateFunction(any, String),
    `location`   SimpleAggregateFunction(any, String),
    `first_hit`  SimpleAggregateFunction(min, DateTime),
    `latest_hit` SimpleAggregateFunction(max, DateTime),
    `hits`       AggregateFunction(count)

ENGINE "AggregatingMergeTree"
ENGINE_PARTITION_KEY "toYYYYMM(date)"
ENGINE_SORTING_KEY "date, session_id"
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
2+
DESCRIPTION >
    Daily aggregate keyed by date, device, browser, location and referrer.
    `visits` and `hits` are stored as aggregate states; read them with uniqMerge / countMerge.

SCHEMA >
    `date`     Date,
    `device`   String,
    `browser`  String,
    `location` String,
    `referrer` String,
    `visits`   AggregateFunction(uniq, String),
    `hits`     AggregateFunction(count)

ENGINE "AggregatingMergeTree"
ENGINE_PARTITION_KEY "toYYYYMM(date)"
ENGINE_SORTING_KEY "date, device, browser, location, referrer"
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
DESCRIPTION >
2+
Parsed `page_hit` events, implementing `browser` and `device` detection logic.
3+
4+
5+
TOKEN "dashboard" READ
6+
7+
NODE parsed_hits
DESCRIPTION >
    Keep only `page_hit` events from `analytics_events` and pull the page fields
    (locale, location, referrer, pathname, href, user agent) out of the JSON payload.
    The user agent is lower-cased so later matching can use lower-case patterns.

SQL >

    SELECT
        timestamp,
        action,
        version,
        -- NOTE(review): if session_id is a non-nullable String in analytics_events,
        -- coalesce never substitutes '0'. Confirm whether empty ids should map to '0'.
        coalesce(session_id, '0') AS session_id,
        JSONExtractString(payload, 'locale') AS locale,
        JSONExtractString(payload, 'location') AS location,
        JSONExtractString(payload, 'referrer') AS referrer,
        JSONExtractString(payload, 'pathname') AS pathname,
        JSONExtractString(payload, 'href') AS href,
        lower(JSONExtractString(payload, 'user-agent')) AS user_agent
    FROM analytics_events
    WHERE action = 'page_hit'
26+
27+
28+
29+
NODE endpoint
DESCRIPTION >
    Classify each parsed hit into a `device` and a `browser` from its lower-cased user agent.
    Branch order matters: the first matching pattern wins.

SQL >

    SELECT
        timestamp,
        action,
        version,
        session_id,
        location,
        referrer,
        pathname,
        href,
        CASE
            -- Bots are tested first so crawlers are not counted as real devices.
            WHEN match(user_agent, 'wget|ahrefsbot|curl|urllib|bitdiscovery|\+https://|googlebot')
            THEN 'bot'
            WHEN match(user_agent, 'android')
            THEN 'mobile-android'
            WHEN match(user_agent, 'ipad|iphone|ipod')
            THEN 'mobile-ios'
            ELSE 'desktop'
        END AS device,
        CASE
            WHEN match(user_agent, 'firefox')
            THEN 'firefox'
            -- Opera must be tested before Chrome: Blink-based Opera sends both
            -- "Chrome/..." and "OPR/..." tokens, so a chrome-first test hides it.
            WHEN match(user_agent, 'opera|opr/')
            THEN 'opera'
            WHEN match(user_agent, 'chrome|crios')
            THEN 'chrome'
            WHEN match(user_agent, 'msie|trident')
            THEN 'ie'
            -- Safari goes last among known browsers because other browsers
            -- also include "safari" in their user agent.
            WHEN match(user_agent, 'iphone|ipad|safari')
            THEN 'safari'
            ELSE 'Unknown'
        END AS browser
    FROM parsed_hits
64+
65+
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
NODE analytics_pages_1
DESCRIPTION >
    Roll hits up per day, device, browser, location and pathname.
    Writes aggregate states (unique sessions as `visits`, row count as `hits`) into `analytics_pages_mv`.

SQL >

    SELECT
        toDate(timestamp) AS date,
        device,
        browser,
        location,
        pathname,
        -- States, not final values: readers finish them with uniqMerge / countMerge.
        uniqState(session_id) AS visits,
        countState() AS hits
    FROM analytics_hits
    GROUP BY
        date,
        device,
        browser,
        location,
        pathname

TYPE materialized
DATASOURCE analytics_pages_mv
20+
21+
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
NODE analytics_sessions_1
DESCRIPTION >
    Roll hits up per day and session_id.
    Keeps any device / browser / location seen for the session, the first and latest hit
    timestamps, and a hit-count state. Writes into `analytics_sessions_mv`.

SQL >

    SELECT
        toDate(timestamp) AS date,
        session_id,
        anySimpleState(device) AS device,
        anySimpleState(browser) AS browser,
        anySimpleState(location) AS location,
        minSimpleState(timestamp) AS first_hit,
        maxSimpleState(timestamp) AS latest_hit,
        -- Count state; readers finish it with countMerge.
        countState() AS hits
    FROM analytics_hits
    GROUP BY
        date,
        session_id

TYPE materialized
DATASOURCE analytics_sessions_mv
21+
22+
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
NODE analytics_sources_1
DESCRIPTION >
    Aggregate by referrer and calculate visits and hits.
    Hits whose referrer is on the same domain as the visited page are skipped,
    so only external (or empty) referrers are counted. Writes into `analytics_sources_mv`.

SQL >

    SELECT
        toDate(timestamp) AS date,
        device,
        browser,
        location,
        referrer,
        uniqState(session_id) AS visits,
        countState() AS hits
    FROM analytics_hits
    -- Compare each hit's referrer with the domain of the page it landed on.
    -- The previous version took the domain from an arbitrary LIMIT 1 row, which
    -- is non-deterministic and wrong when more than one domain is tracked.
    WHERE domainWithoutWWW(referrer) != domainWithoutWWW(href)
    GROUP BY
        date,
        device,
        browser,
        location,
        referrer

TYPE materialized
DATASOURCE analytics_sources_mv
22+
23+
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
DESCRIPTION >
2+
Summary with general KPIs per date, including visits, page views, bounce rate and average session duration.
3+
Accepts `date_from` and `date_to` date filters; defaults to the last 7 days if not passed.
4+
Daily granularity, except when filtering one single day (hourly)
5+
6+
7+
TOKEN "dashboard" READ
8+
9+
NODE timeseries
DESCRIPTION >
    Generate a timeseries for the specified time range, so we can fill empty data points.
    Hourly points when `date_from` and `date_to` are the same day, daily points otherwise.
    Filters "future" data points.

SQL >

    %
    {% set _single_day = defined(date_from) and day_diff(date_from, date_to) == 0 %}
    WITH
        -- range_start / range_end are midnight of the first and last day of the range.
        -- Without parameters the range runs from 7 days ago to today.
        {% if defined(date_from) %}
            toStartOfDay(
                toDate({{ Date(date_from, description="Starting day for filtering a date range", required=False) }})
            ) AS range_start,
        {% else %}
            toStartOfDay(timestampAdd(today(), interval -7 day)) AS range_start,
        {% end %}
        {% if defined(date_to) %}
            toStartOfDay(
                toDate({{ Date(date_to, description="Finishing day for filtering a date range", required=False) }})
            ) AS range_end
        {% else %}
            toStartOfDay(today()) AS range_end
        {% end %}
    {% if _single_day %}
        -- One point every 3600 seconds from range_start up to the end of the last day.
        SELECT
            arrayJoin(
                arrayMap(
                    ts -> toDateTime(ts),
                    range(
                        toUInt32(toDateTime(range_start)),
                        toUInt32(timestampAdd(range_end, interval 1 day)),
                        3600
                    )
                )
            ) AS date
    {% else %}
        -- One point every 24 * 3600 seconds, converted to a Date.
        SELECT
            arrayJoin(
                arrayMap(
                    ts -> toDate(ts),
                    range(
                        toUInt32(range_start),
                        toUInt32(timestampAdd(range_end, interval 1 day)),
                        24 * 3600
                    )
                )
            ) AS date
    {% end %}
    WHERE date <= now()
67+
68+
69+
70+
NODE hits
DESCRIPTION >
    Group by sessions and calculate metrics at that level.
    Single-day requests read `analytics_hits` in hourly buckets; any other range reads the
    daily `analytics_sessions_mv` aggregate.

SQL >

    %
    {% if defined(date_from) and day_diff(date_from, date_to) == 0 %}
        -- Single day: one row per hour and session.
        SELECT
            toStartOfHour(timestamp) AS date,
            session_id,
            uniq(session_id) AS visits,
            count() AS pageviews,
            if(min(timestamp) = max(timestamp), 1, 0) AS is_bounce,
            max(timestamp) AS latest_hit_aux,
            min(timestamp) AS first_hit_aux
        FROM analytics_hits
        WHERE toDate(timestamp) = {{ Date(date_from) }}
        GROUP BY
            toStartOfHour(timestamp),
            session_id
    {% else %}
        -- Date range: one row per day and session from the pre-aggregated data.
        SELECT
            date,
            session_id,
            uniq(session_id) AS visits,
            countMerge(hits) AS pageviews,
            if(min(first_hit) = max(latest_hit), 1, 0) AS is_bounce,
            max(latest_hit) AS latest_hit_aux,
            min(first_hit) AS first_hit_aux
        FROM analytics_sessions_mv
        WHERE
            {% if defined(date_from) %}
                date >= {{ Date(date_from) }}
            {% else %}
                date >= timestampAdd(today(), interval -7 day)
            {% end %}
            {% if defined(date_to) %}
                AND date <= {{ Date(date_to) }}
            {% else %}
                AND date <= today()
            {% end %}
        GROUP BY
            date,
            session_id
    {% end %}
108+
109+
110+
111+
NODE data
DESCRIPTION >
    General KPIs per date, works for both summary metrics and trends charts.
    A session counts as a bounce when its first and latest hit share the same timestamp.

SQL >

    SELECT
        date,
        uniq(session_id) AS visits,
        sum(pageviews) AS pageviews,
        -- countIf returns 0 when no session bounced. The previous
        -- sum(case when ... then 1 end) had no ELSE branch, so it produced NULL
        -- (and a NULL bounce_rate) for dates without any bounce.
        countIf(latest_hit_aux = first_hit_aux) / visits AS bounce_rate,
        -- Difference between two DateTime values, i.e. seconds.
        avg(latest_hit_aux - first_hit_aux) AS avg_session_sec
    FROM hits
    GROUP BY date
125+
126+
127+
128+
NODE endpoint
DESCRIPTION >
    Left-join the generated timeseries with the KPI rows on `date`, so every point of the
    timeseries appears in the output even when there is no KPI row for it.

SQL >

    SELECT
        a.date,
        b.visits,
        b.pageviews,
        b.bounce_rate,
        b.avg_session_sec
    FROM timeseries AS a
    LEFT JOIN data AS b USING date
137+
138+
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
DESCRIPTION >
2+
Top Browsers ordered by most visits.
3+
Accepts `date_from` and `date_to` date filter. Defaults to last 7 days.
4+
Also `skip` and `limit` parameters for pagination.
5+
6+
7+
TOKEN "dashboard" READ
8+
9+
NODE endpoint
DESCRIPTION >
    Group by browser and calculate hits and visits.
    Filters by `date_from` / `date_to` (last 7 days by default) and paginates with `skip` / `limit`.

SQL >

    %
    SELECT
        browser,
        uniqMerge(visits) AS visits,
        countMerge(hits) AS hits
    -- Read the page-level aggregate rather than analytics_sources_mv: the sources
    -- aggregate drops hits whose referrer is on the site's own domain, which
    -- undercounts hits per browser. analytics_pages_mv has the same browser /
    -- visits / hits columns without that filter.
    FROM analytics_pages_mv
    WHERE
        {% if defined(date_from) %}
            date >= {{ Date(date_from, description="Starting day for filtering a date range", required=False) }}
        {% else %}
            date >= timestampAdd(today(), interval -7 day)
        {% end %}
        {% if defined(date_to) %}
            AND date <= {{ Date(date_to, description="Finishing day for filtering a date range", required=False) }}
        {% else %}
            AND date <= today()
        {% end %}
    GROUP BY browser
    ORDER BY visits DESC
    LIMIT {{ Int32(skip, 0) }},{{ Int32(limit, 50) }}
34+
35+

0 commit comments

Comments
 (0)