Skip to content

Commit

Permalink
fix 2022/privacy
Browse files Browse the repository at this point in the history
  • Loading branch information
max-ostapenko committed May 19, 2024
1 parent 85c91d6 commit ce5dea3
Show file tree
Hide file tree
Showing 18 changed files with 71 additions and 53 deletions.
2 changes: 1 addition & 1 deletion sql/2022/privacy/most_common_client_hints.sql
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ meta_tags AS (
JSON_VALUE(payload, '$._almanac') AS metrics
FROM
`httparchive.pages.2022_06_01_*`
),
),
UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node
WHERE
JSON_VALUE(meta_node, '$.http-equiv') IS NOT NULL
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ meta_tags AS (
JSON_VALUE(payload, '$._almanac') AS metrics
FROM
`httparchive.pages.2022_06_01_*`
),
),
UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node
WHERE
JSON_VALUE(meta_node, '$.http-equiv') IS NOT NULL
Expand Down Expand Up @@ -92,8 +92,10 @@ merged_policy AS (
meta_tags
USING (client, page)
WHERE
(header_name IN ('feature-policy', 'permissions-policy') OR
tag_name IN ('feature-policy', 'permissions-policy')) AND
(
header_name IN ('feature-policy', 'permissions-policy') OR
tag_name IN ('feature-policy', 'permissions-policy')
) AND
header_value IS NOT NULL AND
tag_value IS NOT NULL
)
Expand Down
10 changes: 6 additions & 4 deletions sql/2022/privacy/most_common_purposes_for_iab_tcf_v2.sql
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
# https://stackoverflow.com/a/65054751/7391782
# Warning: fails if there are colons in the keys/values, but these are not expected

CREATE TEMPORARY FUNCTION ExtractKeyValuePairs(input STRING) RETURNS ARRAY < STRUCT < key STRING,
value STRING > > AS (
CREATE TEMPORARY FUNCTION ExtractKeyValuePairs(input STRING) RETURNS ARRAY < STRUCT <
key STRING,
value STRING
> > AS (
(
SELECT
ARRAY(
Expand Down Expand Up @@ -41,8 +43,8 @@ WITH pages_iab_tcf_v2 AS (
SELECT
client,
field,
result.key AS key,
result.value AS value,
result.key,
result.value,
COUNT(0) AS number_of_websites_with_purpose
FROM
(
Expand Down
2 changes: 1 addition & 1 deletion sql/2022/privacy/nb_sites_with_cname_tracking.sql
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ WITH websites_using_cname_tracking AS (

totals AS (
SELECT
_TABLE_SUFFIX AS _TABLE_SUFFIX,
_TABLE_SUFFIX,
count(0) AS total_pages
FROM
`httparchive.summary_pages.2022_06_01_*`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ WITH websites_using_cname_tracking AS (

totals AS (
SELECT
_TABLE_SUFFIX AS _TABLE_SUFFIX,
_TABLE_SUFFIX,
count(0) AS total_pages
FROM
`httparchive.summary_pages.2022_06_01_*`
Expand Down
2 changes: 1 addition & 1 deletion sql/2022/privacy/nb_sites_with_cname_tracking_per_rank.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ WITH websites_using_cname_tracking AS (

totals AS (
SELECT
_TABLE_SUFFIX AS _TABLE_SUFFIX,
_TABLE_SUFFIX,
rank_grouping,
count(0) AS total_pages
FROM
Expand Down
2 changes: 1 addition & 1 deletion sql/2022/privacy/number_of_websites_using_each_cmp.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ WITH totals AS (
SELECT
_TABLE_SUFFIX AS client,
app,
total_websites AS total_websites,
total_websites,
COUNT(DISTINCT url) AS number_of_websites,
COUNT(DISTINCT url) / total_websites AS percent_of_websites
FROM
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ WITH totals AS (
SELECT
_TABLE_SUFFIX AS client,
app,
total_websites AS total_websites,
total_websites,
COUNT(DISTINCT url) AS number_of_websites,
COUNT(DISTINCT url) / total_websites AS percent_of_websites
FROM
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ WITH totals AS (
SELECT
_TABLE_SUFFIX AS client,
app,
total_websites AS total_websites,
total_websites,
COUNT(DISTINCT url) AS number_of_websites,
COUNT(DISTINCT url) / total_websites AS percent_of_websites
FROM
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ WITH totals AS (
SELECT
_TABLE_SUFFIX AS client,
app,
total_websites AS total_websites,
total_websites,
COUNT(DISTINCT url) AS number_of_websites,
COUNT(DISTINCT url) / total_websites AS percent_of_websites
FROM
Expand Down
2 changes: 1 addition & 1 deletion sql/2022/privacy/number_of_websites_with_client_hints.sql
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ meta_tags AS (
JSON_VALUE(payload, '$._almanac') AS metrics
FROM
`httparchive.pages.2022_06_01_*`
),
),
UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node
WHERE
JSON_VALUE(meta_node, '$.http-equiv') IS NOT NULL
Expand Down
2 changes: 1 addition & 1 deletion sql/2022/privacy/number_of_websites_with_floc_opt_out.sql
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ meta_tags AS (
JSON_VALUE(payload, '$._almanac') AS metrics
FROM
`httparchive.pages.2022_06_01_*`
),
),
UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node
WHERE
JSON_VALUE(meta_node, '$.http-equiv') IS NOT NULL
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ SELECT DISTINCT
feature,
num_urls,
total_urls,
pct_urls AS pct_urls
pct_urls
FROM
`httparchive.blink_features.usage`
WHERE
Expand Down
4 changes: 2 additions & 2 deletions sql/2022/privacy/number_of_websites_with_nb_trackers.sql
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ FROM (
GROUP BY
client,
page
)
)
JOIN
totals
USING (client)
Expand Down Expand Up @@ -92,7 +92,7 @@ FROM (
GROUP BY
client,
page
)
)
JOIN
totals
USING (client)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ meta_tags AS (
JSON_VALUE(payload, '$._almanac') AS metrics
FROM
`httparchive.pages.2022_06_01_*`
),
),
UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node
WHERE
JSON_VALUE(meta_node, '$.http-equiv') IS NOT NULL
Expand Down
36 changes: 21 additions & 15 deletions sql/2022/privacy/number_of_websites_with_permissions_policy.sql
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ meta_tags AS (
JSON_VALUE(payload, '$._almanac') AS metrics
FROM
`httparchive.pages.2022_06_01_*`
),
),
UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node
WHERE
JSON_VALUE(meta_node, '$.http-equiv') IS NOT NULL
Expand All @@ -59,22 +59,28 @@ FROM (
SELECT
client,
rank_grouping,
COUNT(DISTINCT IF(
header_name = 'feature-policy' OR
tag_name = 'feature-policy',
page, NULL)
COUNT(
DISTINCT IF(
header_name = 'feature-policy' OR
tag_name = 'feature-policy',
page, NULL
)
) AS number_of_websites_with_feature_policy,
COUNT(DISTINCT IF(
header_name = 'permissions-policy' OR
tag_name = 'permissions-policy',
page, NULL)
COUNT(
DISTINCT IF(
header_name = 'permissions-policy' OR
tag_name = 'permissions-policy',
page, NULL
)
) AS number_of_websites_with_permissions_policy,
COUNT(DISTINCT IF(
header_name = 'feature-policy' OR
tag_name = 'feature-policy' OR
header_name = 'permissions-policy' OR
tag_name = 'permissions-policy',
page, NULL)
COUNT(
DISTINCT IF(
header_name = 'feature-policy' OR
tag_name = 'feature-policy' OR
header_name = 'permissions-policy' OR
tag_name = 'permissions-policy',
page, NULL
)
) AS number_of_websites_with_any_policy,
COUNT(DISTINCT page) AS number_of_websites
FROM
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ SELECT DISTINCT
feature,
num_urls,
total_urls,
pct_urls AS pct_urls
pct_urls
FROM
`httparchive.blink_features.usage`
WHERE
Expand Down
40 changes: 24 additions & 16 deletions sql/2022/privacy/number_of_websites_with_referrerpolicy.sql
Original file line number Diff line number Diff line change
Expand Up @@ -49,28 +49,36 @@ FROM (
SELECT
client,
COUNT(DISTINCT IF(
entire_document_policy_meta IS NOT NULL,
page, NULL)) AS number_of_websites_with_entire_document_policy_meta,
entire_document_policy_meta IS NOT NULL,
page, NULL
)) AS number_of_websites_with_entire_document_policy_meta,
COUNT(DISTINCT IF(
entire_document_policy_header IS NOT NULL,
page, NULL)) AS number_of_websites_with_entire_document_policy_header,
COUNT(DISTINCT IF(
entire_document_policy_meta IS NOT NULL OR
entire_document_policy_header IS NOT NULL,
page, NULL)
page, NULL
)) AS number_of_websites_with_entire_document_policy_header,
COUNT(
DISTINCT IF(
entire_document_policy_meta IS NOT NULL OR
entire_document_policy_header IS NOT NULL,
page, NULL
)
) AS number_of_websites_with_entire_document_policy,
COUNT(DISTINCT IF(
ARRAY_LENGTH(individual_requests) > 0,
page, NULL)) AS number_of_websites_with_any_individual_requests,
ARRAY_LENGTH(individual_requests) > 0,
page, NULL
)) AS number_of_websites_with_any_individual_requests,
COUNT(DISTINCT IF(
ARRAY_LENGTH(link_relations) > 0,
page, NULL)) AS number_of_websites_with_any_link_relations,
COUNT(DISTINCT IF(
entire_document_policy_meta IS NOT NULL OR
entire_document_policy_header IS NOT NULL OR
ARRAY_LENGTH(individual_requests) > 0 OR
ARRAY_LENGTH(link_relations) > 0,
page, NULL)
page, NULL
)) AS number_of_websites_with_any_link_relations,
COUNT(
DISTINCT IF(
entire_document_policy_meta IS NOT NULL OR
entire_document_policy_header IS NOT NULL OR
ARRAY_LENGTH(individual_requests) > 0 OR
ARRAY_LENGTH(link_relations) > 0,
page, NULL
)
) AS number_of_websites_with_any_referrer_policy,
COUNT(DISTINCT page) AS number_of_websites
FROM
Expand Down

0 comments on commit ce5dea3

Please sign in to comment.