Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Markup chapter queries #115

Merged
merged 5 commits into from Aug 14, 2019
Merged
Changes from all commits
Commits
File filter...
Filter file types
Jump to…
Jump to file or symbol
Failed to load files and symbols.

Always

Just for now

No changes.
@@ -0,0 +1,24 @@
#standardSQL
# 03_01a: % of pages with deprecated elements
CREATE TEMPORARY FUNCTION containsDeprecatedElement(payload STRING)
RETURNS BOOLEAN LANGUAGE js AS '''
try {
var $ = JSON.parse(payload);
var elements = JSON.parse($._element_count)
var deprecatedElements = new Set(["applet","acronym","bgsound","dir","frame","frameset","noframes","isindex","keygen","listing","menuitem","nextid","noembed","plaintext","rb","rtc","strike","xmp","basefont","big","blink","center","font","marquee","multicol","nobr","spacer","tt"]);
return !!Object.keys(elements).find(e => {
return deprecatedElements.has(e);
});
} catch (e) {
return false;
}
''';

SELECT
_TABLE_SUFFIX AS client,
COUNTIF(containsDeprecatedElement(payload)) AS pages,
ROUND(COUNTIF(containsDeprecatedElement(payload)) * 100 / COUNT(0), 2) AS pct_pages
FROM
`httparchive.pages.2019_07_01_*`
GROUP BY
client
@@ -0,0 +1,34 @@
#standardSQL
# 03_01b: Top deprecated elements
CREATE TEMPORARY FUNCTION getElements(payload STRING)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
try {
var $ = JSON.parse(payload);
var elements = JSON.parse($._element_count)
return Object.keys(elements);
} catch (e) {
return [];
}
''';

CREATE TEMPORARY FUNCTION isDeprecated(element STRING) AS (
element IN ("applet","acronym","bgsound","dir","frame","frameset","noframes","isindex","keygen","listing","menuitem","nextid","noembed","plaintext","rb","rtc","strike","xmp","basefont","big","blink","center","font","marquee","multicol","nobr","spacer","tt")
);

SELECT
_TABLE_SUFFIX AS client,
element AS deprecated,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS total,
ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX), 2) AS pct
FROM
`httparchive.pages.2019_07_01_*`,
UNNEST(getElements(payload)) AS element
WHERE
isDeprecated(element)
GROUP BY
client,
deprecated
ORDER BY
freq / total DESC,
client
@@ -0,0 +1,32 @@
#standardSQL
# 03_02a: % of pages having elements
CREATE TEMPORARY FUNCTION getElements(payload STRING)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
try {
var $ = JSON.parse(payload);
var elements = JSON.parse($._element_count)
return Object.keys(elements);
} catch (e) {
return [];
}
''';

SELECT
_TABLE_SUFFIX AS client,
element,
COUNT(DISTINCT url) AS pages,
total,
ROUND(COUNT(DISTINCT url) * 100 / total, 2) AS pct
FROM
`httparchive.pages.2019_07_01_*`
JOIN
(SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX)
USING (_TABLE_SUFFIX),
UNNEST(getElements(payload)) AS element
GROUP BY
client,
total,
element
ORDER BY
pct DESC,
client
@@ -0,0 +1,28 @@
#standardSQL
# 03_02b: Top elements
CREATE TEMPORARY FUNCTION getElements(payload STRING)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
try {
var $ = JSON.parse(payload);
var elements = JSON.parse($._element_count)
return Object.keys(elements);
} catch (e) {
return [];
}
''';

SELECT
_TABLE_SUFFIX AS client,
element,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS total,
ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX), 2) AS pct
FROM
`httparchive.pages.2019_07_01_*`,
UNNEST(getElements(payload)) AS element
GROUP BY
client,
element
ORDER BY
freq / total DESC,
client
@@ -0,0 +1,21 @@
#standardSQL
# 03_03a: % of pages with custom elements ("slang")
CREATE TEMPORARY FUNCTION containsCustomElement(payload STRING)
RETURNS BOOLEAN LANGUAGE js AS '''
try {
var $ = JSON.parse(payload);
var elements = JSON.parse($._element_count)
return Object.keys(elements).filter(e => e.includes('-')).length > 0;
} catch (e) {
return false;
}
''';

SELECT
_TABLE_SUFFIX AS client,
COUNTIF(containsCustomElement(payload)) AS pages,
ROUND(COUNTIF(containsCustomElement(payload)) * 100 / COUNT(0), 2) AS pct_pages
FROM
`httparchive.pages.2019_07_01_*`
GROUP BY
client
@@ -0,0 +1,28 @@
#standardSQL
# 03_03b: Top custom elements ("slang")
CREATE TEMPORARY FUNCTION getCustomElements(payload STRING)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
try {
var $ = JSON.parse(payload);
var elements = JSON.parse($._element_count)
return Object.keys(elements).filter(e => e.includes('-'));
} catch (e) {
return [];
}
''';

SELECT
_TABLE_SUFFIX AS client,
custom_element,
COUNT(0) AS freq,
SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX) AS total,
ROUND(COUNT(0) * 100 / SUM(COUNT(0)) OVER (PARTITION BY _TABLE_SUFFIX), 2) AS pct
FROM
`httparchive.pages.2019_07_01_*`,
UNNEST(getCustomElements(payload)) AS custom_element
GROUP BY
client,
custom_element
ORDER BY
freq / total DESC,
client
@@ -0,0 +1,33 @@
#standardSQL
# 03_03c: % of pages having custom elements
CREATE TEMPORARY FUNCTION getCustomElements(payload STRING)
RETURNS ARRAY<STRING> LANGUAGE js AS '''
try {
var $ = JSON.parse(payload);
var elements = JSON.parse($._element_count)
return Object.keys(elements).filter(e => e.includes('-'));
} catch (e) {
return [];
}
''';

SELECT
_TABLE_SUFFIX AS client,
element,
COUNT(DISTINCT url) AS pages,
total,
ROUND(COUNT(DISTINCT url) * 100 / total, 2) AS pct
FROM
`httparchive.pages.2019_07_01_*`
JOIN
(SELECT _TABLE_SUFFIX, COUNT(0) AS total FROM `httparchive.pages.2019_07_01_*` GROUP BY _TABLE_SUFFIX)
USING (_TABLE_SUFFIX),
UNNEST(getCustomElements(payload)) AS element
GROUP BY
client,
total,
element
ORDER BY
pct DESC,
client
LIMIT 10000
@@ -0,0 +1,14 @@
#standardSQL
# 03_05: % of pages with shadow roots
CREATE TEMPORARY FUNCTION hasShadowRoot(payload STRING) AS (
JSON_EXTRACT_SCALAR(payload, '$._has_shadow_root') = 'true'
);

SELECT
_TABLE_SUFFIX AS client,
COUNTIF(hasShadowRoot(payload)) AS pages,
ROUND(COUNTIF(hasShadowRoot(payload)) * 100 / COUNT(0), 2) AS pct_pages
FROM
`httparchive.pages.2019_07_01_*`
GROUP BY
client
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.