Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions definitions/output/f1/pages_latest.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Dataform output table `f1.pages_latest`: the rows of crawl.pages whose
// `date` equals constants.currentMonth, with several columns re-encoded
// through TO_JSON_STRING.
publish('pages_latest', {
  schema: 'f1',
  type: 'table',
  description: 'The latest date from the crawl.pages table',
  tags: ['crawl_complete'],
  bigquery: {
    clusterBy: ['client', 'is_root_page', 'rank', 'page'],
    partitionBy: 'date'
  }
}).preOps(() => `
SET @@RESERVATION='projects/httparchive/locations/US/reservations/enterprise';
`).query(context => {
  // Resolve the upstream table first; ref() is the only call made on the context.
  const pagesTable = context.ref('crawl', 'pages')
  // NOTE(review): `constants` is not defined in this file — presumably the
  // project-level includes module; confirm currentMonth is a YYYY-MM-DD string.
  const crawlDate = constants.currentMonth

  // payload, summary, lighthouse, metadata and every custom_metrics field are
  // emitted as STRING; features and technologies are passed through unchanged.
  return `
SELECT
date,
client,
page,
is_root_page,
root_page,
rank,
wptid,
TO_JSON_STRING(payload) AS payload,
TO_JSON_STRING(summary) AS summary,
STRUCT<
a11y STRING,
cms STRING,
cookies STRING,
css_variables STRING,
ecommerce STRING,
element_count STRING,
javascript STRING,
markup STRING,
media STRING,
origin_trials STRING,
performance STRING,
privacy STRING,
responsive_images STRING,
robots_txt STRING,
security STRING,
structured_data STRING,
third_parties STRING,
well_known STRING,
wpt_bodies STRING,
other STRING
> (
TO_JSON_STRING(custom_metrics.a11y),
TO_JSON_STRING(custom_metrics.cms),
TO_JSON_STRING(custom_metrics.cookies),
TO_JSON_STRING(custom_metrics.css_variables),
TO_JSON_STRING(custom_metrics.ecommerce),
TO_JSON_STRING(custom_metrics.element_count),
TO_JSON_STRING(custom_metrics.javascript),
TO_JSON_STRING(custom_metrics.markup),
TO_JSON_STRING(custom_metrics.media),
TO_JSON_STRING(custom_metrics.origin_trials),
TO_JSON_STRING(custom_metrics.performance),
TO_JSON_STRING(custom_metrics.privacy),
TO_JSON_STRING(custom_metrics.responsive_images),
TO_JSON_STRING(custom_metrics.robots_txt),
TO_JSON_STRING(custom_metrics.security),
TO_JSON_STRING(custom_metrics.structured_data),
TO_JSON_STRING(custom_metrics.third_parties),
TO_JSON_STRING(custom_metrics.well_known),
TO_JSON_STRING(custom_metrics.wpt_bodies),
TO_JSON_STRING(custom_metrics.other)
) AS custom_metrics,
TO_JSON_STRING(lighthouse) AS lighthouse,
features,
technologies,
TO_JSON_STRING(metadata) AS metadata
FROM ${pagesTable}
WHERE
date = '${crawlDate}'
`
})
32 changes: 32 additions & 0 deletions definitions/output/f1/requests_latest.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Dataform output table `f1.requests_latest`: the rows of crawl.requests
// whose `date` equals constants.currentMonth, with payload and summary
// re-encoded through TO_JSON_STRING.
publish('requests_latest', {
  schema: 'f1',
  type: 'table',
  description: 'The latest date from the crawl.requests table',
  tags: ['crawl_complete'],
  bigquery: {
    clusterBy: ['client', 'is_root_page', 'rank', 'type'],
    partitionBy: 'date'
  }
}).preOps(() => `
SET @@RESERVATION='projects/httparchive/locations/US/reservations/enterprise';
`).query(context => {
  // Resolve the upstream table first; ref() is the only call made on the context.
  const requestsTable = context.ref('crawl', 'requests')
  // NOTE(review): `constants` is not defined in this file — presumably the
  // project-level includes module; confirm currentMonth is a YYYY-MM-DD string.
  const crawlDate = constants.currentMonth

  // Header and body columns are passed through unchanged; only payload and
  // summary are converted to STRING.
  return `
SELECT
date,
client,
page,
is_root_page,
root_page,
rank,
url,
is_main_document,
type,
index,
TO_JSON_STRING(payload) AS payload,
TO_JSON_STRING(summary) AS summary,
request_headers,
response_headers,
response_body
FROM ${requestsTable}
WHERE
date = '${crawlDate}'
`
})
7 changes: 4 additions & 3 deletions definitions/output/sample_data/pages_10k.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@ publish('pages_10k', {
schema: 'sample_data',
bigquery: {
partitionBy: 'date',
clusterBy: ['client', 'is_root_page', 'rank']
clusterBy: ['client', 'is_root_page', 'rank', 'page']
},
tags: ['crawl_complete']
}).query(ctx => `
SELECT *
FROM ${ctx.ref('crawl', 'pages')}
WHERE date = '${constants.currentMonth}' AND
rank <= 10000
WHERE
date = '${constants.currentMonth}' AND
rank <= 10000
`)
7 changes: 4 additions & 3 deletions definitions/output/sample_data/requests_10k.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@ publish('requests_10k', {
schema: 'sample_data',
bigquery: {
partitionBy: 'date',
clusterBy: ['client', 'is_root_page', 'is_main_document', 'type']
clusterBy: ['client', 'is_root_page', 'rank', 'type']
},
tags: ['crawl_complete']
}).query(ctx => `
SELECT *
FROM ${ctx.ref('crawl', 'requests')}
WHERE date = '${constants.currentMonth}' AND
rank <= 10000
WHERE
date = '${constants.currentMonth}' AND
rank <= 10000
`)