implement "up-to-date" build design (#1545)

* build and cache stuff

* actually use twitter fetcher

* maybe and tasks

* add build step

* limit build

* remove unneeded sample files

* better cpu and stuffs

* review feedback

* lint allow

* JSON load file

* remove high cpu, create 11tyignore early

* fix lint

* notes

* throw on uncaught rejection

* support fallback in CI build

* feedback

* opts
samthor committed Oct 7, 2021
1 parent 7108e3e commit 6a5c07e
Showing 17 changed files with 1,149 additions and 83 deletions.
2 changes: 2 additions & 0 deletions .cloudbuild/deploy.yaml
@@ -28,3 +28,5 @@ substitutions:

options:
  machineType: 'E2_HIGHCPU_8' # yolo
  env:
    - 'NODE_OPTIONS=--max_old_space_size=4096 --unhandled-rejections=strict'
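
These `NODE_OPTIONS` recur across the Cloud Build and GitHub Actions configs in this commit. The `--unhandled-rejections=strict` flag matters because every script added here ends in a bare `run()` call with no `.catch()`; a minimal sketch of the difference (the file name is hypothetical):

// sketch.js (hypothetical): not part of this commit.
async function run() {
  throw new Error('external fetch failed');
}

// The returned promise is never awaited, so the rejection is "unhandled".
run();

// node sketch.js
//   On Node 14 this prints a warning and exits 0, so CI would pass anyway.
// node --unhandled-rejections=strict sketch.js
//   The rejection is raised as an uncaught exception and the process exits 1,
//   failing the build step loudly.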
46 changes: 46 additions & 0 deletions .cloudbuild/external.yaml
@@ -0,0 +1,46 @@
# This builds external data, and if the site passes a basic build, writes the data to
# Cloud Storage.

timeout: 900s # set build timeout to 15 mins

steps:
  - name: node:14
    id: 'Install dependencies'
    entrypoint: npm
    args: ['ci']

  - name: node:14
    id: 'Build external data'
    entrypoint: npm
    args: ['run', 'build-external']
    env:
      - 'PROJECT_ID=$PROJECT_ID'
      - 'NODE_ENV=production'

  - name: node:14
    id: 'Create .eleventyignore file'
    entrypoint: npm
    args: ['run', 'ignore']
    env:
      # TODO: We should try to remove more stuff here, because it's supposed to
      # be a fast build to sanity-check output.
      # It needs to include extensions and API generation.
      - 'ELEVENTY_IGNORE_NACL=true'

  - name: node:14
    id: 'Build eleventy in dev mode to confirm'
    entrypoint: npm
    args: ['run', 'eleventy']
    # This does NOT set `NODE_ENV=production`, as we don't need the full build.

  - name: 'gcr.io/cloud-builders/gcloud'
    id: 'Synchronize content to external-dcc-data bucket'
    entrypoint: bash
    args:
      - '-c'
      - |
        gsutil rsync external/data/ gs://external-dcc-data

options:
  env:
    - 'NODE_OPTIONS=--max_old_space_size=4096 --unhandled-rejections=strict'
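
The final step pushes `external/data/` into the `external-dcc-data` bucket. The `sync-external` script that the README and workflow below reference isn't part of this excerpt; presumably it performs the reverse copy. A speculative sketch under that assumption:

// Hypothetical external/sync-external.js: the real script isn't shown in this
// diff, and this assumes it simply reverses the rsync above.
const childProcess = require('child_process');
const path = require('path');

const dataTarget = path.join(__dirname, 'data');

// Pull the last known good data from the bucket that the CI build writes to.
const out = childProcess.spawnSync(
  `gsutil rsync gs://external-dcc-data "${dataTarget}"`,
  {shell: true, stdio: 'inherit'}
);
process.exit(out.status ?? 1);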
3 changes: 3 additions & 0 deletions .eleventy.js
@@ -61,6 +61,9 @@ module.exports = eleventyConfig => {
  // to use it for its build.
  eleventyConfig.setUseGitIgnore(false);

  // Watch our external data in case it is synchronized or rebuilt.
  eleventyConfig.addWatchTarget('./external/data/');

  // Merge eleventy's data cascade. This means directory data files will
  // cascade down to any child directories.
  eleventyConfig.setDataDeepMerge(true);
13 changes: 11 additions & 2 deletions .github/workflows/check.yml
@@ -7,6 +7,9 @@ jobs:
  lint_and_test:
    name: Lint, test and build
    runs-on: ubuntu-latest
    env:
      # Increase the RAM limit, and make unhandled promise rejections crash
      # (the default in Node 16+).
      NODE_OPTIONS: --max_old_space_size=4096 --unhandled-rejections=strict
    steps:
      - name: Checkout
        uses: actions/checkout@v1.0.0
@@ -46,6 +49,8 @@ jobs:
              - '**/*.scss'
            njk:
              - '**/*.njk'
            _external:
              - 'external/**/*'
      # Use the filter to check if files with a specific file type were changed
      # in the PR. If they were, run the relevant linters. Otherwise, skip.
Expand All @@ -66,9 +71,13 @@ jobs:
        if: ${{ steps.filter.outputs.scss == 'true' }}
        run: npm run lint:scss

      - name: Build External
        if: ${{ steps.filter.outputs._external == 'true' }}
        run: npm run build-external

      - name: Build Eleventy
-       if: ${{ steps.filter.outputs.md == 'true' || steps.filter.outputs.js == 'true' || steps.filter.outputs.yml == 'true' || steps.filter.outputs.njk == 'true'}}
-       run: npm run ignore && npm run eleventy
+       if: ${{ steps.filter.outputs.md == 'true' || steps.filter.outputs.js == 'true' || steps.filter.outputs.yml == 'true' || steps.filter.outputs.njk == 'true' }}
+       run: npm run ignore && npm run maybe-sync-external && npm run eleventy

      # Only run tests if the PR touches behavior related files.
      - name: Test
3 changes: 3 additions & 0 deletions .github/workflows/lighthouse-ci.yml
@@ -15,6 +15,9 @@ jobs:
  lhci:
    if: ${{ github.repository == 'GoogleChrome/developer.chrome.com' }}
    runs-on: ubuntu-latest
    env:
      # Increase the RAM limit, and make unhandled promise rejections crash
      # (the default in Node 16+).
      NODE_OPTIONS: --max_old_space_size=4096 --unhandled-rejections=strict
    steps:
      - name: Checkout
        uses: actions/checkout@v1.0.0
3 changes: 3 additions & 0 deletions .github/workflows/percy.yml
@@ -23,6 +23,9 @@ jobs:
  percy:
    if: ${{ github.repository == 'GoogleChrome/developer.chrome.com' }}
    runs-on: ubuntu-latest
    env:
      # Increase the RAM limit, and make unhandled promise rejections crash
      # (the default in Node 16+).
      NODE_OPTIONS: --max_old_space_size=4096 --unhandled-rejections=strict
    steps:
      - name: Checkout
        uses: actions/checkout@v1.0.0
4 changes: 4 additions & 0 deletions .gitignore
@@ -25,3 +25,7 @@ dist
# Eleventy
# We generate our own .eleventyignore file dynamically during builds
.eleventyignore

# External source files
external/data/
external/local-build-flag
9 changes: 9 additions & 0 deletions external/README.md
@@ -0,0 +1,9 @@
This folder contains the system that manages external data for developer.chrome.com: data that changes regularly yet is required to publish the site.

- the "data/" folder is used as a working directory (and isn't checked in)

- all scripts inside "build/" are run by `npm run build-external`, which also sets a local flag ("local-build-flag") so later automatic syncs don't clobber the result

- otherwise, run `npm run sync-external` to retrieve the last known good files stored in Cloud Storage

- running `npm run dev` will automatically pull external data (at most once every ~12 hours)
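
The site-side consumer isn't shown in this commit, but the `.eleventy.js` change above watches "external/data/", which suggests Eleventy data files read the generated JSON. A minimal sketch, assuming a hypothetical global data file at `site/_data/tweets.js`:

// Hypothetical site/_data/tweets.js: a global Eleventy data file surfacing the
// externally built JSON. The path is an assumption, not part of this commit.
const fs = require('fs');
const path = require('path');

module.exports = () => {
  const p = path.join(__dirname, '../../external/data/tweets.json');
  // This file exists after `npm run build-external` or `npm run sync-external`.
  return JSON.parse(fs.readFileSync(p, 'utf-8'));
};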
84 changes: 84 additions & 0 deletions external/build-external.js
@@ -0,0 +1,84 @@
/**
 * @fileoverview Builds all external sources. This clears the data/ folder here
 * and runs all scripts in build/, which can write new files there.
 *
 * This is intended for use by Cloud Build, or by site devs doing local work.
 */

const fs = require('fs');
const path = require('path');
const glob = require('glob');
const childProcess = require('child_process');
const crypto = require('crypto');
const syncTestdata = require('./lib/sync-testdata');

async function run() {
  let errors = 0;

  const scripts = glob.sync('build/*.js', {cwd: __dirname});
  scripts.sort(); // run in alphabetical order

  const projectRoot = path.join(__dirname, '..');

  const dataTarget = path.join(__dirname, 'data');
  fs.rmSync(dataTarget, {recursive: true, force: true});
  fs.mkdirSync(dataTarget, {recursive: true});

  // If this is a CI build, we start with everything found in "testdata/". It
  // won't win (real build output overwrites it), but it is used in cases where
  // credentials and such aren't available.
  if (process.env.CI) {
    const all = await syncTestdata();
    console.info('! Using fallback before build in CI, copied:', all);
  }

  /** @type {childProcess.CommonExecOptions} */
  const options = {cwd: projectRoot, stdio: 'inherit'};

  for (const script of scripts) {
    const r = path.join(__dirname, script);
    console.info('> Running', r);
    try {
      childProcess.execFileSync('node', [r], options);
    } catch (e) {
      // We don't log the error here, as we're already getting STDERR piped above.
      console.warn(`! Failed to execute "${script}" (${e.status})`);
      ++errors;
    }
  }

  // Determine the hash for everything in data/. (nodir skips directories,
  // which can't be read as files.)
  const hash = crypto.createHash('sha256');
  const allFiles = glob.sync('data/**/*', {cwd: __dirname, nodir: true});
  if (!allFiles.length) {
    throw new Error('no files generated, cowardly refusing to hash');
  }

  // Sort allFiles, in case glob.sync is inconsistent.
  allFiles.sort();

  for (const f of allFiles) {
    const p = path.join(__dirname, f);
    const bytes = fs.readFileSync(p);
    hash.update(bytes);
  }
  const digest = hash.digest('hex');
  console.info(
    `@ Generated digest=${digest} for ${allFiles.length} files:`,
    allFiles
  );
  fs.writeFileSync(path.join(__dirname, 'data/.hash'), digest);

  // If there were any errors, exit with a non-zero status code.
  if (errors) {
    // eslint-disable-next-line no-process-exit
    process.exit(1);
  }

  // Mark this local environment as being build-only, so it won't automatically sync.
  const payload =
    '// This file blocks synchronizing local data, because you ran `npm run build-external`.\n' +
    '// Delete it to bring back automatic sync when you run `npm run dev`.';
  fs.writeFileSync(path.join(__dirname, 'local-build-flag'), payload);
}

run();
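
Scripts under "build/" receive no arguments; the contract implied by the orchestrator is simply to write output into "external/data/" and fail loudly on error. A skeletal example under those assumptions (the file name and payload are invented); `build/tweets.js` below is a real instance:

// Hypothetical external/build/example.js: a skeleton of the contract that each
// build script follows. The file name and payload are made up.
const fs = require('fs');
const path = require('path');

async function run() {
  // Fetch or compute something here; any throw aborts this script only.
  const payload = {fetchedAt: new Date().toISOString()};

  // Write into data/, which build-external.js has already recreated.
  fs.writeFileSync(
    path.join(__dirname, '../data/example.json'),
    JSON.stringify(payload)
  );
}

// A rejection crashes the process (--unhandled-rejections=strict), which
// execFileSync in build-external.js catches and counts as an error.
run();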
43 changes: 43 additions & 0 deletions external/build/tweets.js
@@ -0,0 +1,43 @@
/**
 * @fileoverview Fetches the most recent tweet from ChromiumDev and writes to storage.
 */

require('dotenv').config();

const fetch = require('node-fetch');
const fs = require('fs');
const path = require('path');

const tweetCount = 1;
const url = `https://api.twitter.com/1.1/statuses/user_timeline.json?user_id=113713261&count=${tweetCount}&include_rts=false&exclude_replies=true&tweet_mode=extended&include_ext_alt_text=true`;

async function run() {
  if (!process.env.TWITTER_BEARER) {
    if (process.env.CI) {
      return; // do nothing, the fallback data will win
    }
    throw new Error('No `TWITTER_BEARER` environment var for production');
  }

  const r = await fetch(url, {
    headers: {
      Authorization: `Bearer ${process.env.TWITTER_BEARER}`,
    },
  });

  if (!r.ok) {
    throw new Error(`Could not fetch tweets, status: ${r.status}`);
  }

  const json = await r.json();

  if (json['errors']) {
    const error = json['errors'][0];
    throw new Error(`${error.code}: ${error.message}`);
  }

  const targetFile = path.join(__dirname, '../data/tweets.json');
  fs.writeFileSync(targetFile, JSON.stringify(json));
}

run();
19 changes: 19 additions & 0 deletions external/lib/sync-testdata.js
@@ -0,0 +1,19 @@
const fs = require('fs');
const path = require('path');

/**
 * @return {string[]}
 */
module.exports = function () {
  // TODO(samthor): This just copies top-level files for now.
  const testdataTarget = path.join(__dirname, '../testdata');
  const dataTarget = path.join(__dirname, '../data');

  fs.mkdirSync(dataTarget, {recursive: true});

  const all = fs.readdirSync(testdataTarget);
  for (const f of all) {
    fs.copyFileSync(path.join(testdataTarget, f), path.join(dataTarget, f));
  }
  return all;
};
62 changes: 62 additions & 0 deletions external/maybe-sync-external.js
@@ -0,0 +1,62 @@
/**
 * @fileoverview This is run as part of `npm run dev`, and will synchronize the latest external
 * data at most once every ~12 hours. (This takes a few seconds and it shouldn't block devs who
 * don't care from getting their work done.)
 *
 * This will NOT synchronize if a previous local build was completed, and will warn as such. This
 * prevents local changes from being automatically clobbered.
 */

const fs = require('fs');
const path = require('path');
const childProcess = require('child_process');

// See if the synchronized data is more than this old. If so, we run "sync-external".
const syncThresholdMs = 12 * 60 * 60 * 1000;

async function run() {
  if (fs.existsSync(path.join(__dirname, 'local-build-flag'))) {
    console.info(
      '! Not synchronizing external data, previous local build found. ' +
        'Run `npm run sync-external` to clear it.'
    );
    return;
  }

  let mtimeMs = 0;
  try {
    const stat = fs.statSync(path.join(__dirname, 'data'));
    mtimeMs = stat.mtimeMs;
  } catch (e) {
    // The folder probably doesn't exist.
  }
  const since = Date.now() - mtimeMs;
  if (since < syncThresholdMs) {
    // Don't log at all, and don't synchronize: the data is recent enough.
    return;
  }

  const out = childProcess.spawnSync('npm run sync-external', {
    shell: true,
    stdio: 'inherit',
  });
  if (out.status) {
    if (mtimeMs) {
      // There is a folder here, so it probably has valid data. Don't throw,
      // just warn: perhaps we're offline.
      console.warn(
        '! Cannot synchronize, but old historic data exists in "/external/data/". ' +
          'Run `npm run sync-external` to try again.'
      );
      return;
    }

    // We have no data, so throw.
    throw new Error(
      'Could not sync external data into "/external/data/", ' +
        `non-zero status: ${out.status}`
    );
  }
}

run();
