From 47b58664a7023c39fb3a3a41c87f34809574715e Mon Sep 17 00:00:00 2001 From: B3none Date: Wed, 25 Apr 2018 15:34:18 +0100 Subject: [PATCH] Shrunk the JS! --- src/Scraper/Runner/runner.js | 72 ++---------------------------------- 1 file changed, 4 insertions(+), 68 deletions(-) diff --git a/src/Scraper/Runner/runner.js b/src/Scraper/Runner/runner.js index 34356a1..38af6d9 100644 --- a/src/Scraper/Runner/runner.js +++ b/src/Scraper/Runner/runner.js @@ -12,91 +12,27 @@ function getComputedDOM(url, jsLoadTime) { const timeout = ms => new Promise(resolve => setTimeout(resolve, ms)); return new Promise(async resolve => { - // console.log('Started'); - const chrome = await chromeLauncher.launch({ chromeFlags: ['--disable-gpu', '--headless'] }); const protocol = await CDP({port: chrome.port}); - - // See API docs: https://chromedevtools.github.io/devtools-protocol/ const {Page, Runtime, DOM} = protocol; await Promise.all([Page.enable(), Runtime.enable(), DOM.enable()]); Page.navigate({url}); - // console.log('Navigated to ', url); - - // wait until the page says it's loaded... Page.loadEventFired(async () => { - // console.log('page loaded - waiting for JS'); - await timeout(jsLoadTime || 3000); // give the JS some time to load - // console.log('JS done - parsing'); - - // get the page source + await timeout(jsLoadTime || 3000); const rootNode = await DOM.getDocument({depth: -1}); - const html = await getOuterHTML(DOM, rootNode.root.nodeId); - - protocol.close(); - chrome.kill(); - resolve( - // JSON.stringify({url, anchors: {inbound, outbound}, objects, iframes}) - html + await getOuterHTML(DOM, rootNode.root.nodeId) ); - }); - }); -} - -async function selectNodeIds(DOM, selector, rootNodeId) { - return await new Promise((resolve, reject) => { - DOM.querySelectorAll({selector, nodeId: rootNodeId}, (error, params) => { - if (error) { - reject(error); - } else { - resolve(params.nodeIds); - } - }); - }); -} - -async function selectAttr(DOM, nodeId, attr) { - const attrs = await selectAttrs(DOM, nodeId); - // attrs is formatted like `[name, value, name, value, ...]` - // so we can just search for the index of the name and return the next index along - // however, we need to be careful that the names we match only include the even indexes - // (as all the key indexes are even and all the data indexes are odd) - - var idx = attrs.indexOf(attr); - while (idx % 2 == 1) { - if (idx < 0) { - break; - } - - if (idx % 2 == 1) { - idx = attrs.indexOf(attr, idx); - } - } - - if (idx < 0) { - return null; - } - - return attrs[idx + 1]; -} - -async function selectAttrs(DOM, nodeId) { - return await new Promise((resolve, reject) => { - DOM.getAttributes({nodeId}, (error, params) => { - if (error) { - reject(error); - } else { - resolve(params.attributes); - } + protocol.close(); + chrome.kill(); }); }); }