Skip to content

Commit

Permalink
Shrunk the JS!
Browse files Browse the repository at this point in the history
  • Loading branch information
B3none committed Apr 25, 2018
1 parent dfc95f7 commit 47b5866
Showing 1 changed file with 4 additions and 68 deletions.
72 changes: 4 additions & 68 deletions src/Scraper/Runner/runner.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,91 +12,27 @@ function getComputedDOM(url, jsLoadTime) {
const timeout = ms => new Promise(resolve => setTimeout(resolve, ms));

return new Promise(async resolve => {
// console.log('Started');

const chrome = await chromeLauncher.launch({
chromeFlags: ['--disable-gpu', '--headless']
});

const protocol = await CDP({port: chrome.port});

// See API docs: https://chromedevtools.github.io/devtools-protocol/
const {Page, Runtime, DOM} = protocol;

await Promise.all([Page.enable(), Runtime.enable(), DOM.enable()]);

Page.navigate({url});

// console.log('Navigated to ', url);

// wait until the page says it's loaded...
Page.loadEventFired(async () => {
// console.log('page loaded - waiting for JS');
await timeout(jsLoadTime || 3000); // give the JS some time to load
// console.log('JS done - parsing');

// get the page source
await timeout(jsLoadTime || 3000);
const rootNode = await DOM.getDocument({depth: -1});

const html = await getOuterHTML(DOM, rootNode.root.nodeId);

protocol.close();
chrome.kill();

resolve(
// JSON.stringify({url, anchors: {inbound, outbound}, objects, iframes})
html
await getOuterHTML(DOM, rootNode.root.nodeId)
);
});
});
}

/**
 * Find the ids of all nodes below a given root that match a selector.
 *
 * Adapts the callback form of `DOM.querySelectorAll` to a promise.
 *
 * @param {object} DOM - Object exposing `querySelectorAll(args, callback)`
 *   (presumably the DevTools-protocol DOM domain; confirm against caller).
 * @param {*} selector - Forwarded unchanged as the `selector` argument.
 * @param {*} rootNodeId - Forwarded as the `nodeId` argument.
 * @returns {Promise<*>} Resolves with `params.nodeIds` from the reply;
 *   rejects with the error passed to the callback.
 */
async function selectNodeIds(DOM, selector, rootNodeId) {
  const query = {selector, nodeId: rootNodeId};

  const matches = await new Promise((onFound, onFailed) => {
    const handleReply = (error, params) => {
      if (!error) {
        onFound(params.nodeIds);
        return;
      }

      onFailed(error);
    };

    DOM.querySelectorAll(query, handleReply);
  });

  return matches;
}

/**
 * Read a single attribute value from a node.
 *
 * `selectAttrs` yields a flat list shaped like `[name, value, name, value, ...]`,
 * so names live at even indexes and values at odd indexes.
 *
 * @param {object} DOM - Passed through to `selectAttrs`.
 * @param {*} nodeId - Passed through to `selectAttrs`.
 * @param {string} attr - Attribute name to look up.
 * @returns {Promise<*>} The entry following the first matching name,
 *   or `null` when no name matches.
 */
async function selectAttr(DOM, nodeId, attr) {
  const attrs = await selectAttrs(DOM, nodeId);

  // Step over name slots only. Searching the whole list with indexOf can land
  // on a *value* that happens to equal `attr`; the previous retry restarted the
  // search at that same index and therefore never terminated.
  for (let nameIdx = 0; nameIdx < attrs.length; nameIdx += 2) {
    if (attrs[nameIdx] === attr) {
      return attrs[nameIdx + 1];
    }
  }

  return null;
}

/**
 * Fetch the attribute list of a node.
 *
 * Adapts the callback form of `DOM.getAttributes` to a promise.
 *
 * @param {object} DOM - Object exposing `getAttributes(args, callback)`
 *   (presumably the DevTools-protocol DOM domain; confirm against caller).
 * @param {*} nodeId - Forwarded as the `nodeId` argument.
 * @returns {Promise<*>} Resolves with `params.attributes` from the reply;
 *   rejects with the error passed to the callback.
 */
async function selectAttrs(DOM, nodeId) {
  return await new Promise((resolve, reject) => {
    DOM.getAttributes({nodeId}, (error, params) => {
      // No session teardown here: `protocol` and `chrome` are not in scope in
      // this helper, and whoever opened the session is responsible for
      // closing it.
      if (error) {
        reject(error);
      } else {
        resolve(params.attributes);
      }
    });
  });
}
Expand Down

0 comments on commit 47b5866

Please sign in to comment.