-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.js
executable file
·117 lines (101 loc) · 3.19 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env node
const querystring = require('querystring');
const fs = require('fs');
const https = require('https');
const jsdom = require('jsdom');
const xpath = require('xpath-html');
const fetchers = require('./fetchers.js');
const { parse } = require('parse5');
const { serializeToString } = require('xmlserializer');
const { JSDOM } = jsdom;
const { readFileSync, writeFileSync, appendFileSync } = fs;
// Destination path for the scraped rows: the first argument after the script
// name on the command line (undefined when none is supplied).
const [, , outputFile] = process.argv;
// Field definitions parsed from 'field_mappings.json'; the rest of this file
// reads each entry's 'name' and 'selector' keys.
const mapping = JSON.parse(readFileSync('field_mappings.json', { encoding: 'utf-8' }));
/**
 * Collect the result entries from a parsed listing page.
 *
 * The selector follows the DOM structure of 'ldc.lloyds.com/marketdirectory':
 * every `li` that is a direct child of a `ul` sitting directly inside an
 * element with class `marketing-directories-results`.
 *
 * @param {Document} doc - Parsed page document (e.g. a jsdom `window.document`).
 * @returns {NodeList} The matching `li` elements, as returned by `querySelectorAll`.
 */
function parseDataList(doc) {
  // The search covers the whole document; it is not restricted to the
  // '#CoverholderTabDetails' tab.
  return doc.querySelectorAll('.marketing-directories-results > ul > li');
}
/**
 * Read the total number of result pages from the pagination control.
 *
 * The count is taken from the content of the link inside the last pagination
 * entry of the '#CoverholderTabDetails' element.
 *
 * @param {Document} doc - Parsed page document.
 * @returns {number} The page count parsed as a base-10 integer; NaN when the
 *   link content is not numeric.
 * @throws {Error} When the pagination link is not present in `doc`.
 */
function getNoPages(doc) {
  const selector = '#CoverholderTabDetails .table-listing-list-pagination-pages > li:last-child > a';
  const lastPageLink = doc.querySelector(selector);
  if (!lastPageLink) {
    // Fail with a message that names the missing element instead of an
    // opaque "cannot read properties of null".
    throw new Error(`Pagination link not found: ${selector}`);
  }
  // Explicit radix so content such as "0x10" is never read as hexadecimal.
  return parseInt(lastPageLink.innerHTML, 10);
}
/**
 * Reset the output file so that it contains only the heading row.
 *
 * The file is created when it does not exist and truncated when it does. The
 * heading row is the comma-joined `name` of every mapping entry and is written
 * without a trailing newline.
 *
 * Failures are logged with `console.error` and are not rethrown.
 *
 * @param {string} outputFile - Path of the file to (re)initialise.
 * @param {Array<{name: string}>} mapping - Field definitions supplying the headings.
 */
function prepareOutputFile(outputFile, mapping) {
  try {
    // Build the row first so that an invalid mapping cannot wipe an existing file.
    const headings = mapping.map((item) => item['name']);
    // writeFileSync creates or truncates the file, so no separate
    // existence check or emptying step is needed.
    writeFileSync(outputFile, headings.join());
  } catch (err) {
    console.error(err);
  }
}
/**
 * Build the request options for one page of the 'cov' listing.
 * @param {number} pageNumber - Page number handed to `fetchers.constructPath`.
 * @returns {Object} Options object with `hostname`, `path` and `headers`.
 */
function buildPageRequest(pageNumber) {
  return {
    hostname: "ldc.lloyds.com",
    path: fetchers.constructPath('cov', pageNumber),
    headers: {
      'User-Agent': 'Mozilla/5.0'
    }
  };
}

/**
 * Extract one value per mapping entry from a single result entry.
 * @param {Element} listItem - One element returned by `parseDataList`.
 * @returns {string[]} Field values in the same order as `mapping`.
 */
function extractRow(listItem) {
  // Parse and serialise the entry once; the markup is the same for every field.
  const xhtml = serializeToString(parse(listItem.outerHTML));
  // NOTE(review): fromNode receives the serialised string here, unchanged from
  // the earlier code; confirm against the xpath-html documentation that this
  // is the intended entry point.
  const querySpace = xpath.fromNode(xhtml);
  return mapping.map(function (field) {
    return querySpace.findElement(field['selector']).getText();
  });
}

/**
 * Parse one fetched page and append a row per result entry to the output file.
 * @param {Object} pageRequest - Options the page was requested with (used for logging).
 * @param {string} html - Page markup as resolved by `fetchers.getData`.
 */
function savePage(pageRequest, html) {
  console.log(`${pageRequest.hostname}/${pageRequest.path} success\n`);
  const doc = new JSDOM(html).window.document;
  parseDataList(doc).forEach(function (listItem) {
    const row = extractRow(listItem).join();
    // prepareOutputFile leaves the heading row without a trailing newline, so
    // every data row starts with its own line break. Values are joined
    // unescaped, in the same way as the heading row.
    appendFileSync(outputFile, `\n${row}`);
  });
}

/**
 * Log a failed page together with the options it was requested with.
 * @param {Object} pageRequest - Options of the failed request.
 * @param {*} reason - Rejection reason or thrown error.
 */
function reportFetchFailure(pageRequest, reason) {
  console.error(`Failed to get page: ${reason} with options:`);
  console.error(pageRequest);
}

// Script entry point: reset the output file, fetch page 1 to learn the page
// count, then fetch and store every page.
prepareOutputFile(outputFile, mapping);
const options = buildPageRequest(1);
fetchers.getData(options)
  .then(function (result) {
    console.log("Initial request success");
    const totalPages = getNoPages(new JSDOM(result).window.document);
    console.log(`Total pages: ${totalPages}`);
    // Pages are requested without waiting for one another, so rows land in the
    // output file in the order the responses arrive.
    for (let i = 1; i <= totalPages; i++) {
      const pageRequest = buildPageRequest(i);
      // Page 1 is already in hand from the initial request; reuse it rather
      // than downloading it again (assumes the same options yield the same page).
      const pending = i === 1 ? Promise.resolve(result) : fetchers.getData(pageRequest);
      pending
        .then(function (html) {
          savePage(pageRequest, html);
        })
        .catch(function (reason) {
          reportFetchFailure(pageRequest, reason);
        });
    }
  })
  .catch(function (reason) {
    reportFetchFailure(options, reason);
  });