Skip to content

Commit

Permalink
Added logging messages indicating when new applications are inserted.
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelBone committed Jun 26, 2018
1 parent 88c1203 commit 06d4830
Showing 1 changed file with 17 additions and 9 deletions.
26 changes: 17 additions & 9 deletions scraper.js
Expand Up @@ -26,7 +26,7 @@ function initializeDatabase(callback) {

// Inserts a row in the database if it does not already exist.

function insertRow(database, developmentApplication) {
function insertRow(database, pdfFileName, developmentApplication) {
let sqlStatement = database.prepare("insert or ignore into [data] values (?, ?, ?, ?, ?, ?, ?, ?, ?)");
sqlStatement.run([
developmentApplication.applicationNumber,
Expand All @@ -38,20 +38,26 @@ function insertRow(database, developmentApplication) {
developmentApplication.lodgementDate,
null,
null
]);
sqlStatement.finalize(); // releases any locks
], function(error, row) {
if (error)
console.log(error);
else {
if (this.changes > 0)
console.log(` Inserted new application \"${developmentApplication.applicationNumber}\" from \"${pdfFileName}\" into the database.`);
sqlStatement.finalize(); // releases any locks
}
});
}

// Reads a page using a request.

function requestPage(url, callback) {
console.log(`Requesting page: ${url}`);
request(url, (error, response, body) => {
if (error) {
if (error)
console.log(`Error requesting page ${url}: ${error}`);
return;
}
callback(body);
else
callback(body);
});
}

Expand All @@ -71,7 +77,7 @@ function parsePdfs(database, url) {
if (!pdfUrls.some(url => url === parsedPdfUrl.href)) // avoid duplicates
pdfUrls.push(parsedPdfUrl.href);
});
console.log(`Found ${pdfUrls.length} PDF file(s) to read and parse at ${url}.`);
console.log(`Found ${pdfUrls.length} PDF file(s) to download and parse at ${url}.`);

// Read and parse each PDF, extracting the development application text.

Expand Down Expand Up @@ -162,8 +168,10 @@ function parsePdfs(database, url) {
// rows in a table. If the same development application number already exists on
// a row then that existing row will not be replaced.

let pdfFileName = decodeURIComponent(new urlparser.URL(pdfUrl).pathname.split("/").pop());
console.log(`Found ${developmentApplications.length} development application(s) in \"${pdfFileName}\".`)
for (let developmentApplication of developmentApplications)
insertRow(database, developmentApplication);
insertRow(database, pdfFileName, developmentApplication);
});
}
});
Expand Down

0 comments on commit 06d4830

Please sign in to comment.