-
Notifications
You must be signed in to change notification settings - Fork 6
/
scraper.js
71 lines (52 loc) · 2.86 KB
/
scraper.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
const pageScraper = require("./page_scraper"),
profileScraper = require("./profile_scraper"),
puppeteer = require('puppeteer'),
db = require("../db/database"),
fs = require("fs");
/**
 * Formats the wall-clock time elapsed since `origin` as a human-readable string.
 *
 * @param {Date} origin - Timestamp the run started at.
 * @returns {string} e.g. "Execution in total: 0 days, 2 hours and 15 minutes"
 */
function totalTime(origin){
  const diffMs = (new Date() - origin); // milliseconds between now & origin
  const diffDays = Math.floor(diffMs / 86400000);
  const diffHrs = Math.floor((diffMs % 86400000) / 3600000);
  // Math.floor, not Math.round: rounding could report "60 minutes" next to
  // an hour count that has not been incremented (e.g. at 59m31s elapsed).
  // (x % 86400000) % 3600000 === x % 3600000, so the inner mod is dropped.
  const diffMins = Math.floor((diffMs % 3600000) / 60000);
  return `Execution in total: ${diffDays} days, ${diffHrs} hours and ${diffMins} minutes`;
}
/**
 * Entry point for the Peluditos scraper pipeline:
 *   1. discover the total number of pagination pages on the madrid.org listing,
 *   2. scrape every page for animal entries (pageScraper),
 *   3. scrape each animal's profile (profileScraper), writing one JSON file per
 *      animal under ./data/ and persisting it via db.addAnimal.
 *
 * Fire-and-forget: returns immediately and logs failures instead of throwing
 * to the caller (matches the original `.catch(console.log)` contract).
 */
function scraper () {
  console.log("[INFO][Peluditos Project][Scraper] Starting now....");
  const startTime = new Date();

  // Reads the pagination widget's last page link to learn how many pages exist.
  const paginationScraper = async () => {
    const browser = await puppeteer.launch();
    try {
      const page = await browser.newPage();
      await page.goto('http://www.madrid.org/adanweb/html/web/ListadoCompleto.icm');
      return await page.evaluate(() => {
        //@Ulises: Refactor next release :troll:
        return document.querySelector("#areAplicacion > div:nth-child(2) > table > tbody > tr > td > div > a:last-child").getAttribute("onclick").split("'")[1];
      });
    } finally {
      // Always release the headless browser, even when goto/evaluate throws
      // (the original leaked it on error because close() was unreachable).
      await browser.close();
    }
  };

  const run = async () => {
    const totalPages = await paginationScraper();
    console.log("[INFO][Peluditos Project][Scraper] Total pages to scrap for pagination:", totalPages);

    // Pages must be scraped sequentially to avoid hammering the site.
    const paginationData = [];
    for (let i = 1; i <= totalPages; i++) {
      paginationData.push(await pageScraper(i));
    }
    const cleanData = [].concat(...paginationData);
    fs.writeFileSync(`${__dirname}/data/pagination.json`, JSON.stringify(cleanData));
    console.log("[INFO][Peluditos Project][Scraper] Ended with Pagiantion. File saved in ./data/pagination.json");
    console.log(`[INFO][Peluditos Project][Scraper] We discovered ${cleanData.length} Animals in total. Profile details extraction is about to start...`);

    // One profile at a time: write the per-animal JSON snapshot first, then
    // persist to the database.
    for (const item of cleanData) {
      const animal = await profileScraper(item);
      fs.writeFileSync(`${__dirname}/data/${item.uuid}.json`, JSON.stringify(animal));
      console.log("[INFO][Peluditos Project][Scraper] Added to file:", item.uuid);
      await db.addAnimal(animal);
      console.log("[INFO][Peluditos Project][Scraper] Added to database:", item.uuid);
    }
    console.log("[INFO][Peluditos Project][Scraper] Ended sucesfully. All data is now in the database");
    console.log("[INFO][Peluditos Project][Scraper]", totalTime(startTime));
  };

  // The original only attached .catch to the pagination step; the inner async
  // IIFE was a floating promise, so any error from pageScraper, fs,
  // profileScraper or db.addAnimal became an unhandled rejection. Running the
  // whole pipeline through one awaited chain routes every failure here.
  run().catch(console.log);
}
module.exports = scraper