-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.ts
109 lines (97 loc) · 2.61 KB
/
index.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import { Page, EvaluateFunc } from "puppeteer";
import { Options } from "./types";
/**
 * Scrolls an infinitely loading page and collects items from it.
 *
 * Reads four fields from `options`:
 * - `selector`: CSS selector of the elements to collect.
 * - `pageFunction`: function evaluated in the page to produce the items;
 *   only used when no `selector` is given.
 * - `itemCount`: number of items to collect (default 10).
 * - `scrollDelay`: pause in milliseconds after each scroll (default 1000).
 *
 * @param page - Puppeteer page to scroll.
 * @param options - Scroller options; `selector` or `pageFunction` is required.
 * @returns At most `itemCount` items, or an empty array when scrolling or
 *   extraction fails (the failure is logged, not rethrown).
 * @throws Error when the options fail validation.
 */
async function puppeteerInfiniteScroller(
  page: Page,
  options: Options
): Promise<any[]> {
  const {
    scrollDelay = 1000,
    itemCount = 10,
    pageFunction,
    selector,
  } = options;

  // Validation happens outside the try/catch below on purpose: bad options
  // are a caller error and must surface instead of turning into `[]`.
  if (!selector && !pageFunction) {
    throw new Error(
      "Either selector or pageFunction must be provided as an option"
    );
  }
  if (selector && typeof selector !== "string") {
    throw new Error("Selector must be a string");
  }
  if (pageFunction && typeof pageFunction !== "function") {
    throw new Error("PageFunction must be a function");
  }
  // `typeof NaN === "number"`, so NaN has to be rejected explicitly; a NaN
  // target can never be reached by the scroll loop.
  if (typeof itemCount !== "number" || Number.isNaN(itemCount)) {
    throw new Error("ItemCount must be a number");
  }
  if (typeof scrollDelay !== "number" || Number.isNaN(scrollDelay)) {
    throw new Error("ScrollDelay must be a number");
  }

  try {
    return await recursiveScroll(
      page,
      [],
      itemCount,
      selector,
      pageFunction,
      scrollDelay
    );
  } catch (e: unknown) {
    // Anything can be thrown, so only read `.message` from real Errors.
    console.log("Error:", e instanceof Error ? e.message : e);
    return []; // Best effort: a failed scroll yields no items
  }
}
/**
 * Extracts items from the page, scrolling to the bottom and extracting again
 * until at least `itemCount` items are available.
 *
 * The name is historical: the work is done in a loop, so long scroll
 * sessions do not build up a chain of nested promises.
 *
 * @param page - Puppeteer page to scroll.
 * @param items - Items collected before this call; not mutated.
 * @param itemCount - Number of items to return.
 * @param selector - CSS selector of the elements to collect. Each element
 *   becomes an object holding its `tagName` plus all of its attributes.
 * @param pageFunction - Function evaluated in the page when no selector is
 *   given; its returned array is appended to the collected items.
 * @param scrollDelay - Pause in milliseconds after each scroll.
 * @returns The first `itemCount` collected items, as a new array.
 * @throws Whatever `page.evaluate` / `page.waitForFunction` reject with,
 *   e.g. when the page height stops growing before enough items exist.
 */
async function recursiveScroll(
  page: Page,
  items: any[],
  itemCount: number,
  selector: string | undefined,
  pageFunction: Function | undefined,
  scrollDelay: number
): Promise<any[]> {
  let collected = items;

  while (collected.length < itemCount) {
    if (selector) {
      const snapshot: object[] = await page.evaluate((sel) => {
        return Array.from(document.querySelectorAll(sel)).map((element) => {
          const attributes = Array.from(element.attributes).reduce(
            (acc: any, attr) => {
              acc[attr.name] = element.getAttribute(attr.name);
              return acc;
            },
            {}
          );
          return {
            tagName: element.tagName,
            ...attributes,
          };
        });
      }, selector);
      // querySelectorAll returns every match currently in the document, so
      // each pass is a full snapshot. It replaces the previous snapshot
      // instead of being appended, which would repeat earlier elements.
      collected = [...items, ...snapshot];
    } else if (pageFunction) {
      const extracted = (await page.evaluate(
        pageFunction as EvaluateFunc<any>
      )) as object[];
      // What a custom function returns on each pass is unknown here, so its
      // results are appended as-is.
      collected = [...collected, ...extracted];
    }

    // Check before scrolling: when the page already holds enough items,
    // waiting for it to grow can only time out and lose the results.
    if (collected.length >= itemCount) {
      break;
    }

    const previousHeight = await page.evaluate("document.body.scrollHeight");
    await page.evaluate("window.scrollTo(0, document.body.scrollHeight)");
    await page.waitForFunction(`document.body.scrollHeight > ${previousHeight}`);
    // Plain timer instead of page.waitForTimeout, which newer Puppeteer
    // releases no longer provide.
    await new Promise<void>((resolve) => setTimeout(resolve, scrollDelay));
  }

  // slice the array to match the itemCount
  return collected.slice(0, itemCount);
}
// Expose the scroller as the module's CommonJS export.
module.exports = puppeteerInfiniteScroller;