Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
613 changes: 613 additions & 0 deletions scrapegraph-js/examples/stealth_mode_example.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion scrapegraph-js/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion scrapegraph-js/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "scrapegraph-js",
"author": "ScrapeGraphAI",
"version": "0.1.6",
"version": "0.2.0",
"description": "Scrape and extract structured data from a webpage using ScrapeGraphAI's APIs. Supports cookies for authentication, infinite scrolling, and pagination.",
"repository": {
"type": "git",
Expand Down
13 changes: 9 additions & 4 deletions scrapegraph-js/src/agenticScraper.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import { getMockResponse } from './utils/mockResponse.js';
* @param {Object} options - Optional configuration options
* @param {boolean} [options.mock] - Override mock mode for this request
* @param {boolean} [options.renderHeavyJs=false] - Whether to render heavy JavaScript on the page
* @param {boolean} [options.stealth=false] - Enable stealth mode to avoid bot detection
* @returns {Promise<Object>} Response from the API containing request_id and initial status
* @throws {Error} Will throw an error in case of an HTTP failure or invalid parameters.
*
Expand Down Expand Up @@ -52,9 +53,9 @@ import { getMockResponse } from './utils/mockResponse.js';
*
* try {
* const result = await agenticScraper(
* apiKey,
* url,
* steps,
* apiKey,
* url,
* steps,
* true,
* "Extract user information and available dashboard sections",
* outputSchema,
Expand All @@ -66,7 +67,7 @@ import { getMockResponse } from './utils/mockResponse.js';
* }
*/
export async function agenticScraper(apiKey, url, steps, useSession = true, userPrompt = null, outputSchema = null, aiExtraction = false, options = {}) {
const { mock = null, renderHeavyJs = false } = options;
const { mock = null, renderHeavyJs = false, stealth = false } = options;

// Check if mock mode is enabled
const useMock = mock !== null ? mock : isMockEnabled();
Expand Down Expand Up @@ -133,6 +134,10 @@ export async function agenticScraper(apiKey, url, steps, useSession = true, user
render_heavy_js: renderHeavyJs,
};

if (stealth) {
payload.stealth = stealth;
}

// Add AI extraction parameters if enabled
if (aiExtraction) {
payload.user_prompt = userPrompt;
Expand Down
7 changes: 6 additions & 1 deletion scrapegraph-js/src/crawl.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import { getMockResponse } from './utils/mockResponse.js';
* @param {number} [options.batchSize=1] - Batch size for processing pages (1-10)
* @param {boolean} [options.mock] - Override mock mode for this request
* @param {boolean} [options.renderHeavyJs=false] - Whether to render heavy JavaScript on the page
* @param {boolean} [options.stealth=false] - Enable stealth mode to avoid bot detection
* @returns {Promise<Object>} The crawl job response
* @throws {Error} Throws an error if the HTTP request fails
*/
Expand All @@ -32,7 +33,7 @@ export async function crawl(
schema,
options = {}
) {
const { mock = null, renderHeavyJs = false } = options;
const { mock = null, renderHeavyJs = false, stealth = false } = options;

// Check if mock mode is enabled
const useMock = mock !== null ? mock : isMockEnabled();
Expand Down Expand Up @@ -83,6 +84,10 @@ export async function crawl(
render_heavy_js: renderHeavyJs,
};

if (stealth) {
payload.stealth = stealth;
}

try {
const response = await axios.post(endpoint, payload, { headers });
return response.data;
Expand Down
7 changes: 6 additions & 1 deletion scrapegraph-js/src/markdownify.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@ import { getMockResponse } from './utils/mockResponse.js';
* @param {string} url - The URL of the webpage to be converted.
* @param {Object} options - Optional configuration options.
* @param {boolean} [options.mock] - Override mock mode for this request
* @param {boolean} [options.stealth=false] - Enable stealth mode to avoid bot detection
* @returns {Promise<string>} A promise that resolves to the markdown representation of the webpage.
* @throws {Error} Throws an error if the HTTP request fails.
*/
export async function markdownify(apiKey, url, options = {}) {
const { mock = null } = options;
const { mock = null, stealth = false } = options;

// Check if mock mode is enabled
const useMock = mock !== null ? mock : isMockEnabled();
Expand All @@ -36,6 +37,10 @@ export async function markdownify(apiKey, url, options = {}) {
website_url: url,
};

if (stealth) {
payload.stealth = stealth;
}

try {
const response = await axios.post(endpoint, payload, { headers });
return response.data;
Expand Down
8 changes: 7 additions & 1 deletion scrapegraph-js/src/scrape.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { getMockResponse, createMockAxiosResponse } from './utils/mockResponse.j
* @param {Object} options - Optional configuration options.
* @param {boolean} [options.renderHeavyJs=false] - Whether to render heavy JavaScript (defaults to false).
* @param {Object} [options.headers] - Optional custom headers to send with the request.
* @param {boolean} [options.stealth=false] - Enable stealth mode to avoid bot detection
* @returns {Promise<Object>} A promise that resolves to the HTML content and metadata.
* @throws {Error} Throws an error if the HTTP request fails.
*
Expand Down Expand Up @@ -47,7 +48,8 @@ export async function scrape(apiKey, url, options = {}) {
const {
renderHeavyJs = false,
headers: customHeaders = {},
mock = null
mock = null,
stealth = false
} = options;

// Check if mock mode is enabled
Expand All @@ -73,6 +75,10 @@ export async function scrape(apiKey, url, options = {}) {
render_heavy_js: renderHeavyJs,
};

if (stealth) {
payload.stealth = stealth;
}

// Only include headers in payload if they are provided
if (Object.keys(customHeaders).length > 0) {
payload.headers = customHeaders;
Expand Down
7 changes: 6 additions & 1 deletion scrapegraph-js/src/searchScraper.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,12 @@ import { getMockResponse } from './utils/mockResponse.js';
* @param {boolean} [options.renderHeavyJs=false] - Whether to render heavy JavaScript on the page
* @param {boolean} [options.extractionMode=true] - Whether to use AI extraction (true) or markdown conversion (false).
* AI extraction costs 10 credits per page, markdown conversion costs 2 credits per page.
* @param {boolean} [options.stealth=false] - Enable stealth mode to avoid bot detection
* @returns {Promise<string>} Extracted data in JSON format matching the provided schema
* @throws {Error} Will throw an error in case of an HTTP failure.
*/
export async function searchScraper(apiKey, prompt, numResults = 3, schema = null, userAgent = null, options = {}) {
const { mock = null, renderHeavyJs = false, extractionMode = true } = options;
const { mock = null, renderHeavyJs = false, extractionMode = true, stealth = false } = options;

// Check if mock mode is enabled
const useMock = mock !== null ? mock : isMockEnabled();
Expand Down Expand Up @@ -56,6 +57,10 @@ export async function searchScraper(apiKey, prompt, numResults = 3, schema = nul
extraction_mode: extractionMode,
};

if (stealth) {
payload.stealth = stealth;
}

if (schema) {
if (schema instanceof ZodType) {
payload.output_schema = zodToJsonSchema(schema);
Expand Down
7 changes: 6 additions & 1 deletion scrapegraph-js/src/smartScraper.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@ import { getMockResponse, createMockAxiosResponse } from './utils/mockResponse.j
* @param {number} [totalPages] - Optional number of pages to scrape (1-10). If not provided, only the first page will be scraped.
* @param {Object} [cookies] - Optional cookies object for authentication and session management
* @param {boolean} [renderHeavyJs] - Optional flag to enable heavy JavaScript rendering on the page
* @param {boolean} [stealth] - Optional flag to enable stealth mode to avoid bot detection
* @returns {Promise<string>} Extracted data in JSON format matching the provided schema
* @throws {Error} Will throw an error in case of an HTTP failure.
*/
export async function smartScraper(apiKey, url, prompt, schema = null, numberOfScrolls = null, totalPages = null, cookies = null, options = {}, plain_text = false, renderHeavyJs = false) {
export async function smartScraper(apiKey, url, prompt, schema = null, numberOfScrolls = null, totalPages = null, cookies = null, options = {}, plain_text = false, renderHeavyJs = false, stealth = false) {
const { mock = null } = options;

// Check if mock mode is enabled
Expand Down Expand Up @@ -79,6 +80,10 @@ export async function smartScraper(apiKey, url, prompt, schema = null, numberOfS
payload.total_pages = totalPages;
}

if (stealth) {
payload.stealth = stealth;
}

try {
const response = await axios.post(endpoint, payload, { headers });
return response.data;
Expand Down
Loading
Loading