From e8379302f688e4d38fd08475a02cc78dc0823510 Mon Sep 17 00:00:00 2001 From: Luan Cazarine Date: Fri, 30 Aug 2024 12:09:13 -0300 Subject: [PATCH 1/5] scrapfly init --- .../actions/account-info/account-info.mjs | 18 ++ .../ai-data-extraction/ai-data-extraction.mjs | 74 ++++++++ .../actions/scrape-page/scrape-page.mjs | 172 ++++++++++++++++++ components/scrapfly/package.json | 2 +- components/scrapfly/scrapfly.app.mjs | 82 ++++++++- 5 files changed, 342 insertions(+), 6 deletions(-) create mode 100644 components/scrapfly/actions/account-info/account-info.mjs create mode 100644 components/scrapfly/actions/ai-data-extraction/ai-data-extraction.mjs create mode 100644 components/scrapfly/actions/scrape-page/scrape-page.mjs diff --git a/components/scrapfly/actions/account-info/account-info.mjs b/components/scrapfly/actions/account-info/account-info.mjs new file mode 100644 index 0000000000000..9736c0e83edff --- /dev/null +++ b/components/scrapfly/actions/account-info/account-info.mjs @@ -0,0 +1,18 @@ +import scrapfly from "../../scrapfly.app.mjs"; +import { axios } from "@pipedream/platform"; + +export default { + key: "scrapfly-account-info", + name: "Retrieve Scrapfly Account Info", + description: "Retrieve current subscription and account usage details from Scrapfly. 
[See the documentation](https://scrapfly.io/docs/account#api)", + version: "0.0.{{ts}}", + type: "action", + props: { + scrapfly, + }, + async run({ $ }) { + const response = await this.scrapfly.getSubscriptionAndUsageDetails(); + $.export("$summary", "Successfully retrieved account information"); + return response; + }, +}; diff --git a/components/scrapfly/actions/ai-data-extraction/ai-data-extraction.mjs b/components/scrapfly/actions/ai-data-extraction/ai-data-extraction.mjs new file mode 100644 index 0000000000000..050256a364737 --- /dev/null +++ b/components/scrapfly/actions/ai-data-extraction/ai-data-extraction.mjs @@ -0,0 +1,74 @@ +import scrapfly from "../../scrapfly.app.mjs"; +import { axios } from "@pipedream/platform"; + +export default { + key: "scrapfly-ai-data-extraction", + name: "AI Data Extraction", + description: "Automate content extraction from any text-based source using AI, LLM, and custom parsing. [See the documentation](https://scrapfly.io/docs/extraction-api/getting-started)", + version: "0.0.{{ts}}", + type: "action", + props: { + scrapfly, + key: { + propDefinition: [ + scrapfly, + "key", + ], + }, + body: { + propDefinition: [ + scrapfly, + "body", + ], + }, + contentType: { + propDefinition: [ + scrapfly, + "contentType", + ], + }, + extractionPrompt: { + type: "string", + label: "Extraction Prompt", + description: "Instruction to extract data or ask a question on the scraped content with an LLM (Large Language Model).", + optional: true, + }, + extractionTemplate: { + type: "string", + label: "Extraction Template", + description: "Define an extraction template to get structured data. Use an ephemeral template (declared on the fly on the API call) or a stored template (declared in the dashboard) by using the template name.", + optional: true, + }, + extractionModel: { + type: "string", + label: "Extraction Model", + description: "AI Extraction to auto parse document to get structured data. 
E.g., `product`, `review`, `real-estate`, `article`.", + optional: true, + }, + charset: { + type: "string", + label: "Charset", + description: "Charset of the document passed in the body. If you are not sure, you can use the `auto` value and Scrapfly will try to detect it.", + default: "auto", + optional: true, + }, + }, + async run({ $ }) { + const params = { + extraction_prompt: this.extractionPrompt, + extraction_template: this.extractionTemplate, + extraction_model: this.extractionModel, + charset: this.charset, + }; + + const response = await this.scrapfly.automateContentExtraction({ + key: this.key, + body: this.body, + contentType: this.contentType, + ...params, + }); + + $.export("$summary", "Successfully extracted content"); + return response; + }, +}; diff --git a/components/scrapfly/actions/scrape-page/scrape-page.mjs b/components/scrapfly/actions/scrape-page/scrape-page.mjs new file mode 100644 index 0000000000000..683d694b17bb3 --- /dev/null +++ b/components/scrapfly/actions/scrape-page/scrape-page.mjs @@ -0,0 +1,172 @@ +import scrapfly from "../../scrapfly.app.mjs"; +import { axios } from "@pipedream/platform"; + +export default { + key: "scrapfly-scrape-page", + name: "Scrape Page", + description: "Extract data from a specified web page. [See the documentation](https://scrapfly.io/docs/scrape-api/getting-started)", + version: "0.0.{{ts}}", + type: "action", + props: { + scrapfly, + url: { + propDefinition: [ + scrapfly, + "url", + ], + }, + key: { + propDefinition: [ + scrapfly, + "key", + ], + }, + contentType: { + propDefinition: [ + scrapfly, + "contentType", + ], + optional: true, + }, + body: { + propDefinition: [ + scrapfly, + "body", + ], + optional: true, + }, + proxyPool: { + type: "string", + label: "Proxy Pool", + description: "Select the proxy pool to use.", + optional: true, + }, + headers: { + type: "string[]", + label: "Headers", + description: "Pass custom headers to the request. 
Must be URL encoded.", + optional: true, + }, + country: { + type: "string", + label: "Country", + description: "Proxy country location.", + optional: true, + }, + lang: { + type: "string", + label: "Language", + description: "Select page language.", + optional: true, + }, + os: { + type: "string", + label: "Operating System", + description: "Operating System, if not selected it's random.", + optional: true, + }, + timeout: { + type: "integer", + label: "Timeout", + description: "Timeout in milliseconds.", + optional: true, + }, + format: { + type: "string", + label: "Format", + description: "Format of the response.", + options: [ + "raw", + "text", + "markdown", + "clean_html", + "json", + ], + optional: true, + }, + retry: { + type: "boolean", + label: "Retry", + description: "Improve reliability with retries on failure.", + optional: true, + default: true, + }, + proxifiedResponse: { + type: "boolean", + label: "Proxified Response", + description: "Return the content of the page directly.", + optional: true, + default: false, + }, + debug: { + type: "boolean", + label: "Debug", + description: "Store the API result and take a screenshot if rendering js is enabled.", + optional: true, + default: false, + }, + correlationId: { + type: "string", + label: "Correlation ID", + description: "Helper ID for correlating a group of scrapes.", + optional: true, + }, + tags: { + type: "string[]", + label: "Tags", + description: "Add tags to your scrapes to group them.", + optional: true, + }, + dns: { + type: "boolean", + label: "DNS", + description: "Query and retrieve target DNS information.", + optional: true, + default: false, + }, + ssl: { + type: "boolean", + label: "SSL", + description: "SSL option.", + optional: true, + default: true, + }, + }, + async run({ $ }) { + const params = { + proxy_pool: this.proxyPool, + country: this.country, + lang: this.lang, + os: this.os, + timeout: this.timeout, + format: this.format, + retry: this.retry, + proxified_response: 
this.proxifiedResponse, + debug: this.debug, + correlation_id: this.correlationId, + tags: this.tags, + dns: this.dns, + ssl: this.ssl, + }; + + if (this.headers) { + params.headers = this.headers.reduce((acc, header) => { + const [ + key, + value, + ] = header.split("="); + acc[key] = value; + return acc; + }, {}); + } + + const response = await this.scrapfly.extractWebPageContent({ + url: this.url, + key: this.key, + ...params, + }); + + $.export("$summary", `Successfully scraped content from ${this.url}`); + return response; + }, +}; diff --git a/components/scrapfly/package.json b/components/scrapfly/package.json index 3f16ae334d2ad..baaaf2eded7a4 100644 --- a/components/scrapfly/package.json +++ b/components/scrapfly/package.json @@ -12,4 +12,4 @@ "publishConfig": { "access": "public" } -} \ No newline at end of file +} diff --git a/components/scrapfly/scrapfly.app.mjs b/components/scrapfly/scrapfly.app.mjs index fdde809425126..de6cc2e549d8a 100644 --- a/components/scrapfly/scrapfly.app.mjs +++ b/components/scrapfly/scrapfly.app.mjs @@ -1,11 +1,83 @@ +import { axios } from "@pipedream/platform"; + export default { type: "app", app: "scrapfly", - propDefinitions: {}, + propDefinitions: { + url: { + type: "string", + label: "URL", + description: "The URL of the web page to extract data from", + }, + key: { + type: "string", + label: "API Key", + description: "Your Scrapfly API key", + }, + body: { + type: "string", + label: "Body", + description: "The content of the page you want to extract data from", + }, + contentType: { + type: "string", + label: "Content Type", + description: "The content type of the document passed in the body", + options: [ + "text/html", + "text/markdown", + "text/plain", + "application/xml", + ], + }, + }, methods: { - // this.$auth contains connected account data - authKeys() { - console.log(Object.keys(this.$auth)); + _baseUrl() { + return "https://api.scrapfly.io"; + }, + async _makeRequest(opts = {}) { + const { + $ = this, method = 
"GET", path = "/", headers, ...otherOpts + } = opts; + return axios($, { + ...otherOpts, + method, + url: this._baseUrl() + path, + headers: { + ...headers, + Authorization: `Bearer ${this.$auth.api_key}`, + }, + }); + }, + async getSubscriptionAndUsageDetails() { + return this._makeRequest({ + path: "/account", + }); + }, + async extractWebPageContent({ + url, key, ...params + }) { + return this._makeRequest({ + method: "GET", + path: `/scrape?url=${encodeURIComponent(url)}&key=${key}`, + params, + }); + }, + async automateContentExtraction({ + key, body, contentType, ...params + }) { + return this._makeRequest({ + method: "POST", + path: "/extraction", + headers: { + "Content-Type": contentType, + }, + data: body, + params: { + key, + ...params, + }, + }); }, }, -}; \ No newline at end of file +}; From 46323660c239fb9da3ef7fd9b20d7d71d193da69 Mon Sep 17 00:00:00 2001 From: Luan Cazarine Date: Mon, 2 Sep 2024 13:52:47 -0300 Subject: [PATCH 2/5] [Components] scrapfly #13774 Actions - Account Info - Scrape Page - AI Data Extraction --- .../actions/account-info/account-info.mjs | 7 +- .../ai-data-extraction/ai-data-extraction.mjs | 61 +++++---- .../actions/scrape-page/scrape-page.mjs | 123 +++++++----------- components/scrapfly/common/constants.mjs | 24 ++++ components/scrapfly/common/utils.mjs | 31 +++++ components/scrapfly/package.json | 6 +- components/scrapfly/scrapfly.app.mjs | 63 ++++----- 7 files changed, 173 insertions(+), 142 deletions(-) create mode 100644 components/scrapfly/common/constants.mjs create mode 100644 components/scrapfly/common/utils.mjs diff --git a/components/scrapfly/actions/account-info/account-info.mjs b/components/scrapfly/actions/account-info/account-info.mjs index 9736c0e83edff..4d7c4969cdfd7 100644 --- a/components/scrapfly/actions/account-info/account-info.mjs +++ b/components/scrapfly/actions/account-info/account-info.mjs @@ -1,17 +1,18 @@ import scrapfly from "../../scrapfly.app.mjs"; -import { axios } from "@pipedream/platform"; 
export default { key: "scrapfly-account-info", name: "Retrieve Scrapfly Account Info", description: "Retrieve current subscription and account usage details from Scrapfly. [See the documentation](https://scrapfly.io/docs/account#api)", - version: "0.0.{{ts}}", + version: "0.0.1", type: "action", props: { scrapfly, }, async run({ $ }) { - const response = await this.scrapfly.getSubscriptionAndUsageDetails(); + const response = await this.scrapfly.getAccountInfo({ + $, + }); $.export("$summary", "Successfully retrieved account information"); return response; }, diff --git a/components/scrapfly/actions/ai-data-extraction/ai-data-extraction.mjs b/components/scrapfly/actions/ai-data-extraction/ai-data-extraction.mjs index 050256a364737..32aeb46450c94 100644 --- a/components/scrapfly/actions/ai-data-extraction/ai-data-extraction.mjs +++ b/components/scrapfly/actions/ai-data-extraction/ai-data-extraction.mjs @@ -1,20 +1,15 @@ +import fs from "fs"; +import { checkTmp } from "../../common/utils.mjs"; import scrapfly from "../../scrapfly.app.mjs"; -import { axios } from "@pipedream/platform"; export default { key: "scrapfly-ai-data-extraction", name: "AI Data Extraction", description: "Automate content extraction from any text-based source using AI, LLM, and custom parsing. [See the documentation](https://scrapfly.io/docs/extraction-api/getting-started)", - version: "0.0.{{ts}}", + version: "0.0.1", type: "action", props: { scrapfly, - key: { - propDefinition: [ - scrapfly, - "key", - ], - }, body: { propDefinition: [ scrapfly, @@ -27,10 +22,17 @@ export default { "contentType", ], }, - extractionPrompt: { + url: { + propDefinition: [ + scrapfly, + "url", + ], + }, + charset: { type: "string", - label: "Extraction Prompt", - description: "Instruction to extract data or ask a question on the scraped content with an LLM (Large Language Model).", + label: "Charset", + description: "Charset of the document pass in the body. 
If you are not sure, you can use the `auto` value and we will try to detect it. Bad charset can lead to bad extraction, so it's important to set it correctly. **The most common charset is `utf-8` for text document and `ascii` for binary**. The symptom of a bad charset is that the text is not correctly displayed (accent, special characters, etc).", + default: "auto", optional: true, }, extractionTemplate: { @@ -39,33 +41,40 @@ export default { description: "Define an extraction template to get structured data. Use an ephemeral template (declared on the fly on the API call) or a stored template (declared in the dashboard) by using the template name.", optional: true, }, + extractionPrompt: { + type: "string", + label: "Extraction Prompt", + description: "Instruction to extract data or ask a question on the scraped content with an LLM (Large Language Model). [Must be url encoded](https://scrapfly.io/web-scraping-tools/urlencode).", + }, extractionModel: { type: "string", label: "Extraction Model", description: "AI Extraction to auto parse document to get structured data. E.g., `product`, `review`, `real-estate`, `article`.", optional: true, }, - charset: { + webhookName: { type: "string", - label: "Charset", - description: "Charset of the document passed in the body. If you are not sure, you can use the `auto` value and Scrapfly will try to detect it.", - default: "auto", + label: "Webhook Name", + description: "Queue you scrape request and redirect API response to a provided webhook endpoint. You can create a webhook endpoint from your `dashboard`, it takes the name of the webhook. 
Webhooks are scoped to the given project/env.", optional: true, }, }, async run({ $ }) { - const params = { - extraction_prompt: this.extractionPrompt, - extraction_template: this.extractionTemplate, - extraction_model: this.extractionModel, - charset: this.charset, - }; - const response = await this.scrapfly.automateContentExtraction({ - key: this.key, - body: this.body, - contentType: this.contentType, - ...params, + $, + headers: { + "content-type": this.contentType, + }, + maxBodyLength: Infinity, + params: { + url: this.url, + charset: this.charset, + extraction_template: this.extractionTemplate, + extraction_prompt: this.extractionPrompt, + extraction_model: this.extractionModel, + webhook_name: this.webhookName, + }, + data: fs.readFileSync(checkTmp(this.body)).toString(), }); $.export("$summary", "Successfully extracted content"); diff --git a/components/scrapfly/actions/scrape-page/scrape-page.mjs b/components/scrapfly/actions/scrape-page/scrape-page.mjs index 683d694b17bb3..42cb0be647f0a 100644 --- a/components/scrapfly/actions/scrape-page/scrape-page.mjs +++ b/components/scrapfly/actions/scrape-page/scrape-page.mjs @@ -1,11 +1,16 @@ +import { ConfigurationError } from "@pipedream/platform"; +import { + FORMAT_OPTIONS, + PROXY_POOL_OPTIONS, +} from "../../common/constants.mjs"; +import { parseObject } from "../../common/utils.mjs"; import scrapfly from "../../scrapfly.app.mjs"; -import { axios } from "@pipedream/platform"; export default { key: "scrapfly-scrape-page", name: "Scrape Page", description: "Extract data from a specified web page. 
[See the documentation](https://scrapfly.io/docs/scrape-api/getting-started)", - version: "0.0.{{ts}}", + version: "0.0.1", type: "action", props: { scrapfly, @@ -15,73 +20,48 @@ export default { "url", ], }, - key: { - propDefinition: [ - scrapfly, - "key", - ], - }, - contentType: { - propDefinition: [ - scrapfly, - "contentType", - ], - optional: true, - }, - body: { - propDefinition: [ - scrapfly, - "body", - ], - optional: true, - }, proxyPool: { type: "string", label: "Proxy Pool", description: "Select the proxy pool to use.", optional: true, + options: PROXY_POOL_OPTIONS, }, headers: { - type: "string[]", + type: "object", label: "Headers", - description: "Pass custom headers to the request. Must be URL encoded.", + description: "Pass custom headers to the request.", optional: true, }, country: { type: "string", label: "Country", - description: "Proxy country location.", + description: "Proxy country location. If not set it chooses a random location available. A reference to a country must be ISO 3166 alpha-2 (2 letters). The available countries are defined by the proxy pool you use. [See the documentation](https://scrapfly.io/docs/scrape-api/getting-started#spec)", optional: true, }, lang: { type: "string", label: "Language", - description: "Select page language.", + description: "Select page language. By default it uses the language of the selected proxy location. Behind the scenes, it configures the `Accept-Language` HTTP header. If the website support the language, the content will be in that lang. **Note: you cannot set headers `Accept-Language` header manually**. [See the documentation](https://scrapfly.io/docs/scrape-api/getting-started#spec)", optional: true, }, os: { type: "string", label: "Operating System", - description: "Operating System, if not selected it's random.", + description: "Operating System, if not selected it's random. 
**Note: you cannot set os parameter and `User-Agent` header at the same time.** [See the documentation](https://scrapfly.io/docs/scrape-api/getting-started#spec)", optional: true, }, timeout: { type: "integer", label: "Timeout", - description: "Timeout in milliseconds.", + description: "Timeout in milliseconds. It represents the maximum time allowed for Scrapfly to perform the scrape. Since `timeout` is not trivial to understand see our [extended documentation on timeouts](https://scrapfly.io/docs/scrape-api/understand-timeout)", optional: true, }, format: { type: "string", label: "Format", description: "Format of the response.", - options: [ - "raw", - "text", - "markdown", - "clean_html", - "json", - ], + options: FORMAT_OPTIONS, optional: true, }, retry: { @@ -89,21 +69,18 @@ export default { label: "Retry", description: "Improve reliability with retries on failure.", optional: true, - default: true, }, proxifiedResponse: { type: "boolean", label: "Proxified Response", description: "Return the content of the page directly.", optional: true, - default: false, }, debug: { type: "boolean", label: "Debug", description: "Store the API result and take a screenshot if rendering js is enabled.", optional: true, - default: false, }, correlationId: { type: "string", @@ -122,51 +99,51 @@ export default { label: "DNS", description: "Query and retrieve target DNS information.", optional: true, - default: false, }, ssl: { type: "boolean", label: "SSL", description: "SSL option.", optional: true, - default: true, }, }, async run({ $ }) { - const params = { - proxy_pool: this.proxyPool, - country: this.country, - lang: this.lang, - os: this.os, - timeout: this.timeout, - format: this.format, - retry: this.retry, - proxified_response: this.proxifiedResponse, - debug: this.debug, - correlation_id: this.correlationId, - tags: this.tags, - dns: this.dns, - ssl: this.ssl, - }; - - if (this.headers) { - params.headers = this.headers.reduce((acc, header) => { - const [ - key, - value, 
- ] = header.split("="); - acc[key] = value; - return acc; - }, {}); - } + try { + let headers = ""; + if (this.headers) { + headers = Object.keys(parseObject(this.headers)) + .reduce((acc, key) => { + acc += `headers[${key}]=${encodeURIComponent(this.headers[key])}`; + return acc; + }, ""); + } + const params = { + url: this.url, + proxy_pool: this.proxyPool, + country: this.country, + lang: this.lang, + os: this.os, + timeout: this.timeout, + format: this.format, + retry: this.retry, + proxified_response: this.proxifiedResponse, + debug: this.debug, + correlation_id: this.correlationId, + tags: parseObject(this.tags), + dns: this.dns, + ssl: this.ssl, + ...headers, + }; - const response = await this.scrapfly.extractWebPageContent({ - url: this.url, - key: this.key, - ...params, - }); + const response = await this.scrapfly.extractWebPageContent({ + $, + params, + }); - $.export("$summary", `Successfully scraped content from ${this.url}`); - return response; + $.export("$summary", `Successfully scraped content from ${this.url}`); + return response; + } catch ({ response: { data: { message } } }) { + throw new ConfigurationError(message); + } }, }; diff --git a/components/scrapfly/common/constants.mjs b/components/scrapfly/common/constants.mjs new file mode 100644 index 0000000000000..640d307bef44a --- /dev/null +++ b/components/scrapfly/common/constants.mjs @@ -0,0 +1,24 @@ +export const PROXY_POOL_OPTIONS = [ + "public_datacenter_pool", + "public_resitential_pool", +]; + +export const FORMAT_OPTIONS = [ + "raw", + "text", + "markdown", + "markdown:no_links,no_imagesLLM", + "clean_html", + "json", +]; + +export const CONTENT_TYPE_OPTIONS = [ + "application/json", + "application/jsonld", + "application/xml", + "text/plain", + "text/html", + "text/markdown", + "text/csv", + "application/xhtml+xml", +]; diff --git a/components/scrapfly/common/utils.mjs b/components/scrapfly/common/utils.mjs new file mode 100644 index 0000000000000..0cd1a12b6a4ba --- /dev/null +++ 
b/components/scrapfly/common/utils.mjs @@ -0,0 +1,31 @@ +export const checkTmp = (filename) => { + if (!filename.startsWith("/tmp")) { + return `/tmp/${filename}`; + } + return filename; +}; + +export const parseObject = (obj) => { + if (!obj) return undefined; + + if (Array.isArray(obj)) { + return obj.map((item) => { + if (typeof item === "string") { + try { + return JSON.parse(item); + } catch (e) { + return item; + } + } + return item; + }); + } + if (typeof obj === "string") { + try { + return JSON.parse(obj); + } catch (e) { + return obj; + } + } + return obj; +}; diff --git a/components/scrapfly/package.json b/components/scrapfly/package.json index baaaf2eded7a4..f2bc0dcc759f4 100644 --- a/components/scrapfly/package.json +++ b/components/scrapfly/package.json @@ -1,6 +1,6 @@ { "name": "@pipedream/scrapfly", - "version": "0.0.1", + "version": "0.1.0", "description": "Pipedream Scrapfly Components", "main": "scrapfly.app.mjs", "keywords": [ @@ -11,5 +11,9 @@ "author": "Pipedream (https://pipedream.com/)", "publishConfig": { "access": "public" + }, + "dependencies": { + "@pipedream/platform": "^3.0.1" } } + diff --git a/components/scrapfly/scrapfly.app.mjs b/components/scrapfly/scrapfly.app.mjs index de6cc2e549d8a..53e4c77fa946b 100644 --- a/components/scrapfly/scrapfly.app.mjs +++ b/components/scrapfly/scrapfly.app.mjs @@ -1,4 +1,5 @@ import { axios } from "@pipedream/platform"; +import { CONTENT_TYPE_OPTIONS } from "./common/constants.mjs"; export default { type: "app", @@ -7,76 +8,60 @@ export default { url: { type: "string", label: "URL", - description: "The URL of the web page to extract data from", - }, - key: { - type: "string", - label: "API Key", - description: "Your Scrapfly API key", + description: "This URL is used to transform any relative URLs in the document into absolute URLs automatically. It can be either the base URL or the exact URL of the document. 
[Must be url encoded](https://scrapfly.io/web-scraping-tools/urlencode).", }, body: { type: "string", label: "Body", - description: "The content of the page you want to extract data from", + description: "The request body must contain the content of the page you want to extract data from. The content must be in the format specified by the `content-type` header or via the `content_type` HTTP parameter. Provide a file from `/tmp`. To upload a file to `/tmp` folder, please follow the doc [here](https://pipedream.com/docs/code/nodejs/working-with-files/#writing-a-file-to-tmp)", }, contentType: { type: "string", label: "Content Type", - description: "The content type of the document passed in the body", - options: [ - "text/html", - "text/markdown", - "text/plain", - "application/xml", - ], + description: "Content type of the document pass in the body - You must specify the content type of the document by using this parameter or via the `content-type` header. This parameter has priority over the `content-type` header.", + options: CONTENT_TYPE_OPTIONS, }, }, methods: { _baseUrl() { return "https://api.scrapfly.io"; }, - async _makeRequest(opts = {}) { - const { - $ = this, method = "GET", path = "/", headers, ...otherOpts - } = opts; + _params(params = {}) { + return { + ...params, + key: `${this.$auth.api_key}`, + }; + }, + _makeRequest({ + $ = this, params, path, ...opts + }) { return axios($, { - ...otherOpts, - method, url: this._baseUrl() + path, - headers: { - ...headers, - Authorization: `Bearer ${this.$auth.api_key}`, - }, + params: this._params(params), + ...opts, }); }, - async getSubscriptionAndUsageDetails() { + getAccountInfo(opts = {}) { return this._makeRequest({ path: "/account", + ...opts, }); }, - async extractWebPageContent({ - url, key, ...params + extractWebPageContent({ + params, ...opts }) { return this._makeRequest({ method: "GET", - path: `/scrape?url=${encodeURIComponent(url)}&key=${key}`, + path: "/scrape", params, + ...opts, }); }, - async 
automateContentExtraction({ - key, body, contentType, ...params - }) { + automateContentExtraction(opts = {}) { return this._makeRequest({ method: "POST", path: "/extraction", - headers: { - "Content-Type": contentType, - }, - data: body, - params: { - key, - ...params, - }, + ...opts, }); }, }, From 2afb3ddef2347678d4751a493bb5db77f772a0b7 Mon Sep 17 00:00:00 2001 From: Luan Cazarine Date: Mon, 2 Sep 2024 13:54:11 -0300 Subject: [PATCH 3/5] pnpm update --- pnpm-lock.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b94c3b11b319e..21b86670bb842 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -8221,7 +8221,10 @@ importers: specifiers: {} components/scrapfly: - specifiers: {} + specifiers: + '@pipedream/platform': ^3.0.1 + dependencies: + '@pipedream/platform': 3.0.1 components/scrapingant: specifiers: {} From a314f743e0686a63f05110d1ebe39d9eefa6d516 Mon Sep 17 00:00:00 2001 From: Luan Cazarine Date: Mon, 2 Sep 2024 14:13:39 -0300 Subject: [PATCH 4/5] Update components/scrapfly/common/constants.mjs Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- components/scrapfly/common/constants.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/scrapfly/common/constants.mjs b/components/scrapfly/common/constants.mjs index 640d307bef44a..5abdd1402a944 100644 --- a/components/scrapfly/common/constants.mjs +++ b/components/scrapfly/common/constants.mjs @@ -1,6 +1,6 @@ export const PROXY_POOL_OPTIONS = [ "public_datacenter_pool", - "public_resitential_pool", + "public_residential_pool", ]; export const FORMAT_OPTIONS = [ From 56e2f2f1de315dd4092a2ef9798f56d55024b40c Mon Sep 17 00:00:00 2001 From: Luan Cazarine Date: Wed, 4 Sep 2024 15:30:59 -0300 Subject: [PATCH 5/5] some adjusts --- .../ai-data-extraction/ai-data-extraction.mjs | 5 + .../actions/scrape-page/scrape-page.mjs | 39 +- components/scrapfly/common/constants.mjs | 707 
++++++++++++++++++ 3 files changed, 735 insertions(+), 16 deletions(-) diff --git a/components/scrapfly/actions/ai-data-extraction/ai-data-extraction.mjs b/components/scrapfly/actions/ai-data-extraction/ai-data-extraction.mjs index 32aeb46450c94..0e2078c4135c7 100644 --- a/components/scrapfly/actions/ai-data-extraction/ai-data-extraction.mjs +++ b/components/scrapfly/actions/ai-data-extraction/ai-data-extraction.mjs @@ -1,3 +1,4 @@ +import { ConfigurationError } from "@pipedream/platform"; import fs from "fs"; import { checkTmp } from "../../common/utils.mjs"; import scrapfly from "../../scrapfly.app.mjs"; @@ -45,6 +46,7 @@ export default { type: "string", label: "Extraction Prompt", description: "Instruction to extract data or ask a question on the scraped content with an LLM (Large Language Model). [Must be url encoded](https://scrapfly.io/web-scraping-tools/urlencode).", + optional: true, }, extractionModel: { type: "string", @@ -60,6 +62,9 @@ export default { }, }, async run({ $ }) { + if (!this.extractionTemplate && !this.extractionPrompt && !this.extractionModel) { + throw new ConfigurationError("You must provide at least **Extraction Template**, **Extraction Prompt** or **Extraction Model**"); + } const response = await this.scrapfly.automateContentExtraction({ $, headers: { diff --git a/components/scrapfly/actions/scrape-page/scrape-page.mjs b/components/scrapfly/actions/scrape-page/scrape-page.mjs index 42cb0be647f0a..554872c4a1c1f 100644 --- a/components/scrapfly/actions/scrape-page/scrape-page.mjs +++ b/components/scrapfly/actions/scrape-page/scrape-page.mjs @@ -1,6 +1,7 @@ import { ConfigurationError } from "@pipedream/platform"; import { FORMAT_OPTIONS, + PROXY_COUNTRY_OPTIONS, PROXY_POOL_OPTIONS, } from "../../common/constants.mjs"; import { parseObject } from "../../common/utils.mjs"; @@ -20,25 +21,12 @@ export default { "url", ], }, - proxyPool: { - type: "string", - label: "Proxy Pool", - description: "Select the proxy pool to use.", - optional: 
true, - options: PROXY_POOL_OPTIONS, - }, headers: { type: "object", label: "Headers", description: "Pass custom headers to the request.", optional: true, }, - country: { - type: "string", - label: "Country", - description: "Proxy country location. If not set it chooses a random location available. A reference to a country must be ISO 3166 alpha-2 (2 letters). The available countries are defined by the proxy pool you use. [See the documentation](https://scrapfly.io/docs/scrape-api/getting-started#spec)", - optional: true, - }, lang: { type: "string", label: "Language", @@ -106,6 +94,25 @@ export default { description: "SSL option.", optional: true, }, + proxyPool: { + type: "string", + label: "Proxy Pool", + description: "Select the proxy pool to use.", + optional: true, + options: PROXY_POOL_OPTIONS, + reloadProps: true, + }, + }, + async additionalProps() { + const props = {}; + props.country = { + type: "string", + label: "Country", + description: "Proxy country location. If not set it chooses a random location available. A reference to a country must be ISO 3166 alpha-2 (2 letters). The available countries are defined by the proxy pool you use. 
[See the documentation](https://scrapfly.io/docs/scrape-api/getting-started#spec)", + optional: true, + options: PROXY_COUNTRY_OPTIONS[this.proxyPool], + }; + return props; }, async run({ $ }) { try { @@ -113,9 +120,9 @@ export default { if (this.headers) { headers = Object.keys(parseObject(this.headers)) .reduce((acc, key) => { - acc += `headers[${key}]=${encodeURIComponent(this.headers[key])}`; + acc.push(`headers[${key}]=${encodeURIComponent(this.headers[key])}`); return acc; - }, ""); + }, []); } const params = { url: this.url, @@ -132,7 +139,7 @@ export default { tags: parseObject(this.tags), dns: this.dns, ssl: this.ssl, - ...headers, + headers, }; const response = await this.scrapfly.extractWebPageContent({ diff --git a/components/scrapfly/common/constants.mjs b/components/scrapfly/common/constants.mjs index 5abdd1402a944..625839df3ff85 100644 --- a/components/scrapfly/common/constants.mjs +++ b/components/scrapfly/common/constants.mjs @@ -22,3 +22,710 @@ export const CONTENT_TYPE_OPTIONS = [ "text/csv", "application/xhtml+xml", ]; + +export const PROXY_COUNTRY_OPTIONS = { + public_datacenter_pool: [ + { + label: "Albania", + value: "al", + }, + { + label: "Armenia", + value: "am", + }, + { + label: "Argentina", + value: "ar", + }, + { + label: "Austria", + value: "at", + }, + { + label: "Australia", + value: "au", + }, + { + label: "Belgium", + value: "be", + }, + { + label: "Bulgaria", + value: "bg", + }, + { + label: "Bolivia", + value: "bo", + }, + { + label: "Brazil", + value: "br", + }, + { + label: "Belarus", + value: "by", + }, + { + label: "Canada", + value: "ca", + }, + { + label: "Switzerland", + value: "ch", + }, + { + label: "Chile", + value: "cl", + }, + { + label: "China", + value: "cn", + }, + { + label: "Colombia", + value: "co", + }, + { + label: "Czechia", + value: "cz", + }, + { + label: "Germany", + value: "de", + }, + { + label: "Denmark", + value: "dk", + }, + { + label: "Ecuador", + value: "ec", + }, + { + label: "Estonia", + value: 
"ee", + }, + { + label: "Spain", + value: "es", + }, + { + label: "Finland", + value: "fi", + }, + { + label: "France", + value: "fr", + }, + { + label: "United Kingdom", + value: "gb", + }, + { + label: "Georgia", + value: "ge", + }, + { + label: "Greece", + value: "gr", + }, + { + label: "Croatia", + value: "hr", + }, + { + label: "Hungary", + value: "hu", + }, + { + label: "Ireland", + value: "ie", + }, + { + label: "Israel", + value: "il", + }, + { + label: "India", + value: "in", + }, + { + label: "Iceland", + value: "is", + }, + { + label: "Italy", + value: "it", + }, + { + label: "Japan", + value: "jp", + }, + { + label: "South Korea", + value: "kr", + }, + { + label: "Lithuania", + value: "lt", + }, + { + label: "Latvia", + value: "lv", + }, + { + label: "Mexico", + value: "mx", + }, + { + label: "Netherlands", + value: "nl", + }, + { + label: "Norway", + value: "no", + }, + { + label: "New Zealand", + value: "nz", + }, + { + label: "Peru", + value: "pe", + }, + { + label: "Pakistan", + value: "pk", + }, + { + label: "Poland", + value: "pl", + }, + { + label: "Portugal", + value: "pt", + }, + { + label: "Romania", + value: "ro", + }, + { + label: "Russia", + value: "ru", + }, + { + label: "Saudi Arabia", + value: "sa", + }, + { + label: "Sweden", + value: "se", + }, + { + label: "Slovakia", + value: "sk", + }, + { + label: "Türkiye", + value: "tr", + }, + { + label: "Ukraine", + value: "ua", + }, + { + label: "United States", + value: "us", + }, + ], + public_residential_pool: [ + { + label: "Andorra", + value: "ad", + }, + { + label: "United Arab Emirates", + value: "ae", + }, + { + label: "Afghanistan", + value: "af", + }, + { + label: "Albania", + value: "al", + }, + { + label: "Armenia", + value: "am", + }, + { + label: "Angola", + value: "ao", + }, + { + label: "Argentina", + value: "ar", + }, + { + label: "Austria", + value: "at", + }, + { + label: "Australia", + value: "au", + }, + { + label: "Aruba", + value: "aw", + }, + { + label: "Azerbaijan", + 
value: "az", + }, + { + label: "Bosnia & Herzegovina", + value: "ba", + }, + { + label: "Bangladesh", + value: "bd", + }, + { + label: "Belgium", + value: "be", + }, + { + label: "Bulgaria", + value: "bg", + }, + { + label: "Bahrain", + value: "bh", + }, + { + label: "Benin", + value: "bj", + }, + { + label: "Bolivia", + value: "bo", + }, + { + label: "Brazil", + value: "br", + }, + { + label: "Bahamas", + value: "bs", + }, + { + label: "Bhutan", + value: "bt", + }, + { + label: "Belarus", + value: "by", + }, + { + label: "Belize", + value: "bz", + }, + { + label: "Canada", + value: "ca", + }, + { + label: "Switzerland", + value: "ch", + }, + { + label: "Côte d’Ivoire", + value: "ci", + }, + { + label: "Chile", + value: "cl", + }, + { + label: "China", + value: "cn", + }, + { + label: "Colombia", + value: "co", + }, + { + label: "Costa Rica", + value: "cr", + }, + { + label: "Cuba", + value: "cu", + }, + { + label: "Czechia", + value: "cz", + }, + { + label: "Germany", + value: "de", + }, + { + label: "Denmark", + value: "dk", + }, + { + label: "Dominica", + value: "dm", + }, + { + label: "Ecuador", + value: "ec", + }, + { + label: "Estonia", + value: "ee", + }, + { + label: "Egypt", + value: "eg", + }, + { + label: "Spain", + value: "es", + }, + { + label: "Ethiopia", + value: "et", + }, + { + label: "Finland", + value: "fi", + }, + { + label: "Fiji", + value: "fj", + }, + { + label: "France", + value: "fr", + }, + { + label: "United Kingdom", + value: "gb", + }, + { + label: "Georgia", + value: "ge", + }, + { + label: "Ghana", + value: "gh", + }, + { + label: "Gambia", + value: "gm", + }, + { + label: "Greece", + value: "gr", + }, + { + label: "Hong Kong SAR China", + value: "hk", + }, + { + label: "Honduras", + value: "hn", + }, + { + label: "Croatia", + value: "hr", + }, + { + label: "Haiti", + value: "ht", + }, + { + label: "Hungary", + value: "hu", + }, + { + label: "Indonesia", + value: "id", + }, + { + label: "Ireland", + value: "ie", + }, + { + label: 
"Israel", + value: "il", + }, + { + label: "India", + value: "in", + }, + { + label: "Iraq", + value: "iq", + }, + { + label: "Iran", + value: "ir", + }, + { + label: "Iceland", + value: "is", + }, + { + label: "Italy", + value: "it", + }, + { + label: "Jordan", + value: "jo", + }, + { + label: "Japan", + value: "jp", + }, + { + label: "Kenya", + value: "ke", + }, + { + label: "Cambodia", + value: "kh", + }, + { + label: "South Korea", + value: "kr", + }, + { + label: "Kazakhstan", + value: "kz", + }, + { + label: "Lebanon", + value: "lb", + }, + { + label: "Liberia", + value: "lr", + }, + { + label: "Lithuania", + value: "lt", + }, + { + label: "Latvia", + value: "lv", + }, + { + label: "Morocco", + value: "ma", + }, + { + label: "Monaco", + value: "mc", + }, + { + label: "Madagascar", + value: "mg", + }, + { + label: "North Macedonia", + value: "mk", + }, + { + label: "Mongolia", + value: "mn", + }, + { + label: "Mauritania", + value: "mr", + }, + { + label: "Malta", + value: "mt", + }, + { + label: "Mauritius", + value: "mu", + }, + { + label: "Maldives", + value: "mv", + }, + { + label: "Mexico", + value: "mx", + }, + { + label: "Malaysia", + value: "my", + }, + { + label: "Mozambique", + value: "mz", + }, + { + label: "Nigeria", + value: "ng", + }, + { + label: "Netherlands", + value: "nl", + }, + { + label: "Norway", + value: "no", + }, + { + label: "New Zealand", + value: "nz", + }, + { + label: "Oman", + value: "om", + }, + { + label: "Panama", + value: "pa", + }, + { + label: "Peru", + value: "pe", + }, + { + label: "Philippines", + value: "ph", + }, + { + label: "Pakistan", + value: "pk", + }, + { + label: "Poland", + value: "pl", + }, + { + label: "Puerto Rico", + value: "pr", + }, + { + label: "Portugal", + value: "pt", + }, + { + label: "Paraguay", + value: "py", + }, + { + label: "Qatar", + value: "qa", + }, + { + label: "Romania", + value: "ro", + }, + { + label: "Serbia", + value: "rs", + }, + { + label: "Russia", + value: "ru", + }, + { + label: 
"Saudi Arabia", + value: "sa", + }, + { + label: "Seychelles", + value: "sc", + }, + { + label: "Sudan", + value: "sd", + }, + { + label: "Sweden", + value: "se", + }, + { + label: "Singapore", + value: "sg", + }, + { + label: "Slovenia", + value: "si", + }, + { + label: "Slovakia", + value: "sk", + }, + { + label: "Senegal", + value: "sn", + }, + { + label: "South Sudan", + value: "ss", + }, + { + label: "Tunisia", + value: "tn", + }, + { + label: "Türkiye", + value: "tr", + }, + { + label: "Taiwan", + value: "tw", + }, + { + label: "Ukraine", + value: "ua", + }, + { + label: "Uganda", + value: "ug", + }, + { + label: "United States", + value: "us", + }, + { + label: "Uruguay", + value: "uy", + }, + { + label: "Uzbekistan", + value: "uz", + }, + { + label: "Venezuela", + value: "ve", + }, + { + label: "British Virgin Islands", + value: "vg", + }, + { + label: "Vietnam", + value: "vn", + }, + { + label: "Yemen", + value: "ye", + }, + { + label: "South Africa", + value: "za", + }, + ], +};