From 184b9d9bd869e804304d841489dc0d7a97019901 Mon Sep 17 00:00:00 2001 From: Luan Cazarine Date: Wed, 15 Jan 2025 13:49:29 -0300 Subject: [PATCH 1/6] ocrspace init --- .../actions/get-ocr-result/get-ocr-result.mjs | 26 ++ .../actions/process-image/process-image.mjs | 53 +++ .../actions/process-pdf/process-pdf.mjs | 54 +++ components/ocrspace/ocrspace.app.mjs | 356 +++++++++++++++++- .../new-file-uploaded/new-file-uploaded.mjs | 111 ++++++ .../new-ocr-job-completed-instant.mjs | 67 ++++ 6 files changed, 665 insertions(+), 2 deletions(-) create mode 100644 components/ocrspace/actions/get-ocr-result/get-ocr-result.mjs create mode 100644 components/ocrspace/actions/process-image/process-image.mjs create mode 100644 components/ocrspace/actions/process-pdf/process-pdf.mjs create mode 100644 components/ocrspace/sources/new-file-uploaded/new-file-uploaded.mjs create mode 100644 components/ocrspace/sources/new-ocr-job-completed-instant/new-ocr-job-completed-instant.mjs diff --git a/components/ocrspace/actions/get-ocr-result/get-ocr-result.mjs b/components/ocrspace/actions/get-ocr-result/get-ocr-result.mjs new file mode 100644 index 0000000000000..eab70083fffb3 --- /dev/null +++ b/components/ocrspace/actions/get-ocr-result/get-ocr-result.mjs @@ -0,0 +1,26 @@ +import ocrspace from "../../ocrspace.app.mjs"; +import { axios } from "@pipedream/platform"; + +export default { + key: "ocrspace-get-ocr-result", + name: "Get OCR Result", + description: "Retrieves the processed OCR result for a specific job ID. [See the documentation](https://ocr.space/ocrapi)", + version: "0.0.{{ts}}", + type: "action", + props: { + ocrspace, + jobId: { + propDefinition: [ + ocrspace, + "jobId", + ], + }, + }, + async run({ $ }) { + const result = await this.ocrspace.retrieveOcrResult({ + jobId: this.jobId, + }); + $.export("$summary", `Retrieved OCR result for job ID ${this.jobId}`); + return result; + }, +}; diff --git a/components/ocrspace/actions/process-image/process-image.mjs b/components/ocrspace/actions/process-image/process-image.mjs new file mode 100644 index 0000000000000..e0499c680626d --- /dev/null +++ b/components/ocrspace/actions/process-image/process-image.mjs @@ -0,0 +1,53 @@ +import ocrspace from "../../ocrspace.app.mjs"; +import { axios } from "@pipedream/platform"; + +export default { + key: "ocrspace-process-image", + name: "Process Image", + description: "Submits an image file for OCR processing using OCR.space. [See the documentation](https://ocr.space/ocrapi)", + version: "0.0.{{ts}}", + type: "action", + props: { + ocrspace: { + type: "app", + app: "ocrspace", + }, + imageUrl: { + propDefinition: [ + "ocrspace", + "imageUrl", + ], + }, + imageFile: { + propDefinition: [ + "ocrspace", + "imageFile", + ], + }, + imageLanguage: { + propDefinition: [ + "ocrspace", + "imageLanguage", + ], + optional: true, + }, + }, + async run({ $ }) { + if (!this.imageUrl && !this.imageFile) { + throw new Error("Either Image File URL or Image File Upload must be provided."); + } + + const response = await this.ocrspace.submitImage({ + imageUrl: this.imageUrl, + imageFile: this.imageFile, + imageLanguage: this.imageLanguage, + }); + + const summary = response.JobId + ? `Image submitted for OCR processing. Job ID: ${response.JobId}` + : "Image submitted for OCR processing."; + + $.export("$summary", summary); + return response; + }, +}; diff --git a/components/ocrspace/actions/process-pdf/process-pdf.mjs b/components/ocrspace/actions/process-pdf/process-pdf.mjs new file mode 100644 index 0000000000000..50ec5141305f6 --- /dev/null +++ b/components/ocrspace/actions/process-pdf/process-pdf.mjs @@ -0,0 +1,54 @@ +import ocrspace from "../../ocrspace.app.mjs"; +import { axios } from "@pipedream/platform"; + +export default { + key: "ocrspace-process-pdf", + name: "Process PDF for OCR", + description: "Submit a PDF for OCR processing. [See the documentation]()", + version: "0.0.{{ts}}", + type: "action", + props: { + ocrspace: { + type: "app", + app: "ocrspace", + }, + pdfUrl: { + propDefinition: [ + ocrspace, + "pdfUrl", + ], + }, + pdfFile: { + propDefinition: [ + ocrspace, + "pdfFile", + ], + }, + pdfLanguage: { + propDefinition: [ + ocrspace, + "pdfLanguage", + ], + optional: true, + }, + pdfPages: { + propDefinition: [ + ocrspace, + "pdfPages", + ], + optional: true, + }, + }, + async run({ $ }) { + const response = await this.ocrspace.submitPdf({ + pdfUrl: this.pdfUrl, + pdfFile: this.pdfFile, + pdfLanguage: this.pdfLanguage, + pdfPages: this.pdfPages, + }); + + const jobId = response.JobId || response.jobId || "N/A"; + $.export("$summary", `Submitted PDF for OCR processing. Job ID: ${jobId}`); + return response; + }, +}; diff --git a/components/ocrspace/ocrspace.app.mjs b/components/ocrspace/ocrspace.app.mjs index 915aa5782b482..ae49aa9d24882 100644 --- a/components/ocrspace/ocrspace.app.mjs +++ b/components/ocrspace/ocrspace.app.mjs @@ -1,9 +1,361 @@ +import { axios } from "@pipedream/platform"; +import FormData from "form-data"; + export default { type: "app", app: "ocrspace", - propDefinitions: {}, + version: "0.0.{{ts}}", + propDefinitions: { + webhookUrl: { + type: "string", + label: "Webhook URL", + description: "The URL to receive notifications of completed OCR processing.", + }, + monitoredFolder: { + type: "string", + label: "Monitored Folder", + description: "Optional folder to monitor for new file uploads.", + optional: true, + }, + processingQueue: { + type: "string", + label: "Processing Queue", + description: "Optional processing queue to monitor for new file uploads.", + optional: true, + }, + imageUrl: { + type: "string", + label: "Image File URL", + description: "The URL of the image file to submit for OCR processing.", + optional: true, + }, + imageFile: { + type: "file", + label: "Image File Upload", + description: "The image file to submit for OCR processing.", + optional: true, + }, + imageLanguage: { + type: "string", + label: "Image Language", + description: "Optional language setting for image OCR processing.", + optional: true, + options: [ + { + label: "Arabic", + value: "ara", + }, + { + label: "Bulgarian", + value: "bul", + }, + { + label: "Chinese (Simplified)", + value: "chs", + }, + { + label: "Chinese (Traditional)", + value: "cht", + }, + { + label: "Croatian", + value: "hrv", + }, + { + label: "Czech", + value: "cze", + }, + { + label: "Danish", + value: "dan", + }, + { + label: "Dutch", + value: "dut", + }, + { + label: "English", + value: "eng", + }, + { + label: "Finnish", + value: "fin", + }, + { + label: "French", + value: "fre", + }, + { + label: "German", + value: "ger", + }, + { + label: "Greek", + value: "gre", + }, + { + label: "Hungarian", + value: "hun", + }, + { + label: "Korean", + value: "kor", + }, + { + label: "Italian", + value: "ita", + }, + { + label: "Japanese", + value: "jpn", + }, + { + label: "Polish", + value: "pol", + }, + { + label: "Portuguese", + value: "por", + }, + { + label: "Russian", + value: "rus", + }, + { + label: "Slovenian", + value: "slv", + }, + { + label: "Spanish", + value: "spa", + }, + { + label: "Swedish", + value: "swe", + }, + { + label: "Turkish", + value: "tur", + }, + ], + }, + pdfUrl: { + type: "string", + label: "PDF File URL", + description: "The URL of the PDF file to submit for OCR processing.", + optional: true, + }, + pdfFile: { + type: "file", + label: "PDF File Upload", + description: "The PDF file to submit for OCR processing.", + optional: true, + }, + pdfLanguage: { + type: "string", + label: "PDF Language", + description: "Optional language setting for PDF OCR processing.", + optional: true, + options: [ + { + label: "Arabic", + value: "ara", + }, + { + label: "Bulgarian", + value: "bul", + }, + { + label: "Chinese (Simplified)", + value: "chs", + }, + { + label: "Chinese (Traditional)", + value: "cht", + }, + { + label: "Croatian", + value: "hrv", + }, + { + label: "Czech", + value: "cze", + }, + { + label: "Danish", + value: "dan", + }, + { + label: "Dutch", + value: "dut", + }, + { + label: "English", + value: "eng", + }, + { + label: "Finnish", + value: "fin", + }, + { + label: "French", + value: "fre", + }, + { + label: "German", + value: "ger", + }, + { + label: "Greek", + value: "gre", + }, + { + label: "Hungarian", + value: "hun", + }, + { + label: "Korean", + value: "kor", + }, + { + label: "Italian", + value: "ita", + }, + { + label: "Japanese", + value: "jpn", + }, + { + label: "Polish", + value: "pol", + }, + { + label: "Portuguese", + value: "por", + }, + { + label: "Russian", + value: "rus", + }, + { + label: "Slovenian", + value: "slv", + }, + { + label: "Spanish", + value: "spa", + }, + { + label: "Swedish", + value: "swe", + }, + { + label: "Turkish", + value: "tur", + }, + ], + }, + pdfPages: { + type: "string", + label: "PDF Pages", + description: "Optional specific pages to process in the PDF file.", + optional: true, + }, + jobId: { + type: "string", + label: "Job ID", + description: "The Job ID to retrieve the processed OCR result.", + }, + }, methods: { - // this.$auth contains connected account data + _baseUrl() { + return "https://api.ocr.space"; + }, + async _makeRequest(opts = {}) { + const { + $, method = "POST", path = "/parse/image", headers, ...otherOpts + } = opts; + return axios($, { + ...otherOpts, + method, + url: this._baseUrl() + path, + headers: { + ...headers, + apikey: this.$auth.apikey, + }, + }); + }, + async submitImage(opts = {}) { + const { + imageUrl, imageFile, imageLanguage, + } = opts; + const formData = new FormData(); + if (imageUrl) { + formData.append("url", imageUrl); + } + if (imageFile) { + formData.append("file", imageFile); + } + formData.append("isOverlayRequired", false); + if (imageLanguage) { + formData.append("language", imageLanguage); + } + return this._makeRequest({ + path: "/parse/image", + method: "POST", + data: formData, + headers: formData.getHeaders(), + }); + }, + async submitPdf(opts = {}) { + const { + pdfUrl, pdfFile, pdfLanguage, pdfPages, + } = opts; + const formData = new FormData(); + if (pdfUrl) { + formData.append("url", pdfUrl); + } + if (pdfFile) { + formData.append("file", pdfFile); + } + formData.append("isOverlayRequired", false); + if (pdfLanguage) { + formData.append("language", pdfLanguage); + } + if (pdfPages) { + formData.append("pages", pdfPages); + } + return this._makeRequest({ + path: "/parse/image", + method: "POST", + data: formData, + headers: formData.getHeaders(), + }); + }, + async retrieveOcrResult(opts = {}) { + const { jobId } = opts; + const path = `/parse/image/${jobId}`; + return this._makeRequest({ + path, + method: "GET", + }); + }, + async emitOcrJobCompleted(data) { + const { webhookUrl } = this; + await axios(this, { + method: "POST", + url: webhookUrl, + data, + }); + }, + async emitNewFileUploaded(data) { + const { webhookUrl } = this; + await axios(this, { + method: "POST", + url: webhookUrl, + data, + }); + }, authKeys() { console.log(Object.keys(this.$auth)); }, diff --git a/components/ocrspace/sources/new-file-uploaded/new-file-uploaded.mjs b/components/ocrspace/sources/new-file-uploaded/new-file-uploaded.mjs new file mode 100644 index 0000000000000..72d297ec5ee32 --- /dev/null +++ b/components/ocrspace/sources/new-file-uploaded/new-file-uploaded.mjs @@ -0,0 +1,111 @@ +import { + axios, DEFAULT_POLLING_SOURCE_TIMER_INTERVAL, +} from "@pipedream/platform"; +import ocrspace from "../../ocrspace.app.mjs"; + +export default { + key: "ocrspace-new-file-uploaded", + name: "New OCR File Uploaded", + description: "Emit a new event when a new file is uploaded for OCR processing. [See the documentation]()", + version: "0.0.{{ts}}", + type: "source", + dedupe: "unique", + props: { + ocrspace, + db: "$.service.db", + timer: { + type: "$.interface.timer", + default: { + intervalSeconds: DEFAULT_POLLING_SOURCE_TIMER_INTERVAL, + }, + }, + monitoredFolder: { + type: "string", + label: "Monitored Folder", + description: "Optional folder to monitor for new file uploads.", + optional: true, + }, + processingQueue: { + type: "string", + label: "Processing Queue", + description: "Optional processing queue to monitor for new file uploads.", + optional: true, + }, + }, + methods: { + async fetchUploadedFiles() { + const params = {}; + if (this.monitoredFolder) { + params.folder = this.monitoredFolder; + } + if (this.processingQueue) { + params.queue = this.processingQueue; + } + const uploadedFiles = await this.ocrspace._makeRequest({ + path: "/list/uploads", + method: "GET", + params, + }); + return uploadedFiles; + }, + }, + hooks: { + async deploy() { + const files = await this.fetchUploadedFiles(); + // Sort files by timestamp descending + files.sort((a, b) => new Date(b.timestamp) - new Date(a.timestamp)); + const recentFiles = files.slice(0, 50).reverse(); + for (const file of recentFiles) { + const id = file.id || new Date(file.timestamp).getTime(); + const summary = `New file uploaded: ${file.name}`; + const ts = file.timestamp + ? new Date(file.timestamp).getTime() + : Date.now(); + this.$emit(file, { + id, + summary, + ts, + }); + } + if (files.length > 0) { + const latestFile = files[0]; + const lastFileId = latestFile.id || new Date(latestFile.timestamp).getTime(); + await this.db.set("lastFileId", lastFileId); + } + }, + async activate() { + // No action needed on activate + }, + async deactivate() { + // No action needed on deactivate + }, + }, + async run() { + const files = await this.fetchUploadedFiles(); + const lastFileId = (await this.db.get("lastFileId")) || 0; + const newFiles = files.filter( + (file) => + (file.id && file.id > lastFileId) || + (file.timestamp && new Date(file.timestamp).getTime() > lastFileId), + ); + // Sort newFiles by timestamp ascending + newFiles.sort((a, b) => new Date(a.timestamp) - new Date(b.timestamp)); + for (const file of newFiles) { + const id = file.id || new Date(file.timestamp).getTime(); + const summary = `New file uploaded: ${file.name}`; + const ts = file.timestamp + ? new Date(file.timestamp).getTime() + : Date.now(); + this.$emit(file, { + id, + summary, + ts, + }); + } + if (files.length > 0) { + const latestFile = files[0]; + const lastFileId = latestFile.id || new Date(latestFile.timestamp).getTime(); + await this.db.set("lastFileId", lastFileId); + } + }, +}; diff --git a/components/ocrspace/sources/new-ocr-job-completed-instant/new-ocr-job-completed-instant.mjs b/components/ocrspace/sources/new-ocr-job-completed-instant/new-ocr-job-completed-instant.mjs new file mode 100644 index 0000000000000..cfed5441c340f --- /dev/null +++ b/components/ocrspace/sources/new-ocr-job-completed-instant/new-ocr-job-completed-instant.mjs @@ -0,0 +1,67 @@ +import ocrspace from "../../ocrspace.app.mjs"; +import { axios } from "@pipedream/platform"; + +export default { + key: "ocrspace-new-ocr-job-completed-instant", + name: "New OCR Job Completed", + description: "Emit new event when an OCR job is completed. [See the documentation](https://ocr.space/ocrapi)", + version: "0.0.{{ts}}", + type: "source", + dedupe: "unique", + props: { + ocrspace: { + type: "app", + app: "ocrspace", + }, + http: { + type: "$.interface.http", + customResponse: true, + }, + db: "$.service.db", + }, + hooks: { + async activate() { + console.log(`Please configure your OCRSpace webhook URL to: ${this.http.endpoint}`); + }, + async deactivate() { + console.log("Webhook deactivated. Please remove the webhook URL from your OCRSpace settings."); + }, + async deploy() { + const lastJobId = await this.db.get("lastJobId"); + if (lastJobId) { + try { + const job = await this.ocrspace.retrieveOcrResult({ + jobId: lastJobId, + }); + this.$emit(job, { + id: job.jobId || lastJobId, + summary: `OCR job completed: ${job.jobId || lastJobId}`, + ts: Date.now(), + }); + } catch (error) { + console.error(`Failed to retrieve OCR job with ID ${lastJobId}:`, error); + } + } + }, + }, + async run(event) { + const job = event; + const jobId = job.jobId || job.id || null; + const summary = jobId + ? `OCR job completed: ${jobId}` + : "OCR job completed"; + const ts = job.completedAt + ? Date.parse(job.completedAt) + : Date.now(); + + this.$emit(job, { + id: jobId || ts.toString(), + summary, + ts, + }); + + if (jobId) { + await this.db.set("lastJobId", jobId); + } + }, +}; From c92f9e558862cc35bd0b4a800230e6819fbeace7 Mon Sep 17 00:00:00 2001 From: Luan Cazarine Date: Thu, 16 Jan 2025 12:17:31 -0300 Subject: [PATCH 2/6] [Components] ocrspace #15148 Actions - Process Image - Process PDF --- .../ocrspace/actions/common/process-base.mjs | 70 ++++ .../actions/get-ocr-result/get-ocr-result.mjs | 26 -- .../actions/process-image/process-image.mjs | 51 +-- .../actions/process-pdf/process-pdf.mjs | 56 +-- components/ocrspace/common/constants.mjs | 117 ++++++ components/ocrspace/common/utils.mjs | 31 ++ components/ocrspace/ocrspace.app.mjs | 371 +++--------------- components/ocrspace/package.json | 18 + .../new-file-uploaded/new-file-uploaded.mjs | 111 ------ .../new-ocr-job-completed-instant.mjs | 67 ---- 10 files changed, 313 insertions(+), 605 deletions(-) create mode 100644 components/ocrspace/actions/common/process-base.mjs delete mode 100644 components/ocrspace/actions/get-ocr-result/get-ocr-result.mjs create mode 100644 components/ocrspace/common/constants.mjs create mode 100644 components/ocrspace/common/utils.mjs create mode 100644 components/ocrspace/package.json delete mode 100644 components/ocrspace/sources/new-file-uploaded/new-file-uploaded.mjs delete mode 100644 components/ocrspace/sources/new-ocr-job-completed-instant/new-ocr-job-completed-instant.mjs diff --git a/components/ocrspace/actions/common/process-base.mjs b/components/ocrspace/actions/common/process-base.mjs new file mode 100644 index 0000000000000..adc8f0cfffb4c --- /dev/null +++ b/components/ocrspace/actions/common/process-base.mjs @@ -0,0 +1,70 @@ +import FormData from "form-data"; +import { getUrlOrFile } from "../../common/utils.mjs"; +import ocrspace from "../../ocrspace.app.mjs"; + +export default { + props: { + ocrspace, + language: { + propDefinition: [ + ocrspace, + "language", + ], + }, + isOverlayRequired: { + propDefinition: [ + ocrspace, + "isOverlayRequired", + ], + }, + detectOrientation: { + propDefinition: [ + ocrspace, + "detectOrientation", + ], + }, + scale: { + propDefinition: [ + ocrspace, + "scale", + ], + }, + isTable: { + propDefinition: [ + ocrspace, + "isTable", + ], + }, + ocrEngine: { + propDefinition: [ + ocrspace, + "ocrEngine", + ], + }, + }, + async run({ $ }) { + const data = new FormData(); + const { + url, file, + } = getUrlOrFile(this.file); + + if (url) data.append("url", url); + if (file) data.append("file", file); + if (this.imageLanguage) data.append("language", this.imageLanguage); + if (this.isOverlayRequired) data.append("isOverlayRequired", `${this.isOverlayRequired}`); + if (this.filetype) data.append("filetype", this.filetype); + if (this.detectOrientation) data.append("detectOrientation", `${this.detectOrientation}`); + if (this.scale) data.append("scale", `${this.scale}`); + if (this.isTable) data.append("isTable", `${this.isTable}`); + if (this.ocrEngine) data.append("OCREngine", this.ocrEngine); + + const response = await this.ocrspace.processImage({ + $, + data, + headers: data.getHeaders(), + }); + + $.export("$summary", this.getSummary()); + return response; + }, +}; diff --git a/components/ocrspace/actions/get-ocr-result/get-ocr-result.mjs b/components/ocrspace/actions/get-ocr-result/get-ocr-result.mjs deleted file mode 100644 index eab70083fffb3..0000000000000 --- a/components/ocrspace/actions/get-ocr-result/get-ocr-result.mjs +++ /dev/null @@ -1,26 +0,0 @@ -import ocrspace from "../../ocrspace.app.mjs"; -import { axios } from "@pipedream/platform"; - -export default { - key: "ocrspace-get-ocr-result", - name: "Get OCR Result", - description: "Retrieves the processed OCR result for a specific job ID. [See the documentation](https://ocr.space/ocrapi)", - version: "0.0.{{ts}}", - type: "action", - props: { - ocrspace, - jobId: { - propDefinition: [ - ocrspace, - "jobId", - ], - }, - }, - async run({ $ }) { - const result = await this.ocrspace.retrieveOcrResult({ - jobId: this.jobId, - }); - $.export("$summary", `Retrieved OCR result for job ID ${this.jobId}`); - return result; - }, -}; diff --git a/components/ocrspace/actions/process-image/process-image.mjs b/components/ocrspace/actions/process-image/process-image.mjs index e0499c680626d..dace35e1e2ca1 100644 --- a/components/ocrspace/actions/process-image/process-image.mjs +++ b/components/ocrspace/actions/process-image/process-image.mjs @@ -1,53 +1,30 @@ -import ocrspace from "../../ocrspace.app.mjs"; -import { axios } from "@pipedream/platform"; +import common from "../common/process-base.mjs"; export default { + ...common, key: "ocrspace-process-image", name: "Process Image", description: "Submits an image file for OCR processing using OCR.space. [See the documentation](https://ocr.space/ocrapi)", - version: "0.0.{{ts}}", + version: "0.0.1", type: "action", props: { - ocrspace: { - type: "app", - app: "ocrspace", - }, - imageUrl: { - propDefinition: [ - "ocrspace", - "imageUrl", - ], - }, - imageFile: { + ...common.props, + file: { propDefinition: [ - "ocrspace", - "imageFile", + common.props.ocrspace, + "file", ], }, - imageLanguage: { + filetype: { propDefinition: [ - "ocrspace", - "imageLanguage", + common.props.ocrspace, + "filetype", ], - optional: true, }, }, - async run({ $ }) { - if (!this.imageUrl && !this.imageFile) { - throw new Error("Either Image File URL or Image File Upload must be provided."); - } - - const response = await this.ocrspace.submitImage({ - imageUrl: this.imageUrl, - imageFile: this.imageFile, - imageLanguage: this.imageLanguage, - }); - - const summary = response.JobId - ? `Image submitted for OCR processing. Job ID: ${response.JobId}` - : "Image submitted for OCR processing."; - - $.export("$summary", summary); - return response; + methods: { + getSummary() { + return "Image submitted for OCR processing."; + }, }, }; diff --git a/components/ocrspace/actions/process-pdf/process-pdf.mjs b/components/ocrspace/actions/process-pdf/process-pdf.mjs index 50ec5141305f6..4669979849a62 100644 --- a/components/ocrspace/actions/process-pdf/process-pdf.mjs +++ b/components/ocrspace/actions/process-pdf/process-pdf.mjs @@ -1,54 +1,26 @@ -import ocrspace from "../../ocrspace.app.mjs"; -import { axios } from "@pipedream/platform"; +import common from "../common/process-base.mjs"; export default { + ...common, key: "ocrspace-process-pdf", name: "Process PDF for OCR", - description: "Submit a PDF for OCR processing. [See the documentation]()", - version: "0.0.{{ts}}", + description: "Submit a PDF for OCR processing. [See the documentation](https://ocr.space/ocrapi)", + version: "0.0.1", type: "action", props: { - ocrspace: { - type: "app", - app: "ocrspace", - }, - pdfUrl: { - propDefinition: [ - ocrspace, - "pdfUrl", - ], - }, - pdfFile: { - propDefinition: [ - ocrspace, - "pdfFile", - ], - }, - pdfLanguage: { - propDefinition: [ - ocrspace, - "pdfLanguage", - ], - optional: true, - }, - pdfPages: { + ...common.props, + file: { propDefinition: [ - ocrspace, - "pdfPages", + common.props.ocrspace, + "file", ], - optional: true, + label: "PDF File", + description: "The URL of the PDF file or the path to the file saved to the `/tmp` directory (e.g. `/tmp/example.jpg`) to process. [See the documentation](https://pipedream.com/docs/workflows/steps/code/nodejs/working-with-files/#the-tmp-directory).", }, }, - async run({ $ }) { - const response = await this.ocrspace.submitPdf({ - pdfUrl: this.pdfUrl, - pdfFile: this.pdfFile, - pdfLanguage: this.pdfLanguage, - pdfPages: this.pdfPages, - }); - - const jobId = response.JobId || response.jobId || "N/A"; - $.export("$summary", `Submitted PDF for OCR processing. Job ID: ${jobId}`); - return response; + methods: { + getSummary() { + return "Submitted PDF for OCR processing."; + }, }, }; diff --git a/components/ocrspace/common/constants.mjs b/components/ocrspace/common/constants.mjs new file mode 100644 index 0000000000000..ea51ce5ad0337 --- /dev/null +++ b/components/ocrspace/common/constants.mjs @@ -0,0 +1,117 @@ +export const LANGUAGE_OPTIONS = [ + { + label: "Arabic", + value: "ara", + }, + { + label: "Bulgarian", + value: "bul", + }, + { + label: "Chinese (Simplified)", + value: "chs", + }, + { + label: "Chinese (Traditional)", + value: "cht", + }, + { + label: "Croatian", + value: "hrv", + }, + { + label: "Czech", + value: "cze", + }, + { + label: "Danish", + value: "dan", + }, + { + label: "Dutch", + value: "dut", + }, + { + label: "English", + value: "eng", + }, + { + label: "Finnish", + value: "fin", + }, + { + label: "French", + value: "fre", + }, + { + label: "German", + value: "ger", + }, + { + label: "Greek", + value: "gre", + }, + { + label: "Hungarian", + value: "hun", + }, + { + label: "Korean", + value: "kor", + }, + { + label: "Italian", + value: "ita", + }, + { + label: "Japanese", + value: "jpn", + }, + { + label: "Polish", + value: "pol", + }, + { + label: "Portuguese", + value: "por", + }, + { + label: "Russian", + value: "rus", + }, + { + label: "Slovenian", + value: "slv", + }, + { + label: "Spanish", + value: "spa", + }, + { + label: "Swedish", + value: "swe", + }, + { + label: "Turkish", + value: "tur", + }, +]; + +export const IMAGE_FILETYPE_OPTIONS = [ + "GIF", + "PNG", + "JPG", + "TIF", + "BMP", +]; + +export const OCR_ENGINE_OPTIONS = [ + { + label: "OCR Engine 1", + value: "1", + }, + { + label: "OCR Engine 2", + value: "2", + }, +]; diff --git a/components/ocrspace/common/utils.mjs b/components/ocrspace/common/utils.mjs new file mode 100644 index 0000000000000..b9920ab13c563 --- /dev/null +++ b/components/ocrspace/common/utils.mjs @@ -0,0 +1,31 @@ +import fs from "fs"; + +export const isValidUrl = (urlString) => { + var urlPattern = new RegExp("^(https?:\\/\\/)?" + // validate protocol +"((([a-z\\d]([a-z\\d-]*[a-z\\d])*)\\.)+[a-z]{2,}|" + // validate domain name +"((\\d{1,3}\\.){3}\\d{1,3}))" + // validate OR ip (v4) address +"(\\:\\d+)?(\\/[-a-z\\d%_.~+]*)*" + // validate port and path +"(\\?[;&a-z\\d%_.~+=-]*)?" + // validate query string +"(\\#[-a-z\\d_]*)?$", "i"); // validate fragment locator + return !!urlPattern.test(urlString); +}; + +export const checkTmp = (filename) => { + if (filename.indexOf("/tmp") === -1) { + return `/tmp/${filename}`; + } + return filename; +}; + +export const getUrlOrFile = (url) => { + if (!isValidUrl(url)) { + const data = fs.readFileSync(checkTmp(url)); + const base64Image = Buffer.from(data, "binary").toString("base64"); + return { + file: `data:image/jpeg;base64,${base64Image}`, + }; + } + return { + url, + }; +}; diff --git a/components/ocrspace/ocrspace.app.mjs b/components/ocrspace/ocrspace.app.mjs index ae49aa9d24882..ae96d7db00db7 100644 --- a/components/ocrspace/ocrspace.app.mjs +++ b/components/ocrspace/ocrspace.app.mjs @@ -1,363 +1,90 @@ import { axios } from "@pipedream/platform"; -import FormData from "form-data"; +import { + IMAGE_FILETYPE_OPTIONS, + LANGUAGE_OPTIONS, + OCR_ENGINE_OPTIONS, +} from "./common/constants.mjs"; export default { type: "app", app: "ocrspace", - version: "0.0.{{ts}}", propDefinitions: { - webhookUrl: { + file: { type: "string", - label: "Webhook URL", - description: "The URL to receive notifications of completed OCR processing.", + label: "Image", + description: "The URL of the image or the path to the file saved to the `/tmp` directory (e.g. `/tmp/example.jpg`) to process. [See the documentation](https://pipedream.com/docs/workflows/steps/code/nodejs/working-with-files/#the-tmp-directory).", }, - monitoredFolder: { + language: { type: "string", - label: "Monitored Folder", - description: "Optional folder to monitor for new file uploads.", + label: "Language", + description: "Language setting for image OCR processing.", + options: LANGUAGE_OPTIONS, optional: true, }, - processingQueue: { - type: "string", - label: "Processing Queue", - description: "Optional processing queue to monitor for new file uploads.", + isOverlayRequired: { + type: "boolean", + label: "Is Overlay Required", + description: "If true, returns the coordinates of the bounding boxes for each word. If false, the OCR'ed text is returned only as a text block (this makes the JSON reponse smaller). Overlay data can be used, for example, to show [text over the image](https://ocr.space/english).", optional: true, }, - imageUrl: { + filetype: { type: "string", - label: "Image File URL", - description: "The URL of the image file to submit for OCR processing.", + label: "File Type", + description: "Overwrites the automatic file type detection based on content-type. Supported image file formats are png, jpg (jpeg), gif, tif (tiff) and bmp. For document ocr, the api supports the Adobe PDF format. Multi-page TIFF files are supported.", + options: IMAGE_FILETYPE_OPTIONS, optional: true, }, - imageFile: { - type: "file", - label: "Image File Upload", - description: "The image file to submit for OCR processing.", + detectOrientation: { + type: "boolean", + label: "Detect Orientation", + description: "If set to true, the api autorotates the image correctly and sets the TextOrientation parameter in the JSON response. If the image is not rotated, then TextOrientation=0, otherwise it is the degree of the rotation, e. g. \"270\".", optional: true, }, - imageLanguage: { - type: "string", - label: "Image Language", - description: "Optional language setting for image OCR processing.", + scale: { + type: "boolean", + label: "Scale", + description: "If set to true, the api does some internal upscaling. This can improve the OCR result significantly, especially for low-resolution PDF scans. Note that the front page demo uses scale=true, but the API uses scale=false by default. See also this OCR forum post.", optional: true, - options: [ - { - label: "Arabic", - value: "ara", - }, - { - label: "Bulgarian", - value: "bul", - }, - { - label: "Chinese (Simplified)", - value: "chs", - }, - { - label: "Chinese (Traditional)", - value: "cht", - }, - { - label: "Croatian", - value: "hrv", - }, - { - label: "Czech", - value: "cze", - }, - { - label: "Danish", - value: "dan", - }, - { - label: "Dutch", - value: "dut", - }, - { - label: "English", - value: "eng", - }, - { - label: "Finnish", - value: "fin", - }, - { - label: "French", - value: "fre", - }, - { - label: "German", - value: "ger", - }, - { - label: "Greek", - value: "gre", - }, - { - label: "Hungarian", - value: "hun", - }, - { - label: "Korean", - value: "kor", - }, - { - label: "Italian", - value: "ita", - }, - { - label: "Japanese", - value: "jpn", - }, - { - label: "Polish", - value: "pol", - }, - { - label: "Portuguese", - value: "por", - }, - { - label: "Russian", - value: "rus", - }, - { - label: "Slovenian", - value: "slv", - }, - { - label: "Spanish", - value: "spa", - }, - { - label: "Swedish", - value: "swe", - }, - { - label: "Turkish", - value: "tur", - }, - ], }, - pdfUrl: { - type: "string", - label: "PDF File URL", - description: "The URL of the PDF file to submit for OCR processing.", + isTable: { + type: "boolean", + label: "Is Table", + description: "If set to true, the OCR logic makes sure that the parsed text result is always returned line by line. This switch is recommended for [table OCR](https://ocr.space/tablerecognition), [receipt OCR](https://ocr.space/receiptscanning), invoice processing and all other type of input documents that have a table like structure.", optional: true, }, - pdfFile: { - type: "file", - label: "PDF File Upload", - description: "The PDF file to submit for OCR processing.", - optional: true, - }, - pdfLanguage: { + ocrEngine: { type: "string", - label: "PDF Language", - description: "Optional language setting for PDF OCR processing.", + label: "OCR Engine", + description: "Engine 1 is default. [See OCR Engines](https://ocr.space/OCRAPI#ocrengine).", + options: OCR_ENGINE_OPTIONS, optional: true, - options: [ - { - label: "Arabic", - value: "ara", - }, - { - label: "Bulgarian", - value: "bul", - }, - { - label: "Chinese (Simplified)", - value: "chs", - }, - { - label: "Chinese (Traditional)", - value: "cht", - }, - { - label: "Croatian", - value: "hrv", - }, - { - label: "Czech", - value: "cze", - }, - { - label: "Danish", - value: "dan", - }, - { - label: "Dutch", - value: "dut", - }, - { - label: "English", - value: "eng", - }, - { - label: "Finnish", - value: "fin", - }, - { - label: "French", - value: "fre", - }, - { - label: "German", - value: "ger", - }, - { - label: "Greek", - value: "gre", - }, - { - label: "Hungarian", - value: "hun", - }, - { - label: "Korean", - value: "kor", - }, - { - label: "Italian", - value: "ita", - }, - { - label: "Japanese", - value: "jpn", - }, - { - label: "Polish", - value: "pol", - }, - { - label: "Portuguese", - value: "por", - }, - { - label: "Russian", - value: "rus", - }, - { - label: "Slovenian", - value: "slv", - }, - { - label: "Spanish", - value: "spa", - }, - { - label: "Swedish", - value: "swe", - }, - { - label: "Turkish", - value: "tur", - }, - ], - }, - pdfPages: { - type: "string", - label: "PDF Pages", - description: "Optional specific pages to process in the PDF file.", - optional: true, - }, - jobId: { - type: "string", - label: "Job ID", - description: "The Job ID to retrieve the processed OCR result.", }, }, methods: { _baseUrl() { return "https://api.ocr.space"; }, - async _makeRequest(opts = {}) { - const { - $, method = "POST", path = "/parse/image", headers, ...otherOpts - } = opts; + _headers(headers = {}) { + return { + "apikey": this.$auth.apikey, + ...headers, + }; + }, + _makeRequest({ + $ = this, path, headers, ...opts + }) { return axios($, { - ...otherOpts, - method, url: this._baseUrl() + path, - headers: { - ...headers, - apikey: this.$auth.apikey, - }, + headers: this._headers(headers), + ...opts, }); }, - async submitImage(opts = {}) { - const { - imageUrl, imageFile, imageLanguage, - } = opts; - const formData = new FormData(); - if (imageUrl) { - formData.append("url", imageUrl); - } - if (imageFile) { - formData.append("file", imageFile); - } - formData.append("isOverlayRequired", false); - if (imageLanguage) { - formData.append("language", imageLanguage); - } + processImage(opts = {}) { return this._makeRequest({ - path: "/parse/image", method: "POST", - data: formData, - headers: formData.getHeaders(), - }); - }, - async submitPdf(opts = {}) { - const { - pdfUrl, pdfFile, pdfLanguage, pdfPages, - } = opts; - const formData = new FormData(); - if (pdfUrl) { - formData.append("url", pdfUrl); - } - if (pdfFile) { - formData.append("file", pdfFile); - } - formData.append("isOverlayRequired", false); - if (pdfLanguage) { - formData.append("language", pdfLanguage); - } - if (pdfPages) { - formData.append("pages", pdfPages); - } - return this._makeRequest({ path: "/parse/image", - method: "POST", - data: formData, - headers: formData.getHeaders(), + ...opts, }); }, - async retrieveOcrResult(opts = {}) { - const { jobId } = opts; - const path = `/parse/image/${jobId}`; - return this._makeRequest({ - path, - method: "GET", - }); - }, - async emitOcrJobCompleted(data) { - const { webhookUrl } = this; - await axios(this, { - method: "POST", - url: webhookUrl, - data, - }); - }, - async emitNewFileUploaded(data) { - const { webhookUrl } = this; - await axios(this, { - method: "POST", - url: webhookUrl, - data, - }); - }, - authKeys() { - console.log(Object.keys(this.$auth)); - }, }, }; diff --git a/components/ocrspace/package.json b/components/ocrspace/package.json new file mode 100644 index 0000000000000..7c2dc40c7d722 --- /dev/null +++ b/components/ocrspace/package.json @@ -0,0 +1,18 @@ +{ + "name": "@pipedream/ocrspace", + "version": "0.1.0", + "description": "Pipedream OCRSpace Components", + "main": "ocrspace.app.mjs", + "keywords": [ + "pipedream", + "ocrspace" + ], + "homepage": "https://pipedream.com/apps/ocrspace", + "author": "Pipedream (https://pipedream.com/)", + "publishConfig": { + "access": "public" + }, + "dependencies": { + "@pipedream/platform": "^3.0.3" + } +} diff --git a/components/ocrspace/sources/new-file-uploaded/new-file-uploaded.mjs b/components/ocrspace/sources/new-file-uploaded/new-file-uploaded.mjs deleted file mode 100644 index 72d297ec5ee32..0000000000000 --- a/components/ocrspace/sources/new-file-uploaded/new-file-uploaded.mjs +++ /dev/null @@ -1,111 +0,0 @@ -import { - axios, DEFAULT_POLLING_SOURCE_TIMER_INTERVAL, -} from "@pipedream/platform"; -import ocrspace from "../../ocrspace.app.mjs"; - -export default { - key: "ocrspace-new-file-uploaded", - name: "New OCR File Uploaded", - description: "Emit a new event when a new file is uploaded for OCR processing. [See the documentation]()", - version: "0.0.{{ts}}", - type: "source", - dedupe: "unique", - props: { - ocrspace, - db: "$.service.db", - timer: { - type: "$.interface.timer", - default: { - intervalSeconds: DEFAULT_POLLING_SOURCE_TIMER_INTERVAL, - }, - }, - monitoredFolder: { - type: "string", - label: "Monitored Folder", - description: "Optional folder to monitor for new file uploads.", - optional: true, - }, - processingQueue: { - type: "string", - label: "Processing Queue", - description: "Optional processing queue to monitor for new file uploads.", - optional: true, - }, - }, - methods: { - async fetchUploadedFiles() { - const params = {}; - if (this.monitoredFolder) { - params.folder = this.monitoredFolder; - } - if (this.processingQueue) { - params.queue = this.processingQueue; - } - const uploadedFiles = await this.ocrspace._makeRequest({ - path: "/list/uploads", - method: "GET", - params, - }); - return uploadedFiles; - }, - }, - hooks: { - async deploy() { - const files = await this.fetchUploadedFiles(); - // Sort files by timestamp descending - files.sort((a, b) => new Date(b.timestamp) - new Date(a.timestamp)); - const recentFiles = files.slice(0, 50).reverse(); - for (const file of recentFiles) { - const id = file.id || new Date(file.timestamp).getTime(); - const summary = `New file uploaded: ${file.name}`; - const ts = file.timestamp - ? new Date(file.timestamp).getTime() - : Date.now(); - this.$emit(file, { - id, - summary, - ts, - }); - } - if (files.length > 0) { - const latestFile = files[0]; - const lastFileId = latestFile.id || new Date(latestFile.timestamp).getTime(); - await this.db.set("lastFileId", lastFileId); - } - }, - async activate() { - // No action needed on activate - }, - async deactivate() { - // No action needed on deactivate - }, - }, - async run() { - const files = await this.fetchUploadedFiles(); - const lastFileId = (await this.db.get("lastFileId")) || 0; - const newFiles = files.filter( - (file) => - (file.id && file.id > lastFileId) || - (file.timestamp && new Date(file.timestamp).getTime() > lastFileId), - ); - // Sort newFiles by timestamp ascending - newFiles.sort((a, b) => new Date(a.timestamp) - new Date(b.timestamp)); - for (const file of newFiles) { - const id = file.id || new Date(file.timestamp).getTime(); - const summary = `New file uploaded: ${file.name}`; - const ts = file.timestamp - ? new Date(file.timestamp).getTime() - : Date.now(); - this.$emit(file, { - id, - summary, - ts, - }); - } - if (files.length > 0) { - const latestFile = files[0]; - const lastFileId = latestFile.id || new Date(latestFile.timestamp).getTime(); - await this.db.set("lastFileId", lastFileId); - } - }, -}; diff --git a/components/ocrspace/sources/new-ocr-job-completed-instant/new-ocr-job-completed-instant.mjs b/components/ocrspace/sources/new-ocr-job-completed-instant/new-ocr-job-completed-instant.mjs deleted file mode 100644 index cfed5441c340f..0000000000000 --- a/components/ocrspace/sources/new-ocr-job-completed-instant/new-ocr-job-completed-instant.mjs +++ /dev/null @@ -1,67 +0,0 @@ -import ocrspace from "../../ocrspace.app.mjs"; -import { axios } from "@pipedream/platform"; - -export default { - key: "ocrspace-new-ocr-job-completed-instant", - name: "New OCR Job Completed", - description: "Emit new event when an OCR job is completed. [See the documentation](https://ocr.space/ocrapi)", - version: "0.0.{{ts}}", - type: "source", - dedupe: "unique", - props: { - ocrspace: { - type: "app", - app: "ocrspace", - }, - http: { - type: "$.interface.http", - customResponse: true, - }, - db: "$.service.db", - }, - hooks: { - async activate() { - console.log(`Please configure your OCRSpace webhook URL to: ${this.http.endpoint}`); - }, - async deactivate() { - console.log("Webhook deactivated. Please remove the webhook URL from your OCRSpace settings."); - }, - async deploy() { - const lastJobId = await this.db.get("lastJobId"); - if (lastJobId) { - try { - const job = await this.ocrspace.retrieveOcrResult({ - jobId: lastJobId, - }); - this.$emit(job, { - id: job.jobId || lastJobId, - summary: `OCR job completed: ${job.jobId || lastJobId}`, - ts: Date.now(), - }); - } catch (error) { - console.error(`Failed to retrieve OCR job with ID ${lastJobId}:`, error); - } - } - }, - }, - async run(event) { - const job = event; - const jobId = job.jobId || job.id || null; - const summary = jobId - ? `OCR job completed: ${jobId}` - : "OCR job completed"; - const ts = job.completedAt - ? Date.parse(job.completedAt) - : Date.now(); - - this.$emit(job, { - id: jobId || ts.toString(), - summary, - ts, - }); - - if (jobId) { - await this.db.set("lastJobId", jobId); - } - }, -}; From ceb186ae48d57e99e9c5f27650d25b9375314db7 Mon Sep 17 00:00:00 2001 From: Luan Cazarine Date: Thu, 16 Jan 2025 12:30:10 -0300 Subject: [PATCH 3/6] pnpm update --- pnpm-lock.yaml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ca391384a140c..6bff53a27f362 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -7099,6 +7099,12 @@ importers: components/ocr_web_service: {} + components/ocrspace: + dependencies: + '@pipedream/platform': + specifier: ^3.0.3 + version: 3.0.3 + components/octoparse: {} components/octopus_deploy: {} @@ -9592,8 +9598,7 @@ importers: components/showpad: {} - components/shutterstock: - specifiers: {} + components/shutterstock: {} components/sidetracker: {} @@ -31013,6 +31018,8 @@ snapshots: '@putout/operator-filesystem': 5.0.0(putout@36.13.1(eslint@8.57.1)(typescript@5.6.3)) '@putout/operator-json': 2.2.0 putout: 36.13.1(eslint@8.57.1)(typescript@5.6.3) + transitivePeerDependencies: + - supports-color '@putout/operator-regexp@1.0.0(putout@36.13.1(eslint@8.57.1)(typescript@5.6.3))': dependencies: From 5f17db7043793f63963ceb392fbddc6c652bc249 Mon Sep 17 00:00:00 2001 From: Luan Cazarine Date: Thu, 16 Jan 2025 15:47:44 -0300 Subject: [PATCH 4/6] some adjusts --- components/ocrspace/actions/process-pdf/process-pdf.mjs | 2 +- components/ocrspace/common/utils.mjs | 7 +++++-- components/ocrspace/package.json | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/components/ocrspace/actions/process-pdf/process-pdf.mjs b/components/ocrspace/actions/process-pdf/process-pdf.mjs index 4669979849a62..cda0790f5c620 100644 --- a/components/ocrspace/actions/process-pdf/process-pdf.mjs +++ b/components/ocrspace/actions/process-pdf/process-pdf.mjs @@ -15,7 +15,7 @@ export default { "file", ], label: "PDF File", - description: "The URL of the PDF file or the path to the file saved to the `/tmp` directory (e.g. `/tmp/example.jpg`) to process. [See the documentation](https://pipedream.com/docs/workflows/steps/code/nodejs/working-with-files/#the-tmp-directory).", + description: "The URL of the PDF file or the path to the file saved to the `/tmp` directory (e.g. `/tmp/example.pdf`) to process. [See the documentation](https://pipedream.com/docs/workflows/steps/code/nodejs/working-with-files/#the-tmp-directory).", }, }, methods: { diff --git a/components/ocrspace/common/utils.mjs b/components/ocrspace/common/utils.mjs index b9920ab13c563..3384dcc350320 100644 --- a/components/ocrspace/common/utils.mjs +++ b/components/ocrspace/common/utils.mjs @@ -1,4 +1,5 @@ import fs from "fs"; +import mime from "mime"; export const isValidUrl = (urlString) => { var urlPattern = new RegExp("^(https?:\\/\\/)?" + // validate protocol @@ -19,10 +20,12 @@ export const checkTmp = (filename) => { export const getUrlOrFile = (url) => { if (!isValidUrl(url)) { - const data = fs.readFileSync(checkTmp(url)); + const filePath = checkTmp(url); + const data = fs.readFileSync(filePath); + const mimeType = mime.getType(filePath); const base64Image = Buffer.from(data, "binary").toString("base64"); return { - file: `data:image/jpeg;base64,${base64Image}`, + file: `data:${mimeType};base64,${base64Image}`, }; } return { diff --git a/components/ocrspace/package.json b/components/ocrspace/package.json index 7c2dc40c7d722..a0ab1ae330c10 100644 --- a/components/ocrspace/package.json +++ b/components/ocrspace/package.json @@ -13,6 +13,7 @@ "access": "public" }, "dependencies": { - "@pipedream/platform": "^3.0.3" + "@pipedream/platform": "^3.0.3", + "mime": "^4.0.6" } } From 9545167a0daa87056f86029be6fd7521c2fb26ae Mon Sep 17 00:00:00 2001 From: Luan Cazarine Date: Thu, 16 Jan 2025 15:51:26 -0300 Subject: [PATCH 5/6] pnpm update --- pnpm-lock.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 6bff53a27f362..b484508c2f1d9 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -7104,6 +7104,9 @@ importers: '@pipedream/platform': specifier: ^3.0.3 version: 3.0.3 + mime: + specifier: ^4.0.6 + version: 4.0.6 components/octoparse: {} @@ -22379,6 +22382,11 @@ packages: engines: {node: '>=16'} hasBin: true + mime@4.0.6: + resolution: {integrity: sha512-4rGt7rvQHBbaSOF9POGkk1ocRP16Md1x36Xma8sz8h8/vfCUI2OtEIeCqe4Ofes853x4xDoPiFLIT47J5fI/7A==} + engines: {node: '>=16'} + hasBin: true + mimer@2.0.2: resolution: {integrity: sha512-izxvjsB7Ur5HrTbPu6VKTrzxSMBFBqyZQc6dWlZNQ4/wAvf886fD4lrjtFd8IQ8/WmZKdxKjUtqFFNaj3hQ52g==} engines: {node: '>= 12'} @@ -39679,6 +39687,8 @@ snapshots: mime@4.0.4: {} + mime@4.0.6: {} + mimer@2.0.2: {} mimic-fn@2.1.0: {} From 39ec3bc903a9c9196bc840f185c2dbca2a9d87d1 Mon Sep 17 00:00:00 2001 From: Luan Cazarine Date: Fri, 17 Jan 2025 11:57:35 -0300 Subject: [PATCH 6/6] fix file field name --- components/ocrspace/actions/common/process-base.mjs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/components/ocrspace/actions/common/process-base.mjs b/components/ocrspace/actions/common/process-base.mjs index adc8f0cfffb4c..2a8fbc60500d1 100644 --- a/components/ocrspace/actions/common/process-base.mjs +++ b/components/ocrspace/actions/common/process-base.mjs @@ -1,3 +1,4 @@ +import { ConfigurationError } from "@pipedream/platform"; import FormData from "form-data"; import { getUrlOrFile } from "../../common/utils.mjs"; import ocrspace from "../../ocrspace.app.mjs"; @@ -49,7 +50,7 @@ export default { } = getUrlOrFile(this.file); if (url) data.append("url", url); - if (file) data.append("file", file); + if (file) data.append("base64Image", file); if (this.imageLanguage) data.append("language", this.imageLanguage); if (this.isOverlayRequired) data.append("isOverlayRequired", `${this.isOverlayRequired}`); if (this.filetype) data.append("filetype", this.filetype); @@ -65,6 +66,11 @@ export default { }); $.export("$summary", this.getSummary()); + + if (response.ErrorMessage) { + throw new ConfigurationError(response.ErrorMessage[0]); + } + return response; }, };