Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

FROM node:18-alpine
RUN apk add --update libc6-compat python3 make g++
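# Native build toolchain plus Cairo/Pango development headers, needed to install pdfjs-dist
# (presumably for its optional node-canvas dependency, which compiles from source on Alpine — confirm)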
RUN apk add --no-cache build-base cairo-dev pango-dev

WORKDIR /usr/src/packages

Expand Down
2 changes: 2 additions & 0 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ USER root

RUN apk add --no-cache git
RUN apk add --no-cache python3 py3-pip make g++
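# Native build toolchain plus Cairo/Pango development headers, needed to install pdfjs-dist
# (presumably for its optional node-canvas dependency, which compiles from source on Alpine — confirm)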
RUN apk add --no-cache build-base cairo-dev pango-dev

# You can install a specific version like: flowise@1.0.0
RUN npm install -g flowise
Expand Down
20 changes: 16 additions & 4 deletions packages/components/nodes/documentloaders/Pdf/Pdf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ class Pdf_DocumentLoaders implements INode {
],
default: 'perPage'
},
{
label: 'Use Legacy Build',
name: 'legacyBuild',
type: 'boolean',
optional: true,
additionalParams: true
},
{
label: 'Metadata',
name: 'metadata',
Expand All @@ -64,6 +71,7 @@ class Pdf_DocumentLoaders implements INode {
const pdfFileBase64 = nodeData.inputs?.pdfFile as string
const usage = nodeData.inputs?.usage as string
const metadata = nodeData.inputs?.metadata
const legacyBuild = nodeData.inputs?.legacyBuild as boolean

let alldocs = []
let files: string[] = []
Expand All @@ -81,8 +89,9 @@ class Pdf_DocumentLoaders implements INode {
if (usage === 'perFile') {
const loader = new PDFLoader(new Blob([bf]), {
splitPages: false,
// @ts-ignore
pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
pdfjs: () =>
// @ts-ignore
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
})
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
Expand All @@ -92,8 +101,11 @@ class Pdf_DocumentLoaders implements INode {
alldocs.push(...docs)
}
} else {
// @ts-ignore
const loader = new PDFLoader(new Blob([bf]), { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
const loader = new PDFLoader(new Blob([bf]), {
pdfjs: () =>
// @ts-ignore
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
})
if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs)
Expand Down
3 changes: 2 additions & 1 deletion packages/components/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,10 @@
"moment": "^2.29.3",
"node-fetch": "^2.6.11",
"pdf-parse": "^1.1.1",
"pdfjs-dist": "^3.7.107",
"playwright": "^1.35.0",
"srt-parser-2": "^1.2.3",
"puppeteer": "^20.7.1",
"srt-parser-2": "^1.2.3",
"weaviate-ts-client": "^1.1.0",
"ws": "^8.9.0"
},
Expand Down