// packages/components/nodes/documentloaders/Jsonlines/Jsonlines.ts
//
// State of the JSON Lines loader node after both commits of the series
// ("add JSONLines Loader", then "update pointer attribute and logic").
// The node's icon, jsonlines.svg, lives next to this file and is not defined here.
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { TextSplitter } from 'langchain/text_splitter'
import { JSONLinesLoader } from 'langchain/document_loaders/fs/json'

/**
 * Turns the user-supplied pointer name into the pointer string handed to JSONLinesLoader.
 *
 * The name is trimmed and prefixed with '/', unless it already starts with one,
 * so both `text` and `/text` produce `/text`.
 *
 * @param pointerName raw value of the 'pointerName' input
 * @returns the pointer string, always starting with a single leading '/'
 * @throws Error when the value is missing, not a string, or blank
 */
function toJsonPointer(pointerName: unknown): string {
    const trimmed = typeof pointerName === 'string' ? pointerName.trim() : ''
    if (trimmed === '') {
        // The input is declared with `optional: false`; fail with a readable message
        // instead of a TypeError from calling .trim() on undefined.
        throw new Error('Pointer Extraction is required: enter the name of the field to extract from each JSON line')
    }
    return trimmed.startsWith('/') ? trimmed : `/${trimmed}`
}

/**
 * Document-loader node that reads one or more uploaded `.jsonl` files with
 * langchain's JSONLinesLoader, optionally splits the resulting documents with a
 * TextSplitter, and optionally merges user-supplied metadata into every document.
 */
class Jsonlines_DocumentLoaders implements INode {
    label: string
    name: string
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]

    constructor() {
        this.label = 'Json Lines File'
        this.name = 'jsonlinesFile'
        this.type = 'Document'
        this.icon = 'jsonlines.svg'
        this.category = 'Document Loaders'
        this.description = `Load data from JSON Lines files`
        this.baseClasses = [this.type]
        this.inputs = [
            {
                label: 'Jsonlines File',
                name: 'jsonlinesFile',
                type: 'file',
                fileType: '.jsonl'
            },
            {
                label: 'Text Splitter',
                name: 'textSplitter',
                type: 'TextSplitter',
                optional: true
            },
            {
                label: 'Pointer Extraction',
                name: 'pointerName',
                type: 'string',
                placeholder: 'Enter pointer name',
                optional: false
            },
            {
                label: 'Metadata',
                name: 'metadata',
                type: 'json',
                optional: true,
                additionalParams: true
            }
        ]
    }

    /**
     * Loads every uploaded JSON Lines file and returns the resulting documents.
     *
     * @param nodeData node configuration; reads `inputs.jsonlinesFile`, `inputs.pointerName`,
     *                 `inputs.textSplitter` and `inputs.metadata`
     * @returns the loaded (and, when a splitter is given, split) documents; when metadata is
     *          supplied, each document is a shallow copy with that metadata merged over its own
     * @throws Error when no file or no pointer name is supplied; errors raised by
     *         JSON.parse or by the loader propagate unchanged
     */
    // NOTE(review): `Promise<any>` presumably mirrors the `init` signature declared on INode — confirm.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    async init(nodeData: INodeData): Promise<any> {
        const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
        const jsonLinesFileBase64 = nodeData.inputs?.jsonlinesFile
        const metadata = nodeData.inputs?.metadata

        if (typeof jsonLinesFileBase64 !== 'string' || jsonLinesFileBase64 === '') {
            throw new Error('Jsonlines File is required')
        }
        const pointer = toJsonPointer(nodeData.inputs?.pointerName)

        // A value shaped like "[...]" is a JSON-encoded list of uploads; anything else is a single upload.
        const files: string[] =
            jsonLinesFileBase64.startsWith('[') && jsonLinesFileBase64.endsWith(']')
                ? JSON.parse(jsonLinesFileBase64)
                : [jsonLinesFileBase64]

        const alldocs = []
        for (const file of files) {
            // The last comma-separated segment is discarded and the one before it is
            // decoded as base64. Presumably the upload is a data URI followed by a
            // trailing tag such as the file name — confirm against the upload widget.
            const segments = file.split(',')
            segments.pop()
            const bf = Buffer.from(segments.pop() || '', 'base64')
            const loader = new JSONLinesLoader(new Blob([bf]), pointer)

            const docs = textSplitter ? await loader.loadAndSplit(textSplitter) : await loader.load()
            alldocs.push(...docs)
        }

        if (!metadata) return alldocs

        // Metadata may arrive already parsed or as a JSON string.
        const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
        // User-supplied keys win over keys the loader set on the document.
        return alldocs.map((doc) => ({
            ...doc,
            metadata: {
                ...doc.metadata,
                ...parsedMetadata
            }
        }))
    }
}

module.exports = { nodeClass: Jsonlines_DocumentLoaders }