diff --git a/packages/components/nodes/documentloaders/Confluence/Confluence.ts b/packages/components/nodes/documentloaders/Confluence/Confluence.ts
new file mode 100644
index 00000000000..9a69be14efd
--- /dev/null
+++ b/packages/components/nodes/documentloaders/Confluence/Confluence.ts
@@ -0,0 +1,125 @@
+import { INode, INodeData, INodeParams } from '../../../src/Interface'
+import { TextSplitter } from 'langchain/text_splitter'
+import { ConfluencePagesLoader, ConfluencePagesLoaderParams } from 'langchain/document_loaders/web/confluence'
+
+/**
+ * Node that loads documents through langchain's ConfluencePagesLoader, optionally
+ * splitting them and merging user-supplied metadata into each one.
+ */
+class Confluence_DocumentLoaders implements INode {
+    label: string
+    name: string
+    description: string
+    type: string
+    icon: string
+    category: string
+    baseClasses: string[]
+    inputs: INodeParams[]
+
+    /** Sets the node's descriptor fields and declares the inputs it accepts. */
+    constructor() {
+        this.label = 'Confluence'
+        this.name = 'confluence'
+        this.type = 'Document'
+        this.icon = 'confluence.png'
+        this.category = 'Document Loaders'
+        this.description = `Load data from a Confluence Document`
+        this.baseClasses = [this.type]
+        this.inputs = [
+            {
+                label: 'Text Splitter',
+                name: 'textSplitter',
+                type: 'TextSplitter',
+                optional: true
+            },
+            {
+                label: 'Username',
+                name: 'username',
+                type: 'string',
+                placeholder: ''
+            },
+            {
+                label: 'Access Token',
+                name: 'accessToken',
+                type: 'password',
+                placeholder: ''
+            },
+            {
+                label: 'Base URL',
+                name: 'baseUrl',
+                type: 'string',
+                placeholder: 'https://example.atlassian.net/wiki'
+            },
+            {
+                label: 'Space Key',
+                name: 'spaceKey',
+                type: 'string',
+                placeholder: '~EXAMPLE362906de5d343d49dcdbae5dEXAMPLE'
+            },
+            {
+                label: 'Limit',
+                name: 'limit',
+                type: 'number',
+                default: 0,
+                optional: true
+            },
+            {
+                label: 'Metadata',
+                name: 'metadata',
+                type: 'json',
+                optional: true,
+                additionalParams: true
+            }
+        ]
+    }
+
+    /**
+     * Loads documents using the credentials, base URL, space key and limit found in the node inputs.
+     *
+     * @param nodeData - Node state; its `inputs` carry username, accessToken, baseUrl, spaceKey,
+     * limit, and the optional textSplitter and metadata values.
+     * @returns The loaded documents, split when a text splitter is supplied. When metadata is
+     * supplied, plain-object copies of the documents with those entries merged over each
+     * document's own metadata.
+     * @throws SyntaxError when metadata is a string that is not valid JSON.
+     */
+    async init(nodeData: INodeData): Promise<any> {
+        const username = nodeData.inputs?.username as string
+        const accessToken = nodeData.inputs?.accessToken as string
+        const spaceKey = nodeData.inputs?.spaceKey as string
+        const baseUrl = nodeData.inputs?.baseUrl as string
+        const limit = nodeData.inputs?.limit as number
+        const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
+        const metadata = nodeData.inputs?.metadata
+
+        const options: ConfluencePagesLoaderParams = {
+            username,
+            accessToken,
+            baseUrl,
+            spaceKey,
+            limit
+        }
+
+        const loader = new ConfluencePagesLoader(options)
+
+        const docs = textSplitter ? await loader.loadAndSplit(textSplitter) : await loader.load()
+
+        if (!metadata) {
+            return docs
+        }
+
+        // The metadata input may arrive already parsed or as a JSON string.
+        const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
+
+        // Spread order matters: user-supplied keys override same-named keys set by the loader.
+        return docs.map((doc) => ({
+            ...doc,
+            metadata: {
+                ...doc.metadata,
+                ...parsedMetadata
+            }
+        }))
+    }
+}
+
+module.exports = { nodeClass: Confluence_DocumentLoaders }
diff --git a/packages/components/nodes/documentloaders/Confluence/confluence.png b/packages/components/nodes/documentloaders/Confluence/confluence.png
new file mode 100644
index 00000000000..3cbb7b3dc24
Binary files /dev/null and b/packages/components/nodes/documentloaders/Confluence/confluence.png differ
diff --git a/packages/components/package.json b/packages/components/package.json
index 76cf63774fe..65eedd51f08 100644
--- a/packages/components/package.json
+++ b/packages/components/package.json
@@ -38,7 +38,8 @@
         "node-fetch": "^2.6.11",
         "pdf-parse": "^1.1.1",
         "weaviate-ts-client": "^1.1.0",
-        "ws": "^8.9.0"
+        "ws": "^8.9.0",
+        "html-to-text": "^9.0.5"
     },
     "devDependencies": {
         "@types/gulp": "4.0.9",