From 17318ad7bbeda768ea0b6a47dab59292f36a0888 Mon Sep 17 00:00:00 2001
From: Pravesh-mansharamani
Date: Thu, 15 Jun 2023 07:27:07 +0530
Subject: [PATCH] Added the LatexTextSplitter

---
 .../LatexTextSplitter/LatexTextSplitter.ts    | 76 +++++++++++++++++++
 .../LatexTextSplitter/latexTextSplitter.svg   |  6 ++
 2 files changed, 82 insertions(+)
 create mode 100644 packages/components/nodes/textsplitters/LatexTextSplitter/LatexTextSplitter.ts
 create mode 100644 packages/components/nodes/textsplitters/LatexTextSplitter/latexTextSplitter.svg

diff --git a/packages/components/nodes/textsplitters/LatexTextSplitter/LatexTextSplitter.ts b/packages/components/nodes/textsplitters/LatexTextSplitter/LatexTextSplitter.ts
new file mode 100644
index 00000000000..d6568d07875
--- /dev/null
+++ b/packages/components/nodes/textsplitters/LatexTextSplitter/LatexTextSplitter.ts
@@ -0,0 +1,76 @@
+import { INode, INodeData, INodeParams } from '../../../src/Interface'
+import { getBaseClasses } from '../../../src/utils'
+import { RecursiveCharacterTextSplitter, RecursiveCharacterTextSplitterParams } from 'langchain/text_splitter'
+
+/**
+ * Parses an optional numeric node input.
+ *
+ * Returns `undefined` when the input is unset/falsy or does not start with an
+ * integer, so that `NaN` is never forwarded to the splitter.
+ */
+const parseOptionalInt = (raw: unknown): number | undefined => {
+    if (!raw) return undefined
+    const parsed = parseInt(String(raw), 10)
+    return Number.isNaN(parsed) ? undefined : parsed
+}
+
+/**
+ * Node definition for a text splitter built with
+ * `RecursiveCharacterTextSplitter.fromLanguage('latex', ...)`.
+ */
+class LatexTextSplitter_TextSplitters implements INode {
+    label: string
+    name: string
+    description: string
+    type: string
+    icon: string
+    category: string
+    baseClasses: string[]
+    inputs: INodeParams[]
+
+    constructor() {
+        this.label = 'Latex Text Splitter'
+        this.name = 'latexTextSplitter'
+        this.type = 'LatexTextSplitter'
+        this.icon = 'latexTextSplitter.svg'
+        this.category = 'Text Splitters'
+        this.description = `Split documents along Latex headings, headlines, enumerations and more.`
+        this.baseClasses = [this.type, ...getBaseClasses(RecursiveCharacterTextSplitter)]
+        this.inputs = [
+            {
+                label: 'Chunk Size',
+                name: 'chunkSize',
+                type: 'number',
+                default: 1000,
+                optional: true
+            },
+            {
+                label: 'Chunk Overlap',
+                name: 'chunkOverlap',
+                type: 'number',
+                optional: true
+            }
+        ]
+    }
+
+    /**
+     * Builds the splitter from the node's `chunkSize` / `chunkOverlap` inputs.
+     *
+     * @param nodeData - Node data whose `inputs` may carry `chunkSize` and `chunkOverlap`.
+     * @returns A recursive character splitter created for the `latex` language.
+     */
+    async init(nodeData: INodeData): Promise<RecursiveCharacterTextSplitter> {
+        const options: Partial<RecursiveCharacterTextSplitterParams> = {}
+
+        const chunkSize = parseOptionalInt(nodeData.inputs?.chunkSize)
+        if (chunkSize !== undefined) options.chunkSize = chunkSize
+
+        const chunkOverlap = parseOptionalInt(nodeData.inputs?.chunkOverlap)
+        if (chunkOverlap !== undefined) options.chunkOverlap = chunkOverlap
+
+        // Only explicitly provided values are passed; unset options are left out.
+        return RecursiveCharacterTextSplitter.fromLanguage('latex', options)
+    }
+}
+
+module.exports = { nodeClass: LatexTextSplitter_TextSplitters }
diff --git a/packages/components/nodes/textsplitters/LatexTextSplitter/latexTextSplitter.svg b/packages/components/nodes/textsplitters/LatexTextSplitter/latexTextSplitter.svg
new file mode 100644
index 00000000000..ae9d89beb33
--- /dev/null
+++ b/packages/components/nodes/textsplitters/LatexTextSplitter/latexTextSplitter.svg
@@ -0,0 +1,6 @@
+
+
+
+
+
+