feat: add split listener and optimize performance of auto-completion
HaydenOrz committed Jun 8, 2023
1 parent bd22155 commit e69e204
Showing 9 changed files with 173 additions and 13 deletions.
10 changes: 10 additions & 0 deletions src/parser/common/basic-parser-types.ts
@@ -59,3 +59,13 @@ export interface Suggestions<T = WordRange> {
*/
keywords: string[];
}

export interface TextSlice {
startIndex: number;
endIndex: number;
startLine: number;
endLine: number;
startColumn: number;
endColumn: number;
text: string;
}
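
For orientation, a hedged sketch of the TextSlice that the new splitSQL method (added below in basicParser.ts) might produce for the first statement of 'SELECT 1; SELECT 2;' — assuming the statement rule excludes the trailing semicolon; indexes are 0-based, while lines and columns are 1-based:

// Hypothetical value, not part of the commit:
const firstSlice: TextSlice = {
    startIndex: 0,   // offset of 'S' in the input
    endIndex: 7,     // offset of '1' (inclusive)
    startLine: 1,
    endLine: 1,
    startColumn: 1,  // start.charPositionInLine + 1
    endColumn: 8,    // stop.charPositionInLine + stop.text.length
    text: 'SELECT 1',
};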
116 changes: 107 additions & 9 deletions src/parser/common/basicParser.ts
@@ -10,7 +10,13 @@ import {
import { ParseTreeWalker, ParseTreeListener } from 'antlr4ts/tree';
import { CandidatesCollection, CodeCompletionCore } from 'antlr4-c3';
import { findCaretTokenIndex } from '../../utils/findCaretTokenIndex';
import { CaretPosition, Suggestions, SyntaxSuggestion, WordRange } from './basic-parser-types';
import {
CaretPosition,
Suggestions,
SyntaxSuggestion,
WordRange,
TextSlice
} from './basic-parser-types';
import ParserErrorListener, {
ParserError,
ErrorHandler,
@@ -22,6 +28,10 @@ interface IParser<IParserRuleContext extends ParserRuleContext> extends Parser {
program(): IParserRuleContext;
}

interface SplitListener extends ParseTreeListener {
statementsContext: ParserRuleContext[];
}

/**
* Custom Parser class, subclass needs extends it.
*/
@@ -60,13 +70,21 @@ export default abstract class BasicParser<
* @param candidates candidate list
* @param allTokens all tokens from input
* @param caretTokenIndex tokenIndex of caretPosition
* @param tokenIndexOffset offset of the tokenIndex in the candidates
* compared to the tokenIndex in allTokens
*/
public abstract processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
caretTokenIndex: number
caretTokenIndex: number,
tokenIndexOffset: number,
): Suggestions<Token>;

/**
* Getter for the splitListener instance
*/
protected abstract get splitListener (): SplitListener;

/**
* If it is invoked multiple times in a row and the input parameters are the same,
* this method directly returns the parsing result from the first invocation,
@@ -130,6 +148,7 @@
* @param input string
*/
public createParser(input: string): P {
this._parserTree = null;
this._charStreams = CharStreams.fromString(input.toUpperCase());
this._lexer = this.createLexerFormCharStream(this._charStreams);

@@ -167,24 +186,103 @@
ParseTreeWalker.DEFAULT.walk(listener, parserTree);
}

/**
* Split the input into individual statements
* @param input source string
*/
public splitSQL(input: string): TextSlice[] {
this.parse(input);
const splitListener = this.splitListener;
this.listen(splitListener, this._parserTree);

const res = splitListener.statementsContext.map(context => {
const { start, stop } = context;
return {
startIndex: start.startIndex,
endIndex: stop.stopIndex,
startLine: start.line,
endLine: stop.line,
startColumn: start.charPositionInLine + 1,
endColumn: stop.charPositionInLine + stop.text.length,
text: this._parsedInput.slice(start.startIndex, stop.stopIndex + 1),
}
})

return res;
}

/**
* Get suggestions of syntax and token at caretPosition
* @param input source string
* @param caretPosition caret position, such as cursor position
* @returns suggestion
*/
getSuggestionAtCaretPosition(input: string, caretPosition: CaretPosition): Suggestions | null {
public getSuggestionAtCaretPosition(input: string, caretPosition: CaretPosition): Suggestions | null {
const splitListener = this.splitListener;
// TODO: add a splitListener to every sqlParser implementation, then remove the following if-statement
if(!splitListener) return null;

this.parse(input);
const allTokens = this.getAllTokens(input);
const caretTokenIndex = findCaretTokenIndex(caretPosition, allTokens);

let sqlParserIns = this._parser;
let allTokens = this.getAllTokens(input);
let caretTokenIndex = findCaretTokenIndex(caretPosition, allTokens);
let c3Context: ParserRuleContext = this._parserTree;
let tokenIndexOffset: number = 0;

if(!caretTokenIndex && caretTokenIndex !== 0) return null;

/**
* Split the input by statement.
* Try to collect candidates only from the statement that contains the caret.
*/
this.listen(splitListener, this._parserTree);

const core = new CodeCompletionCore(this._parser);
// If there are multiple statements.
if (splitListener.statementsContext.length) {
// Find the statement rule context where the caret is located.
const caretStatementContext = splitListener?.statementsContext.find(ctx => {
return caretTokenIndex <= ctx.stop?.tokenIndex && caretTokenIndex >= ctx.start.tokenIndex;
});

if(caretStatementContext) {
c3Context = caretStatementContext
} else {
const lastStatementToken = splitListener
.statementsContext[splitListener?.statementsContext.length - 2]
.stop;
/**
* If no caretStatementContext is found and the caret follows all complete statements,
* reparse the part of the input that follows the penultimate statement,
* and let c3 collect candidates in the new parse tree context.
*/
if (caretTokenIndex > lastStatementToken?.tokenIndex) {
/**
* Save the offset between token indexes in the sliced input
* and token indexes in the whole input
*/
tokenIndexOffset = lastStatementToken?.tokenIndex + 1;
// Correct caretTokenIndex
caretTokenIndex = caretTokenIndex - tokenIndexOffset;

const inputSlice = input.slice(lastStatementToken.stopIndex + 1);
const charStreams = CharStreams.fromString(inputSlice.toUpperCase());
const lexer = this.createLexerFormCharStream(charStreams);
const tokenStream = new CommonTokenStream(lexer);
tokenStream.fill();
const parser = this.createParserFromTokenStream(tokenStream);
parser.buildParseTree = true;
sqlParserIns = parser;
c3Context = parser.program();
}
}
}

const core = new CodeCompletionCore(sqlParserIns);
core.preferredRules = this.preferredRules;
const candidates = core.collectCandidates(caretTokenIndex);

const originalSuggestions = this.processCandidates(candidates, allTokens, caretTokenIndex);
const candidates = core.collectCandidates(caretTokenIndex, c3Context);
const originalSuggestions = this.processCandidates(candidates, allTokens, caretTokenIndex, tokenIndexOffset);

const syntaxSuggestions: SyntaxSuggestion<WordRange>[] = originalSuggestions.syntax
.map(syntaxCtx => {
const wordRanges: WordRange[] = syntaxCtx.wordRanges.map(token => {
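To make the new surface area concrete, a minimal usage sketch (hedged: the import path and the CaretPosition field names are assumptions; FlinkSQL is the only dialect with a real split listener in this commit):

// A usage sketch; the import path is an assumption about the package entry.
import { FlinkSQL } from 'dt-sql-parser';

const parser = new FlinkSQL();

// splitSQL: one TextSlice per top-level statement.
const slices = parser.splitSQL('SELECT id FROM t1; SELECT name FROM t2;');
console.log(slices.length); // 2

// getSuggestionAtCaretPosition: only the statement under the caret is handed
// to antlr4-c3, which is where the performance optimization comes from.
const suggestions = parser.getSuggestionAtCaretPosition(
    'SELECT id FROM t1; SELECT name FROM',
    { lineNumber: 1, column: 36 } // caret at end of input; 1-based columns assumed
);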
36 changes: 32 additions & 4 deletions src/parser/flinksql.ts
@@ -1,7 +1,13 @@
import { Token } from 'antlr4ts';
import { CandidatesCollection } from 'antlr4-c3';
import { FlinkSqlLexer } from '../lib/flinksql/FlinkSqlLexer';
import { FlinkSqlParser, ProgramContext } from '../lib/flinksql/FlinkSqlParser';
import {
FlinkSqlParser,
ProgramContext,
SqlStatementContext,
SqlStatementsContext
} from '../lib/flinksql/FlinkSqlParser';
import { FlinkSqlParserListener } from 'src/lib/flinksql/FlinkSqlParserListener';
import { SyntaxContextType, Suggestions, SyntaxSuggestion } from './common/basic-parser-types';
import BasicParser from './common/basicParser';

@@ -24,17 +30,24 @@ export default class FlinkSQL extends BasicParser<FlinkSqlLexer, ProgramContext,
FlinkSqlParser.RULE_catalogPath, // catalog name
]);

protected get splitListener () {
return new FlinkSqlSplitListener();
}

public processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
caretTokenIndex: number
caretTokenIndex: number,
tokenIndexOffset: number
): Suggestions<Token> {
const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
const keywords: string[] = [];

for (let candidate of candidates.rules) {
const [ruleType, candidateRule] = candidate
const tokenRanges = allTokens.slice(candidateRule.startTokenIndex, caretTokenIndex + 1)
const [ruleType, candidateRule] = candidate;
const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + 1);

let syntaxContextType: SyntaxContextType;
switch (ruleType) {
case FlinkSqlParser.RULE_tablePath: {
@@ -85,3 +98,18 @@ export default class FlinkSQL extends BasicParser<FlinkSqlLexer, ProgramContext,
}
}
}

export class FlinkSqlSplitListener implements FlinkSqlParserListener {
private _statementsContext: SqlStatementContext[] = [];

exitSqlStatement = (ctx: SqlStatementContext) => {
this._statementsContext.push(ctx);
}

enterSqlStatements = (ctx: SqlStatementsContext) => {
};

get statementsContext () {
return this._statementsContext;
}
}
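
To make the tokenIndexOffset bookkeeping in processCandidates concrete, a small worked sketch with illustrative numbers (none taken from the commit):

// Suppose the penultimate statement's stop token has tokenIndex 41 in the
// full token stream, and the caret sits at tokenIndex 45. The trailing text
// is reparsed on its own, so token indexes inside the slice restart at 0:
const tokenIndexOffset = 41 + 1;                       // lastStatementToken.tokenIndex + 1
const caretTokenIndexInSlice = 45 - tokenIndexOffset;  // 3, slice-relative
// processCandidates then maps a slice-relative rule start back into allTokens:
const startTokenIndex = 1 + tokenIndexOffset;          // candidateRule.startTokenIndex + offset = 43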
4 changes: 4 additions & 0 deletions src/parser/generic.ts
@@ -17,6 +17,10 @@ export default class GenericSQL extends BasicParser<SqlLexer, ProgramContext, Sq

public preferredRules: Set<number> = new Set();

protected get splitListener () {
return null as any;
}

public processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
4 changes: 4 additions & 0 deletions src/parser/hive.ts
@@ -16,6 +16,10 @@ export default class HiveSQL extends BasicParser<HiveSqlLexer, ProgramContext, H
return new HiveSql(tokenStream);
}

protected get splitListener () {
return null as any;
}

public preferredRules: Set<number> = new Set();

public processCandidates(
4 changes: 4 additions & 0 deletions src/parser/pgsql.ts
@@ -17,6 +17,10 @@ export default class PostgresSQL extends BasicParser<PostgreSQLLexer, ProgramCon

public preferredRules: Set<number> = new Set();

protected get splitListener () {
return null as any;
}

public processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
4 changes: 4 additions & 0 deletions src/parser/plsql.ts
@@ -17,6 +17,10 @@ export default class PLSQL extends BasicParser<PlSqlLexer, ProgramContext, PlSql

public preferredRules: Set<number> = new Set();

protected get splitListener () {
return null as any;
}

public processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
4 changes: 4 additions & 0 deletions src/parser/spark.ts
@@ -17,6 +17,10 @@ export default class SparkSQL extends BasicParser<SparkSqlLexer, ProgramContext,

public preferredRules: Set<number> = new Set();

protected get splitListener () {
return null as any;
}

public processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
4 changes: 4 additions & 0 deletions src/parser/trinosql.ts
@@ -16,6 +16,10 @@ export default class TrinoSQL extends BasicParser<TrinoSqlLexer, ProgramContext,
return parser;
}

protected get splitListener () {
return null as any;
}

public preferredRules: Set<number> = new Set();

public processCandidates(
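The TODO in basicParser.ts notes that every remaining dialect still needs a split listener before these null stubs can be removed. By analogy with FlinkSqlSplitListener above, a hedged sketch of what one could look like for SparkSQL — the listener interface and context names are hypothetical, since they depend on the generated grammar, which this commit does not touch:

// Hypothetical: listener and context names depend on the generated Spark grammar.
import { SparkSqlParserListener } from '../lib/spark/SparkSqlParserListener';
import { SingleStatementContext } from '../lib/spark/SparkSqlParser';

export class SparkSqlSplitListener implements SparkSqlParserListener {
    private _statementsContext: SingleStatementContext[] = [];

    // Collect each completed statement context as the walker leaves it.
    exitSingleStatement = (ctx: SingleStatementContext) => {
        this._statementsContext.push(ctx);
    };

    get statementsContext() {
        return this._statementsContext;
    }
}

The splitListener getter in spark.ts would then return new SparkSqlSplitListener() instead of null as any.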
