feat: add split listener and optimize performance of auto-completion
HaydenOrz committed Jun 8, 2023
1 parent bd22155 commit e69e204
Showing 9 changed files with 173 additions and 13 deletions.
10 changes: 10 additions & 0 deletions src/parser/common/basic-parser-types.ts
@@ -59,3 +59,13 @@ export interface Suggestions<T = WordRange> {
*/
keywords: string[];
}

export interface TextSlice {
startIndex: number;
endIndex: number;
startLine: number;
endLine: number;
startColumn: number;
endColumn: number;
text: string;
}
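
For orientation, a hedged sketch of the TextSlice that the new splitSQL method (added below in basicParser.ts) might produce for the first statement of 'SELECT 1; SELECT 2;' — assuming the statement rule excludes the trailing semicolon; indexes are 0-based, while lines and columns are 1-based:

// Hypothetical value, not part of the commit:
const firstSlice: TextSlice = {
    startIndex: 0,   // offset of 'S' in the input
    endIndex: 7,     // offset of '1' (inclusive)
    startLine: 1,
    endLine: 1,
    startColumn: 1,  // start.charPositionInLine + 1
    endColumn: 8,    // stop.charPositionInLine + stop.text.length
    text: 'SELECT 1',
};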
116 changes: 107 additions & 9 deletions src/parser/common/basicParser.ts
@@ -10,7 +10,13 @@ import {
import { ParseTreeWalker, ParseTreeListener } from 'antlr4ts/tree';
import { CandidatesCollection, CodeCompletionCore } from 'antlr4-c3';
import { findCaretTokenIndex } from '../../utils/findCaretTokenIndex';
import { CaretPosition, Suggestions, SyntaxSuggestion, WordRange } from './basic-parser-types';
import {
CaretPosition,
Suggestions,
SyntaxSuggestion,
WordRange,
TextSlice
} from './basic-parser-types';
import ParserErrorListener, {
ParserError,
ErrorHandler,
@@ -22,6 +28,10 @@ interface IParser<IParserRuleContext extends ParserRuleContext> extends Parser {
program(): IParserRuleContext;
}

interface SplitListener extends ParseTreeListener {
statementsContext: ParserRuleContext[];
}

/**
* Custom Parser class, subclass needs extends it.
*/
@@ -60,13 +70,21 @@ export default abstract class BasicParser<
* @param candidates candidate list
* @param allTokens all tokens from input
* @param caretTokenIndex tokenIndex of caretPosition
* @param tokenIndexOffset offset of the tokenIndex in the candidates
* compared to the tokenIndex in allTokens
*/
public abstract processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
caretTokenIndex: number
caretTokenIndex: number,
tokenIndexOffset: number,
): Suggestions<Token>;

/**
* Getter for the splitListener instance
*/
protected abstract get splitListener (): SplitListener;

/**
* If it is invoked multiple times in a row and the input parameters are the same,
* this method directly returns the parsing result from the first invocation,
@@ -130,6 +148,7 @@
* @param input string
*/
public createParser(input: string): P {
this._parserTree = null;
this._charStreams = CharStreams.fromString(input.toUpperCase());
this._lexer = this.createLexerFormCharStream(this._charStreams);

@@ -167,24 +186,103 @@
ParseTreeWalker.DEFAULT.walk(listener, parserTree);
}

/**
* Split the input into individual statements
* @param input source string
*/
public splitSQL(input: string): TextSlice[] {
this.parse(input);
const splitListener = this.splitListener;
this.listen(splitListener, this._parserTree);

const res = splitListener.statementsContext.map(context => {
const { start, stop } = context;
return {
startIndex: start.startIndex,
endIndex: stop.stopIndex,
startLine: start.line,
endLine: stop.line,
startColumn: start.charPositionInLine + 1,
endColumn: stop.charPositionInLine + stop.text.length,
text: this._parsedInput.slice(start.startIndex, stop.stopIndex + 1),
}
})

return res;
}

/**
* Get suggestions of syntax and token at caretPosition
* @param input source string
* @param caretPosition caret position, such as cursor position
* @returns suggestion
*/
getSuggestionAtCaretPosition(input: string, caretPosition: CaretPosition): Suggestions | null {
public getSuggestionAtCaretPosition(input: string, caretPosition: CaretPosition): Suggestions | null {
const splitListener = this.splitListener;
// TODO: add a splitListener to every sqlParser implementation, then remove the following if-statement
if(!splitListener) return null;

this.parse(input);
const allTokens = this.getAllTokens(input);
const caretTokenIndex = findCaretTokenIndex(caretPosition, allTokens);

let sqlParserIns = this._parser;
let allTokens = this.getAllTokens(input);
let caretTokenIndex = findCaretTokenIndex(caretPosition, allTokens);
let c3Context: ParserRuleContext = this._parserTree;
let tokenIndexOffset: number = 0;

if(!caretTokenIndex && caretTokenIndex !== 0) return null;

/**
* Split the input by statement.
* Try to collect candidates only from the statement that contains the caret.
*/
this.listen(splitListener, this._parserTree);

const core = new CodeCompletionCore(this._parser);
// If there are multiple statements.
if (splitListener.statementsContext.length) {
// Find the statement rule context where the caret is located.
const caretStatementContext = splitListener?.statementsContext.find(ctx => {
return caretTokenIndex <= ctx.stop?.tokenIndex && caretTokenIndex >= ctx.start.tokenIndex;
});

if(caretStatementContext) {
c3Context = caretStatementContext
} else {
const lastStatementToken = splitListener
.statementsContext[splitListener?.statementsContext.length - 2]
.stop;
/**
* If no caretStatementContext is found and the caret follows all complete statements,
* reparse the part of the input that follows the penultimate statement,
* and let c3 collect candidates in the new parse tree context.
*/
if (caretTokenIndex > lastStatementToken?.tokenIndex) {
/**
* Save the offset between token indexes in the sliced input
* and token indexes in the whole input
*/
tokenIndexOffset = lastStatementToken?.tokenIndex + 1;
// Correct caretTokenIndex
caretTokenIndex = caretTokenIndex - tokenIndexOffset;

const inputSlice = input.slice(lastStatementToken.stopIndex + 1);
const charStreams = CharStreams.fromString(inputSlice.toUpperCase());
const lexer = this.createLexerFormCharStream(charStreams);
const tokenStream = new CommonTokenStream(lexer);
tokenStream.fill();
const parser = this.createParserFromTokenStream(tokenStream);
parser.buildParseTree = true;
sqlParserIns = parser;
c3Context = parser.program();
}
}
}

const core = new CodeCompletionCore(sqlParserIns);
core.preferredRules = this.preferredRules;
const candidates = core.collectCandidates(caretTokenIndex);

const originalSuggestions = this.processCandidates(candidates, allTokens, caretTokenIndex);
const candidates = core.collectCandidates(caretTokenIndex, c3Context);
const originalSuggestions = this.processCandidates(candidates, allTokens, caretTokenIndex, tokenIndexOffset);

const syntaxSuggestions: SyntaxSuggestion<WordRange>[] = originalSuggestions.syntax
.map(syntaxCtx => {
const wordRanges: WordRange[] = syntaxCtx.wordRanges.map(token => {
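To make the new surface area concrete, a minimal usage sketch (hedged: the import path and the CaretPosition field names are assumptions; FlinkSQL is the only dialect with a real split listener in this commit):

// A usage sketch; the import path is an assumption about the package entry.
import { FlinkSQL } from 'dt-sql-parser';

const parser = new FlinkSQL();

// splitSQL: one TextSlice per top-level statement.
const slices = parser.splitSQL('SELECT id FROM t1; SELECT name FROM t2;');
console.log(slices.length); // 2

// getSuggestionAtCaretPosition: only the statement under the caret is handed
// to antlr4-c3, which is where the performance optimization comes from.
const suggestions = parser.getSuggestionAtCaretPosition(
    'SELECT id FROM t1; SELECT name FROM',
    { lineNumber: 1, column: 36 } // caret at end of input; 1-based columns assumed
);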
36 changes: 32 additions & 4 deletions src/parser/flinksql.ts
@@ -1,7 +1,13 @@
import { Token } from 'antlr4ts';
import { CandidatesCollection } from 'antlr4-c3';
import { FlinkSqlLexer } from '../lib/flinksql/FlinkSqlLexer';
import { FlinkSqlParser, ProgramContext } from '../lib/flinksql/FlinkSqlParser';
import {
FlinkSqlParser,
ProgramContext,
SqlStatementContext,
SqlStatementsContext
} from '../lib/flinksql/FlinkSqlParser';
import { FlinkSqlParserListener } from 'src/lib/flinksql/FlinkSqlParserListener';
import { SyntaxContextType, Suggestions, SyntaxSuggestion } from './common/basic-parser-types';
import BasicParser from './common/basicParser';

@@ -24,17 +30,24 @@ export default class FlinkSQL extends BasicParser<FlinkSqlLexer, ProgramContext,
FlinkSqlParser.RULE_catalogPath, // catalog name
]);

protected get splitListener () {
return new FlinkSqlSplitListener();
}

public processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
caretTokenIndex: number
caretTokenIndex: number,
tokenIndexOffset: number
): Suggestions<Token> {
const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
const keywords: string[] = [];

for (let candidate of candidates.rules) {
const [ruleType, candidateRule] = candidate
const tokenRanges = allTokens.slice(candidateRule.startTokenIndex, caretTokenIndex + 1)
const [ruleType, candidateRule] = candidate;
const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + 1);

let syntaxContextType: SyntaxContextType;
switch (ruleType) {
case FlinkSqlParser.RULE_tablePath: {
@@ -85,3 +98,18 @@ export default class FlinkSQL extends BasicParser<FlinkSqlLexer, ProgramContext,
}
}
}

export class FlinkSqlSplitListener implements FlinkSqlParserListener {
private _statementsContext: SqlStatementContext[] = [];

exitSqlStatement = (ctx: SqlStatementContext) => {
this._statementsContext.push(ctx);
}

enterSqlStatements = (ctx: SqlStatementsContext) => {
};

get statementsContext () {
return this._statementsContext;
}
}
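
To make the tokenIndexOffset bookkeeping in processCandidates concrete, a small worked sketch with illustrative numbers (none taken from the commit):

// Suppose the penultimate statement's stop token has tokenIndex 41 in the
// full token stream, and the caret sits at tokenIndex 45. The trailing text
// is reparsed on its own, so token indexes inside the slice restart at 0:
const tokenIndexOffset = 41 + 1;                       // lastStatementToken.tokenIndex + 1
const caretTokenIndexInSlice = 45 - tokenIndexOffset;  // 3, slice-relative
// processCandidates then maps a slice-relative rule start back into allTokens:
const startTokenIndex = 1 + tokenIndexOffset;          // candidateRule.startTokenIndex + offset = 43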
4 changes: 4 additions & 0 deletions src/parser/generic.ts
@@ -17,6 +17,10 @@ export default class GenericSQL extends BasicParser<SqlLexer, ProgramContext, Sq

public preferredRules: Set<number> = new Set();

protected get splitListener () {
return null as any;
}

public processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
4 changes: 4 additions & 0 deletions src/parser/hive.ts
@@ -16,6 +16,10 @@ export default class HiveSQL extends BasicParser<HiveSqlLexer, ProgramContext, H
return new HiveSql(tokenStream);
}

protected get splitListener () {
return null as any;
}

public preferredRules: Set<number> = new Set();

public processCandidates(
4 changes: 4 additions & 0 deletions src/parser/pgsql.ts
@@ -17,6 +17,10 @@ export default class PostgresSQL extends BasicParser<PostgreSQLLexer, ProgramCon

public preferredRules: Set<number> = new Set();

protected get splitListener () {
return null as any;
}

public processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
4 changes: 4 additions & 0 deletions src/parser/plsql.ts
@@ -17,6 +17,10 @@ export default class PLSQL extends BasicParser<PlSqlLexer, ProgramContext, PlSql

public preferredRules: Set<number> = new Set();

protected get splitListener () {
return null as any;
}

public processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
4 changes: 4 additions & 0 deletions src/parser/spark.ts
@@ -17,6 +17,10 @@ export default class SparkSQL extends BasicParser<SparkSqlLexer, ProgramContext,

public preferredRules: Set<number> = new Set();

protected get splitListener () {
return null as any;
}

public processCandidates(
candidates: CandidatesCollection,
allTokens: Token[],
4 changes: 4 additions & 0 deletions src/parser/trinosql.ts
@@ -16,6 +16,10 @@ export default class TrinoSQL extends BasicParser<TrinoSqlLexer, ProgramContext,
return parser;
}

protected get splitListener () {
return null as any;
}

public preferredRules: Set<number> = new Set();

public processCandidates(
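The TODO in basicParser.ts notes that every remaining dialect still needs a split listener before these null stubs can be removed. By analogy with FlinkSqlSplitListener above, a hedged sketch of what one could look like for SparkSQL — the listener interface and context names are hypothetical, since they depend on the generated grammar, which this commit does not touch:

// Hypothetical: listener and context names depend on the generated Spark grammar.
import { SparkSqlParserListener } from '../lib/spark/SparkSqlParserListener';
import { SingleStatementContext } from '../lib/spark/SparkSqlParser';

export class SparkSqlSplitListener implements SparkSqlParserListener {
    private _statementsContext: SingleStatementContext[] = [];

    // Collect each completed statement context as the walker leaves it.
    exitSingleStatement = (ctx: SingleStatementContext) => {
        this._statementsContext.push(ctx);
    };

    get statementsContext() {
        return this._statementsContext;
    }
}

The splitListener getter in spark.ts would then return new SparkSqlSplitListener() instead of null as any.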
