Skip to content

Commit

Permalink
Occurrence number validation:
Browse files Browse the repository at this point in the history
* The validation is only performed once when the first Parser instance is created.
* If a problem is detected, an Error is thrown immediately (fail fast).
* Certain combinations of DSL productions and their arguments cannot appear more
  than once inside the same top level rule.
  The disallowed combinations:
  * CONSUME: the same occurrence index and the same terminal identity
  * SUBRULE: the same occurrence index and the same subrule invoked
  * MANY, OR, OPTION, AT_LEAST_ONE: the same occurrence index
* These rules are necessary because each of the above combinations identifies the
  current position in the grammar at runtime. This information is needed in order to
  perform error recovery, build automatic lookahead functions, etc.
  • Loading branch information
firasnajjar committed Jun 16, 2015
1 parent 4bd0b2f commit 589f9b2
Show file tree
Hide file tree
Showing 9 changed files with 316 additions and 10 deletions.
1 change: 1 addition & 0 deletions build/chevrotain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
/// <reference path="../src/parse/cache.ts" />
/// <reference path="../src/parse/grammar/lookahead.ts" />
/// <reference path="../src/parse/gast_builder.ts" />
/// <reference path="../src/parse/grammar/checks.ts" />
/// <reference path="../src/parse/recognizer.ts" />
/// <reference path="../src/api.ts" />

Expand Down
6 changes: 3 additions & 3 deletions examples/ecmascript5/ecmascript5_parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1024,7 +1024,7 @@ module chevrotain.examples.ecma5 {
inPossible = this.canInComeAfterExp(headerExp)
})

headerPart = this.SUBRULE(this.ForHeaderParts, [inPossible])
headerPart = this.SUBRULE2(this.ForHeaderParts, [inPossible])
return PT(ForNoVarHeader, [headerExp, headerPart])
}}
], "var or expression")
Expand Down Expand Up @@ -1169,7 +1169,7 @@ module chevrotain.examples.ecma5 {
defaultClause = this.SUBRULE(this.DefaultClause)
})
this.OPTION3(() => {
clausesAfterDefault = this.SUBRULE(this.CaseClauses)
clausesAfterDefault = this.SUBRULE2(this.CaseClauses)
})
this.CONSUME(RCurly)

Expand Down Expand Up @@ -1253,7 +1253,7 @@ module chevrotain.examples.ecma5 {
})
}},
{ALT: () => {
finallyPt = this.SUBRULE(this.Finally)
finallyPt = this.SUBRULE2(this.Finally)
}}
], "catch or finally")
// @formatter:on
Expand Down
4 changes: 2 additions & 2 deletions examples/error_recovery/sql_statements/sql_recovery_parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,10 @@ module chevrotain.examples.recovery.sql {

// parse
this.CONSUME1(LParenTok)
this.SUBRULE(this.value)
this.SUBRULE1(this.value)
this.MANY(() => {
commas.push(this.CONSUME1(CommaTok))
values.push(this.SUBRULE(this.value))
values.push(this.SUBRULE2(this.value))
})
this.CONSUME1(RParenTok)
// tree rewrite
Expand Down
2 changes: 1 addition & 1 deletion src/parse/gast_builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -312,8 +312,8 @@ module chevrotain.gastBuilder {
}
}

export class GastRefResolverVisitor extends gast.GAstVisitor {

export class GastRefResolverVisitor extends gast.GAstVisitor {
constructor(private nameToProd:lang.HashTable<gast.TOP_LEVEL>) { super() }

public resolveRefs():void {
Expand Down
110 changes: 110 additions & 0 deletions src/parse/grammar/checks.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/// <reference path="gast.ts" />
/// <reference path="../../../libs/lodash.d.ts" />

module chevrotain.validations {

import gast = chevrotain.gast

/**
 * Describes a group of productions inside a single top level rule that were found to
 * share the same DSL method and the same occurrence number.
 *
 * The class only implements the Error interface (it does not extend the built-in Error),
 * so 'name' and 'message' are supplied explicitly.
 */
export class GrammarError implements Error {

    /**
     * @param refs - the productions that collide with each other; the first one is used
     *               as the representative when building the message.
     * @param topLevelRule - the top level rule in which the collision was found.
     * @param name - the error name, used as the prefix of toString().
     */
    constructor(public refs: gast.IProduction[],
                public topLevelRule: gast.TOP_LEVEL,
                public name: string = "Occurrence Number Error") {
    }

    /**
     * Human readable description built from the first colliding production.
     * NOTE: the "occurence" spelling is asserted verbatim by the spec, keep both in sync.
     */
    public get message(): string {
        var representative = this.refs[0]
        var dslName = getProductionDslName(representative)
        var occurrence = (<any>representative).occurrenceInParent
        var timesFound = this.refs.length
        var ruleName = this.topLevelRule.name

        return dslName + " with occurence number " + occurrence +
            " appears " + timesFound + " times in " + ruleName +
            " with the same occurrence number"
    }

    // Overridden so the message is visible when the error is printed to the console.
    public toString(): string {
        var prefix = this.name + ": "
        return prefix + this.message
    }
}

/**
 * Runs the occurrence number validation on every given top level rule.
 *
 * @param topLevels - the top level rules to validate.
 * @returns a single flat array holding the errors found in all of the rules.
 */
export function validateGrammar(topLevels:gast.TOP_LEVEL[]): GrammarError[] {
    // collects the relevant productions of one rule and validates them
    function errorsOfSingleRule(rule:gast.TOP_LEVEL):GrammarError[] {
        var collector = new OccurrenceValidationCollector()
        rule.accept(collector)
        return validateOccurrenceUsageInProductions(collector.allProductions, rule)
    }

    var errorsPerRule = _.map(topLevels, (rule) => errorsOfSingleRule(rule))
    return _.flatten<GrammarError>(errorsPerRule)
}

/**
 * Maps a production to the name of the parser DSL method that creates it.
 *
 * @param prod - the production to name.
 * @returns "CONSUME" for a Terminal, "SUBRULE" for a ProdRef, otherwise the name of the
 *          production's constructor function.
 */
export function getProductionDslName(prod: gast.IProduction) {
    if (prod instanceof gast.Terminal) {
        return "CONSUME"
    }

    if (prod instanceof gast.ProdRef) {
        return "SUBRULE"
    }

    // for every other production the name is taken from its constructor
    var prodConstructor = (<any>prod).constructor
    return lang.functionName(prodConstructor)
}

/**
 * Returns the DSL argument that takes part in identifying a production.
 *
 * @param prod - the production to inspect.
 * @param defaultProductionArgument - returned for productions that are neither a Terminal
 *                                    nor a ProdRef.
 * @returns the name of the terminal type for a Terminal, the referenced production name
 *          for a ProdRef, otherwise the default argument.
 */
export function getRelevantProductionArgument(prod: gast.IProduction, defaultProductionArgument: string = "...") {
    if (prod instanceof gast.Terminal) {
        var terminal = <gast.Terminal>prod
        return lang.functionName(terminal.terminalType)
    }

    if (prod instanceof gast.ProdRef) {
        var subruleRef = <gast.ProdRef>prod
        return subruleRef.refProdName
    }

    return defaultProductionArgument
}

/**
 * Builds the key used to detect duplicate productions, shaped like a DSL call:
 * the DSL method name, the occurrence number and the relevant argument in parentheses,
 * for example "CONSUME1(IdentTok)" or "OPTION1(...)".
 *
 * @param prod - the production to build the key for.
 */
export function identifyProductionForDuplicates(prod: gast.IProduction) {
    var dslName = getProductionDslName(prod)
    var occurrenceIdx = (<any>prod).occurrenceInParent
    var dslArgument = getRelevantProductionArgument(prod)

    return dslName + occurrenceIdx + "(" + dslArgument + ")"
}

/**
 * Creates the error describing a group of colliding productions.
 *
 * @param prods - productions sharing the same duplicate-detection key; they are expected
 *                to all be of the same type and to have the same occurrence number.
 * @param topLevelRule - the top level rule in which the productions were found.
 * @returns a GrammarError referencing all of the given productions.
 */
export function productionOccurrenceErrorGenerator(prods:gast.IProduction[], topLevelRule:gast.TOP_LEVEL) : GrammarError {
    // GrammarError itself picks the first production as the representative when it
    // builds its message, so no representative needs to be selected here.
    return new GrammarError(prods, topLevelRule)
}

/**
 * Finds the productions of one top level rule that collide with each other.
 *
 * @param productions - the productions collected from the rule.
 * @param topLevel - the rule the productions belong to, attached to every created error.
 * @returns one GrammarError per duplicate-detection key shared by more than one production.
 */
export function validateOccurrenceUsageInProductions(productions:gast.IProduction[], topLevel:gast.TOP_LEVEL) : GrammarError[] {
    // the result of groupBy is an object mapping each key to the productions that produced it
    var prodsByKey = _.groupBy(productions, identifyProductionForDuplicates)

    var duplicateErrors:GrammarError[] = []
    _.forEach(prodsByKey, (sameKeyProds) => {
        // a key that was produced only once is a valid usage
        if (sameKeyProds.length <= 1) {
            return
        }
        duplicateErrors.push(productionOccurrenceErrorGenerator(sameKeyProds, topLevel))
    })

    return duplicateErrors
}

/**
 * A GAST visitor that records every visited production of the kinds checked by the
 * occurrence number validation: Terminal, ProdRef, OPTION, MANY, AT_LEAST_ONE and OR.
 * The productions are accumulated in 'allProductions' in the order they are visited.
 */
export class OccurrenceValidationCollector extends gast.GAstVisitor {
    public allProductions : gast.IProduction[]= []

    public visitTerminal(terminal:gast.Terminal):void {
        this.recordProduction(terminal)
    }

    public visitProdRef(subrule:gast.ProdRef):void {
        this.recordProduction(subrule)
    }

    public visitOPTION(option:gast.OPTION):void {
        this.recordProduction(option)
    }

    public visitMANY(many:gast.MANY):void {
        this.recordProduction(many)
    }

    public visitAT_LEAST_ONE(atLeastOne:gast.AT_LEAST_ONE):void {
        this.recordProduction(atLeastOne)
    }

    public visitOR(or:gast.OR):void {
        this.recordProduction(or)
    }

    // single place where a visited production is added to the collected list
    private recordProduction(prod:gast.IProduction):void {
        this.allProductions.push(prod)
    }

}

}
11 changes: 9 additions & 2 deletions src/parse/recognizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
/// <reference path="grammar/interpreter.ts" />
/// <reference path="grammar/follow.ts" />
/// <reference path="grammar/lookahead.ts" />
/// <reference path="grammar/checks.ts" />


/// <reference path="../../libs/lodash.d.ts" />
Expand All @@ -21,6 +22,7 @@ module chevrotain.recognizer {
import gastBuilder = chevrotain.gastBuilder
import follows = chevrotain.follow
import lookahead = chevrotain.lookahead
import validations = chevrotain.validations

// hacks to bypass no support for custom Errors in javascript/typescript
export function isRecognitionException(error:Error) {
Expand Down Expand Up @@ -308,7 +310,7 @@ module chevrotain.recognizer {
*/
export class BaseIntrospectionRecognizer extends BaseRecognizer {

protected static performSelfAnalysis(classInstance:any) {
protected static performSelfAnalysis(classInstance:BaseIntrospectionRecognizer) {
var className = lang.classNameFromInstance(classInstance)
// this information only needs to be computed once
if (!cache.CLASS_TO_SELF_ANALYSIS_DONE.containsKey(className)) {
Expand All @@ -318,6 +320,10 @@ module chevrotain.recognizer {
var allFollows = follows.computeAllProdsFollows(grammarProductions.values())
cache.setResyncFollowsForClass(className, allFollows)
cache.CLASS_TO_SELF_ANALYSIS_DONE.put(className, true)
var validationErrors = validations.validateGrammar(grammarProductions.values())
if (validationErrors.length > 0) {
throw validationErrors
}
}
}

Expand Down Expand Up @@ -824,7 +830,8 @@ module chevrotain.recognizer {
var parserClassProductions = cache.getProductionsForClass(this.className)
// only build the gast representation once
if (!(parserClassProductions.containsKey(ruleName))) {
parserClassProductions.put(ruleName, gastBuilder.buildTopProduction(impl.toString(), ruleName, this.tokensMap))
var gastProduction = gastBuilder.buildTopProduction(impl.toString(), ruleName, this.tokensMap)
parserClassProductions.put(ruleName, gastProduction)
}

var wrappedGrammarRule = function (idxInCallingRule:number = 1, args:any[] = []) {
Expand Down
149 changes: 149 additions & 0 deletions test/parse/grammar/validations_spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
module chevrotain.validations.spec {
import gast = chevrotain.gast
import tok = chevrotain.tokens
import samples = test.samples

describe("Grammar Validations module", function () {

describe("validateGrammar", function () {

    it("validates every one of the TOP_RULEs in the input", function () {
        // IdentTok is consumed with occurrence 1 both before and inside the MANY
        var ruleWithRepeatedIdent = new gast.TOP_LEVEL("qualifiedNameErr1", [
            new gast.Terminal(samples.IdentTok, 1),
            new gast.MANY([
                new gast.Terminal(samples.DotTok),
                new gast.Terminal(samples.IdentTok, 1)//this is the error
            ])
        ])

        // two identical MANY blocks, neither is given an explicit occurrence number
        // (presumably both default to the same occurrence, hence they collide too)
        var ruleWithRepeatedMany = new gast.TOP_LEVEL("qualifiedNameErr2", [
            new gast.Terminal(samples.IdentTok, 1),
            new gast.MANY([
                new gast.Terminal(samples.DotTok),
                new gast.Terminal(samples.IdentTok, 2)
            ]),
            new gast.MANY([
                new gast.Terminal(samples.DotTok),
                new gast.Terminal(samples.IdentTok, 2)
            ])
        ])

        var rulesUnderTest = [ruleWithRepeatedIdent, ruleWithRepeatedMany]
        var detectedErrors = validateGrammar(rulesUnderTest)

        // the errors of both rules are expected in the single returned array
        expect(detectedErrors.length).toBe(4)
    })
})

// Each case checks the key built for one kind of production:
// <DSL method name><occurrence number>(<relevant argument>)
describe("identifyProductionForDuplicates function", function () {
    it("generates DSL code for a ProdRef", function () {
        var subruleRef = new gast.ProdRef("this.ActionDeclaration")
        expect(identifyProductionForDuplicates(subruleRef)).toBe("SUBRULE1(this.ActionDeclaration)")
    })

    it("generates DSL code for a OPTION", function () {
        var option = new gast.OPTION([], 1)
        expect(identifyProductionForDuplicates(option)).toBe("OPTION1(...)")
    })

    it("generates DSL code for a AT_LEAST_ONE", function () {
        var atLeastOne = new gast.AT_LEAST_ONE([], 1)
        expect(identifyProductionForDuplicates(atLeastOne)).toBe("AT_LEAST_ONE1(...)")
    })

    it("generates DSL code for a MANY", function () {
        var many = new gast.MANY([], 1)
        expect(identifyProductionForDuplicates(many)).toBe("MANY1(...)")
    })

    it("generates DSL code for a OR", function () {
        var or = new gast.OR([], 1)
        expect(identifyProductionForDuplicates(or)).toBe("OR1(...)")
    })

    it("generates DSL code for a Terminal", function () {
        var terminal = new gast.Terminal(samples.IdentTok, 1)
        expect(identifyProductionForDuplicates(terminal)).toBe("CONSUME1(IdentTok)")
    })
})

// Every case hands a group of colliding productions to the error generator and checks
// that the created error references all of them and reports the matching DSL name.
// The fixtures are typed as arrays: a one element tuple type such as [gast.OPTION]
// cannot hold the several elements each fixture contains.
describe("The error generator function", function () {
    it("generates the correct error for OPTION with the same occurrence number", function () {
        var options : gast.OPTION[] = [
            new gast.OPTION([], 1),
            new gast.OPTION([], 1),
            new gast.OPTION([], 1)
        ]
        // the top level rule is irrelevant for these assertions, so null is passed
        var error = productionOccurrenceErrorGenerator(options, null)
        expect(error).toBeDefined()
        expect(getProductionDslName(error.refs[0])).toBe("OPTION")
        expect(error.refs.length).toBe(3)
    })

    it("generates the correct error for AT_LEAST_ONE with the same occurrence number", function () {
        var atLeastOne : gast.AT_LEAST_ONE[] = [
            new gast.AT_LEAST_ONE([], 2),
            new gast.AT_LEAST_ONE([], 2)
        ]
        var error = productionOccurrenceErrorGenerator(atLeastOne, null)
        expect(error).toBeDefined()
        expect(getProductionDslName(error.refs[0])).toBe("AT_LEAST_ONE")
        expect(error.refs.length).toBe(2)
    })

    it("generates the correct error for terminals with the same token and occurrence number", function () {
        var consumeToks : gast.Terminal[] = [
            new gast.Terminal(samples.IdentTok, 1),
            new gast.Terminal(samples.IdentTok, 1),
            new gast.Terminal(samples.IdentTok, 1)
        ]
        var error = productionOccurrenceErrorGenerator(consumeToks, null)
        expect(error).toBeDefined()
        expect(getProductionDslName(error.refs[0])).toBe("CONSUME")
        expect(error.refs.length).toBe(3)
    })

    it("generates the correct error for ProdRef with the same referenced rule and occurrence number", function () {
        var subRules : gast.ProdRef[] = [
            new gast.ProdRef("this.GroupBy"),
            new gast.ProdRef("this.GroupBy"),
            new gast.ProdRef("this.GroupBy")
        ]
        var error = productionOccurrenceErrorGenerator(subRules, null)
        expect(error).toBeDefined()
        expect(getProductionDslName(error.refs[0])).toBe("SUBRULE")
        expect(error.refs.length).toBe(3)
    })

})

describe("OccurrenceValidationCollector", function () {

    it("collects all the productions relevant to occurrence validation", function () {
        // a fresh collector per sample grammar, so the counts do not mix
        var qualifiedNameCollector = new OccurrenceValidationCollector()
        var actionDecCollector = new OccurrenceValidationCollector()

        samples.qualifiedName.accept(qualifiedNameCollector)
        samples.actionDec.accept(actionDecCollector)

        var qualifiedNameCollected = qualifiedNameCollector.allProductions
        expect(qualifiedNameCollected.length).toBe(4)

        var actionDecCollected = actionDecCollector.allProductions
        expect(actionDecCollected.length).toBe(13)
    })

})


describe("The GrammarError class", function () {

    it("generates the correct error message", function () {
        // two consumes of the same token, created without an explicit occurrence number
        var collidingTerminals = [
            new gast.Terminal(samples.ActionTok),
            new gast.Terminal(samples.ActionTok)
        ]
        var containingRule = new gast.TOP_LEVEL("batata", [])
        var grammarError = new GrammarError(collidingTerminals, containingRule)

        // toString() is the message prefixed with the default error name
        var printedError = grammarError.toString()
        expect(printedError).toBe("Occurrence Number Error: CONSUME with occurence number 1 " +
            "appears 2 times in batata with the same occurrence number")

        var messageOnly = grammarError.message
        expect(messageOnly).toBe("CONSUME with occurence number 1 appears 2 times in batata with " +
            "the same occurrence number")
    })

})
})

}
4 changes: 2 additions & 2 deletions test/parse/recognizer_lookahead_spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -813,11 +813,11 @@ module chevrotain.recognizer.lookahead.spec {
this.CONSUME1(TwoTok)
}},
{ALT: () => { // <-- this alternative starts with the same token as the previous one, ambiguity!
this.CONSUME1(OneTok)
this.CONSUME2(OneTok)
this.CONSUME1(ThreeTok)
}},
{ALT: () => {
this.CONSUME1(TwoTok)
this.CONSUME2(TwoTok)
}},
{ALT: () => {
this.CONSUME2(ThreeTok)
Expand Down
Loading

0 comments on commit 589f9b2

Please sign in to comment.