Skip to content

Commit

Permalink
Added facilities for LexicalInfo.
Browse files Browse the repository at this point in the history
Improvements for OMetaInputWithMemo.
  • Loading branch information
MaximTrushin committed Oct 28, 2011
1 parent a4d3c0a commit d75b93b
Show file tree
Hide file tree
Showing 9 changed files with 278 additions and 87 deletions.
32 changes: 31 additions & 1 deletion src/Boo.OMeta.Parser.Tests/BooParserTestFixture.boo
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,37 @@ partial class BooParserTestFixture:
assert m.Documentation is not null
Assert.AreEqual(normalize(m.Documentation), normalize(m.ToCodeString()))
assert input.IsEmpty, input.ToString()



[Test]
def TestEndSourceLocationForInlineClosures():
code = """foo = { a = 3;
return a; }"""
EnsureClosureEndSourceLocation(code, 2, 10)


[Test]
def TestEndSourceLocationForBlockClosures():
code = """
foo = def():
return a
"""
EnsureClosureEndSourceLocation(code, 3, 13)


def EnsureClosureEndSourceLocation(code as string, line as int, column as int):
parser = BooParser()

match parser.module(code):
case SuccessfulMatch(Input: input, Value: m=Module()):
assert m is not null
assert input.IsEmpty, input.ToString()
e = (m.Globals.Statements[0] as ExpressionStatement).Expression
cbe = (e as BinaryExpression).Right as BlockExpression
esl = cbe.Body.EndSourceLocation
Assert.AreEqual(line, esl.Line)
Assert.AreEqual(column, esl.Column)

def normalize(s as string):
return s.Trim().Replace("\r\n", "\n")

Expand Down
39 changes: 36 additions & 3 deletions src/Boo.OMeta.Parser/AST.boo
Original file line number Diff line number Diff line change
Expand Up @@ -358,10 +358,13 @@ def newStringInterpolation(items as List):
def newConditionalExpression(condition, trueValue, falseValue):
return ConditionalExpression(Condition: condition, TrueValue: trueValue, FalseValue: falseValue)

def newBlockExpression(parameters as List, body):
def newBlockExpression(start as OMetaInput, end as OMetaInput, parameters as List, body):
node = BlockExpression(Body: body)
for p in parameters[0]:
node.Parameters.Add(p)

node.EndSourceLocation = LexicalInfo("", getLine(end), getColumn(end))

return node

def newTypeofExpression(type):
Expand Down Expand Up @@ -440,7 +443,7 @@ def binaryOperatorFor(op):
def newAssignment(l as Expression, r as Expression):
return [| $l = $r |]

def newBlock(contents, doc):
def newBlock(start as OMetaInput, end as OMetaInput, contents, doc):
b = Block()
match contents:
case Statement():
Expand All @@ -450,8 +453,23 @@ def newBlock(contents, doc):
if item:
b.Statements.Add(item)
b.Documentation = doc
end = findPrevCharInput(end)
b.EndSourceLocation = LexicalInfo("", getLine(end), getColumn(end) + 1)//EndSourceLocation is the next symbol after the expression

return b

// Walk backwards through the input chain (via Prev) to the most recent
// position whose head is an actual character (newlines and non-char items
// such as tokens are skipped). Returns null when no such position exists.
def findPrevCharInput(input as OMetaInput):
    while input:
        if isCharInput(input): return input
        input = input.Prev
    return null

// True when this input position holds a visible character:
// non-empty, head is a char, and the char is not a line break.
def isCharInput(input as OMetaInput):
    if input.IsEmpty or (not input.Head isa char): return false
    // Line-break characters do not count as character positions.
    if input.Head == char('\n') or input.Head == char('\r'): return false
    return true


// Return 'tail' unchanged when 'first' is null; otherwise a fresh list
// with 'first' in front of all elements of 'tail'.
def prepend(first, tail as List):
    return tail if first is null
    return [first] + tail
Expand Down Expand Up @@ -501,4 +519,19 @@ def checkEnumerableTypeShortcut(type, stars as List):
(enumerable as GenericTypeReference).GenericArguments.Add(type)
type = enumerable
return enumerable


// Line number recorded on the input under the "line" memo (set by the
// tokenizer's newLine); defaults to 1 when no newline has been seen,
// and returns -1 for a null input.
def getLine(input as OMetaInput):
    if input:
        return input.GetMemo("line") or 1
    else:
        return -1

// 1-based column of the input position, derived from the memoized start
// of the current line; returns -1 for a null input.
def getColumn(input as OMetaInput):
    if input:
        return input.Position - getLineStart(input) + 1 //Columns enumeration starts from 1
    else:
        return -1


// Position where the current line starts, from the "lineStart" memo set
// by the tokenizer on each newline; defaults to 1 when none was recorded.
def getLineStart(input as OMetaInput):
    return (input.GetMemo("lineStart") or 1) cast int
31 changes: 24 additions & 7 deletions src/Boo.OMeta.Parser/BooParser.boo
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ ometa BooParser < WhitespaceSensitiveTokenizer:

space = line_continuation | multi_line_comment | line_comment | super

here = "" ^ makeToken("here", null, input, input)
prev = "" ^ makeToken("prev", null, input.Prev, input.Prev)

sqs = (SQ, --( sqs_esc | (~('\'' | '\\' | '\r' | '\n'), _)) >> s, SQ) ^ makeString(s)
dqs = (DQ, --( dqs_esc | (~('"' | '\\' | '\r' | '\n'), _)) >> s, DQ) ^ makeString(s)
Expand Down Expand Up @@ -335,15 +338,15 @@ ometa BooParser < WhitespaceSensitiveTokenizer:

empty_block = (begin_block, (PASS, eol), end_block) ^ Block()

multi_line_block = ((begin_block_with_doc >> doc | begin_block), ++stmt >> stmts, end_block) ^ newBlock(stmts, doc)
multi_line_block = (here >> start, (begin_block_with_doc >> doc | begin_block), ++stmt >> stmts, end_block) ^ newBlock(getStart(start), input, stmts, doc)

macro_block = empty_block | multi_line_macro_block

multi_line_macro_block = ((begin_block_with_doc >> doc | begin_block), ++(stmt | type_member_stmt) >> stmts, end_block) ^ newBlock(stmts, doc)
multi_line_macro_block = (here >> start, (begin_block_with_doc >> doc | begin_block), ++(stmt | type_member_stmt) >> stmts, end_block) ^ newBlock(getStart(start), input, stmts, doc)

type_member_stmt = (type_def | method) >> tm ^ TypeMemberStatement(TypeMember: tm)

single_line_block = (COLON, stmt_line >> line) ^ newBlock(line, null)
single_line_block = (COLON >> start, stmt_line >> line) ^ newBlock(getStart(start), input, line, null)

begin_block = COLON, INDENT

Expand Down Expand Up @@ -416,7 +419,7 @@ ometa BooParser < WhitespaceSensitiveTokenizer:

stmt_unless = (UNLESS, assignment >> e, block >> condition) ^ newUnlessStatement(e, condition)

false_block = ((ELIF, assignment >> e, block >> trueBlock, false_block >> falseBlock) ^ newBlock(newIfStatement(e, trueBlock, falseBlock), null)) | \
false_block = ((ELIF >> start, assignment >> e, block >> trueBlock, false_block >> falseBlock) ^ newBlock(getStart(start), input, newIfStatement(e, trueBlock, falseBlock), null)) | \
((ELSE, block >> falseBlock) ^ falseBlock) | ( "" ^ null)

stmt_return = (
Expand All @@ -430,12 +433,12 @@ ometa BooParser < WhitespaceSensitiveTokenizer:
block_expression = invocation_with_block | closure_block | dsl_friendly_invocation

invocation_with_block = (member_reference >> e and (e isa MethodInvocationExpression), \
(closure_block | (block >> b ^ newBlockExpression([[], null], b))) >> c ^ newInvocationWithBlock(e, c) )
(closure_block | (here >> start, block >> b ^ newBlockExpression(getStart(start), input, [[], null], b))) >> c ^ newInvocationWithBlock(e, c) )

dsl_friendly_invocation = (member_reference >> e and ((e isa MemberReferenceExpression) or (e isa ReferenceExpression)), \
(block) >> c) ^ newInvocation(e, [BlockExpression(Body: c)], null)

closure_block = ((DEF | DO), optional_parameters >> parameters, block >> body) ^ newBlockExpression(parameters, body)
closure_block = ((DEF | DO) >> start, optional_parameters >> parameters, block >> body) ^ newBlockExpression(getStart(start), getMemoEnd(input), parameters, body)

optional_parameters = method_parameters | ("" ^ [[], null])

Expand Down Expand Up @@ -598,7 +601,7 @@ ometa BooParser < WhitespaceSensitiveTokenizer:

type_literal = (TYPEOF, LPAREN, type_reference >> type, RPAREN) ^ newTypeofExpression(type)

closure = (LBRACE, closure_parameters >> parameters, closure_stmt_list >> body, RBRACE) ^ newBlockExpression(parameters, newBlock(body, null))
closure = (LBRACE >> start, closure_parameters >> parameters, (closure_stmt_list >> body ), prev >> end, RBRACE ^ newBlock(getStart(start), getEnd(end), body, null)) >> body ^ newBlockExpression(getStart(start), input, parameters, body)

closure_parameters = ((optional_parameter_list >> parameters, BITWISE_OR) ^ [parameters, null]) | ("" ^ [[],null])

Expand Down Expand Up @@ -710,3 +713,17 @@ ometa BooParser < WhitespaceSensitiveTokenizer:

eol = (++EOL | ~_) ^ null

// Start input position recorded on a token; null for a null token.
def getStart(token as Token):
    if token:
        return token.start
    else:
        return null

// End input position recorded on a token; null for a null token.
def getEnd(token as Token):
    if token:
        return token.end
    else:
        return null



114 changes: 89 additions & 25 deletions src/Boo.OMeta.Parser/WhitespaceSensitiveTokenizer.boo
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,46 @@ namespace Boo.OMeta.Parser
import System.Text
import Boo.OMeta
import Boo.Lang.PatternMatching
import Boo.Adt
//import Boo.Adt

// Immutable tokenizer token: a kind/value pair plus optional start/end
// OMetaInput positions for source-location tracking. Written as an
// explicit class (rather than the previous 'data Token' macro) so the
// positions can be carried alongside kind and value.
public class Token(object):

    // Token category, e.g. "eol", "indent", "dedent".
    public final kind as string

    // Flattened source text of the token.
    public final value as string

    // Input position where the token begins; left null by the
    // two-argument constructor (synthetic tokens).
    public final start as OMetaInput

    // Input position where the token ends; left null by the
    // two-argument constructor.
    public final end as OMetaInput

    public override def ToString() as string:
        return "Token($(self.kind), $(self.value))"

    // Equality considers kind and value only; start/end are ignored.
    // NOTE(review): GetHashCode is not overridden to match Equals —
    // confirm Token is not used as a key in hash-based containers,
    // or override GetHashCode consistently.
    public override def Equals(o as object) as bool:
        if o is null:
            return false
        if self.GetType() is not o.GetType():
            return false
        other as Token = o
        if string.op_Inequality(self.kind, other.kind):
            return false
        if string.op_Inequality(self.value, other.value):
            return false
        return true

    public def constructor(kind as string, value as string):
        super()
        self.kind = kind
        self.value = value

    public def constructor(kind as string, value as string, start as OMetaInput, end as OMetaInput):
        super()
        self.kind = kind
        self.value = value
        self.start = start
        self.end = end


data Token(kind as string, value as string)

ometa WhitespaceSensitiveTokenizer():

Expand All @@ -31,22 +68,19 @@ ometa WhitespaceSensitiveTokenizer():
http://docs.python.org/ref/indentation.html
*/

scanner = (
(
(((_ >> t) and (t isa Token)) ^ t) // token introduced by processDedent
scanner = ( (((_ >> t) and (t isa Token)) ^ t) // token introduced by processDedent
| (((indentation >> i) and sameIndent(input, i)) ^ makeToken("eol"))
| (((indentation >> i) and largerIndent(input, i), $(processIndent(input, i))) >> value ^ value)
| (((indentation >> i) and smallerIndent(input, i), $(processDedent(input, i)) >> value) ^ value)
| ((indentation >> i) and largerIndent(input, i), $(processIndent(input, i)))
| ((indentation >> i) and smallerIndent(input, i), $(processDedent(input, i)))
| ((--space, tokens >> t) ^ t)
) >> value
) ^ value
)

indentation = empty_lines, spaces
empty_lines = ~~empty_line, ++empty_line
empty_lines = ++empty_line
empty_line = spaces, newline
spaces = --space >> value ^ value
space = ' ' | '\t' | (newline and inWSA(input))
newline = '\n' | "\r\n" | "\r"
newline = '\n' | "\r\n" | "\r", $(newLine(input))
token[expected] = (scanner >> t and tokenMatches(t, expected)) ^ t

wsa = ~~_ and inWSA(input)
Expand Down Expand Up @@ -77,7 +111,10 @@ ometa WhitespaceSensitiveTokenizer():
return wsaLevel(input, wsaLevel(input) - 1)

def success(input as OMetaInput):
return SuccessfulMatch(input, null)
return success(input, null)

def success(input as OMetaInput, value):
return SuccessfulMatch(input, value)

// Stack of active indentation widths memoized on the input;
// bottom of the stack is 0 (no indentation).
def indentStack(input as OMetaInput) as List:
    return input.GetMemo("indentStack") or [0]
Expand All @@ -91,12 +128,16 @@ ometa WhitespaceSensitiveTokenizer():
// True when the new indentation 'i' is wider than the current level;
// falls through (no explicit return) otherwise.
def largerIndent(input as OMetaInput, i):
    if len(i) > getIndent(input):
        return true


// True when the new indentation 'i' is narrower than the current level.
def smallerIndent(input as OMetaInput, i):
    return len(i) < getIndent(input)

def processDedent(input as OMetaInput, i):
original = input
indent = List(indentStack(input))
while cast(int, indent[-1]) > len(i):
indent.Pop()
input = OMetaInput.Prepend(makeToken("dedent"), input)
input = OMetaInput.Prepend(makeToken("dedent"), input, original)

input = setIndentStack(input, indent)
assert sameIndent(input, i)
Expand All @@ -105,32 +146,46 @@ ometa WhitespaceSensitiveTokenizer():
// Record the given indent level on the input's memo and succeed,
// producing 'value' as the match result.
def indentLevel(input as OMetaInput, indent as int, value as object):
    return SuccessfulMatch(input.SetMemo("indentLevel", indent), value)


// Push the new (wider) indentation width onto a copy of the indent stack
// and succeed with an "indent" token.
def processIndent(input as OMetaInput, i):
    newStack = List(indentStack(input))
    newStack.Push(len(i))
    return SuccessfulMatch(setIndentStack(input, newStack), makeToken("indent"))

def smallerIndent(input as OMetaInput, i):
return len(i) < getIndent(input)

// Current indentation width: top element of the memoized indent stack.
def getIndent(input as OMetaInput) as int:
    return indentStack(input)[-1]

// Current line number from the "line" memo; 1 before any newline is seen.
def getLine(input as OMetaInput) as int:
    return input.GetMemo("line") or 1

// Memoize the current line number on the input.
def setLine(input as OMetaInput, value as int):
    return input.SetMemo("line", value)

// Memoize the position at which the current line starts.
def setLineStart(input as OMetaInput, value as int):
    return input.SetMemo("lineStart", value)

// Advance line tracking at a newline: remember where the new line starts
// and bump the line counter, then succeed with no value.
def newLine(input as OMetaInput):
    input = setLineStart(input, input.Position)
    return success(setLine(input, getLine(input) + 1))

// Memoize 'value' under the "start" key and succeed with a null result.
def setMemoStart(input as OMetaInput, value):
    return SuccessfulMatch(input.SetMemo("start", value), null)

// Read back the value memoized under "start" (null when unset).
def getMemoStart(input as OMetaInput):
    return input.GetMemo("start")

// Read back the value memoized under "end" (null when unset).
def getMemoEnd(input as OMetaInput):
    return input.GetMemo("end")

// Identity function: returns its argument unchanged.
def getBack(value):
    return value

// Compare a token's kind against the expected kind using reference
// identity ('is'), not string equality.
// NOTE(review): this relies on kind strings being the same object
// (e.g. shared literals/interned strings) — confirm callers always pass
// the same string instances.
def tokenMatches(token as Token, expected):
    return expected is token.kind

// Null-safe accessor for a token's value string.
def tokenValue(token as Token):
    if token is null:
        return null
    return token.value

def makeToken(kind):
return Token(kind, kind)

def makeToken(kind, value):
return Token(kind, flatString(value))

def makeString(*values):
buffer = StringBuilder()
for value in values:
Expand All @@ -153,4 +208,13 @@ def flatString(buffer as StringBuilder, value):
buffer.Append(value)
otherwise:
for item in value:
flatString buffer, item
flatString buffer, item

// Token factory overloads.

// Kind-only token: value is the kind string itself (e.g. "eol", "indent").
def makeToken(kind):
    return Token(kind, kind)

// Token with a value; nested match results are flattened to a string.
def makeToken(kind, value):
    return Token(kind, flatString(value))

// Token carrying start/end input positions for source-location tracking.
def makeToken(kind, value, start, end):
    return Token(kind, flatString(value), start, end)
6 changes: 3 additions & 3 deletions src/Boo.OMeta.Tests/OMetaInputTest.boo
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ class OMetaInputTest:

arg = "foo"
input = OMetaInput.Empty()
input1 = OMetaInput.Prepend(arg, input)
input2 = OMetaInput.Prepend(arg, input)
input1 = OMetaInput.Prepend(arg, input, null)
input2 = OMetaInput.Prepend(arg, input, null)

assert input1 == input2

input3 = OMetaInput.Prepend("bar", input)
input3 = OMetaInput.Prepend("bar", input, null)
assert input1 != input3

# [Test]
Expand Down
Loading

0 comments on commit d75b93b

Please sign in to comment.