Skip to content

Commit

Permalink
add ast extraction and ast node generation (#28)
Browse files Browse the repository at this point in the history
  • Loading branch information
bachish committed Apr 25, 2024
1 parent 2ad68dc commit efe618c
Show file tree
Hide file tree
Showing 22 changed files with 469 additions and 74 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,6 @@ bin/

### Mac OS ###
.DS_Store

### Generated files ###
/gen/
7 changes: 7 additions & 0 deletions generator/src/main/kotlin/org/ucfs/GeneratorException.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package org.ucfs

/**
 * Signals a failure inside the code generators.
 *
 * The exception message is the fixed prefix "Generator exception"; when [msg] is not empty it is
 * appended after a ": " separator so that prefix and detail do not run together.
 *
 * @param msg optional detail text; defaults to the empty string.
 */
class GeneratorException(msg: String = "") :
    Exception(if (msg.isEmpty()) "Generator exception" else "Generator exception: $msg") {
    companion object {
        /** Detail text used when a class that is not a Grammar subclass is handed to a generator. */
        const val GRAMMAR_EXPECTED = "Only subclass of Grammar class can be used for parser generation"
    }
}
47 changes: 47 additions & 0 deletions generator/src/main/kotlin/org/ucfs/IGeneratorFromGrammar.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package org.ucfs

import com.squareup.kotlinpoet.AnnotationSpec
import com.squareup.kotlinpoet.ClassName
import com.squareup.kotlinpoet.FileSpec
import com.squareup.kotlinpoet.TypeName
import org.ucfs.grammar.combinator.Grammar
import java.nio.file.Path

/**
 * Shared behaviour for generators that are driven by a [Grammar] subclass.
 */
interface IGeneratorFromGrammar {
    /** Class the generator works from. */
    val grammarClazz: Class<*>

    /**
     * Instantiates [grammarClazz] through its public no-argument constructor and returns the
     * resulting [Grammar].
     *
     * @throws GeneratorException when [grammarClazz] is not a subclass of [Grammar]
     */
    fun buildGrammar(grammarClazz: Class<*>): Grammar {
        val isGrammarSubclass = Grammar::class.java.isAssignableFrom(grammarClazz)
        if (!isGrammarSubclass) {
            throw GeneratorException(GeneratorException.GRAMMAR_EXPECTED)
        }

        val instance = grammarClazz.getConstructor().newInstance() as? Grammar
            ?: throw GeneratorException(GeneratorException.GRAMMAR_EXPECTED)

        // The `rsm` property is read before the grammar is handed out.
        // NOTE(review): presumably this forces the RSM to be built eagerly - confirm in Grammar.
        instance.rsm
        return instance
    }

    /**
     * Runs the generator.
     *
     * @param location directory passed to the generator as output location
     * @param pkg package name used for the generated code
     */
    fun generate(location: Path, pkg: String)
}

/**
 * Adds a `Suppress` annotation that lists [types] to the file being built.
 * Nothing is added when [types] is empty.
 */
internal fun FileSpec.Builder.suppressWarningTypes(vararg types: String) {
    if (types.isNotEmpty()) {
        // One %S placeholder per warning name, separated by commas.
        val placeholders = types.joinToString(separator = ",") { "%S" }
        val suppress = AnnotationSpec.builder(ClassName("", "Suppress"))
            .addMember(placeholders, *types)
            .build()
        addAnnotation(suppress)
    }
}

/** Returns a copy of this type name marked as nullable. */
fun TypeName.nullable(): TypeName {
    return copy(nullable = true)
}
74 changes: 74 additions & 0 deletions generator/src/main/kotlin/org/ucfs/ast/AstExtractor.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
package org.ucfs.ast

import org.ucfs.GeneratorException
import org.ucfs.rsm.symbol.Nonterminal
import org.ucfs.sppf.node.*

/**
 * Builds a tree of [Node]s from an SPPF.
 *
 * The node class of each nonterminal is looked up by name inside package [pkg]
 * (class name taken from [NodeClassesGenerator.getClassName]) and instantiated reflectively
 * through its `(Node, Int)` constructor.
 */
class AstExtractor(val pkg: String) {
    /** Cache of the node class already resolved for each nonterminal. */
    val nonterminalToClass = HashMap<Nonterminal, Class<*>>()

    /**
     * Packed nodes already consumed by the extraction in progress.
     * Needed to handle "many" in rules (many can make cycles in sppf).
     */
    val used = HashSet<PackedSppfNode<*>>()

    /**
     * Extracts the AST for [sppf].
     *
     * @return the first node created under a synthetic root, or that (empty) synthetic root when
     * nothing was created, e.g. for a null [sppf]
     */
    fun extract(sppf: ISppfNode?): Node {
        // Forget packed nodes consumed by an earlier call; otherwise re-running the extractor on
        // the same SPPF would find every packed node already used.
        used.clear()
        val root = Node(null, 0)
        extract(sppf, root, null)
        return root.children.firstOrNull() ?: root
    }

    /**
     * Offset for a node about to be created: right behind [left] when there is a left sibling,
     * otherwise the offset of [parent].
     */
    private fun getOffset(left: Node?, parent: Node): Int =
        if (left == null) parent.offset else left.offset + left.length

    /**
     * Walks [sppf] and attaches the nodes it produces to [parent]; [left] is the node created
     * immediately before, if any.
     *
     * @return the node created for a symbol or terminal; for packed and intermediate nodes the
     * result of the last child visited; null for a null [sppf]
     * @throws GeneratorException for an SPPF node type that is not handled here
     * @throws NoSuchElementException when every packed child of a symbol node was already consumed
     */
    private fun extract(sppf: ISppfNode?, parent: Node, left: Node?): Node? {
        return when (sppf) {
            is PackedSppfNode<*> -> {
                // The left part comes first so that its result becomes the sibling of the right part.
                val newLeft = extract(sppf.leftSppfNode, parent, left)
                extract(sppf.rightSppfNode, parent, newLeft)
            }

            // Only the first child of an intermediate node is followed.
            is IntermediateSppfNode<*> -> extract(sppf.children.firstOrNull(), parent, left)

            is SymbolSppfNode<*> -> {
                val nodeClass = getNodeClass(sppf.symbol)
                val ctor = nodeClass.getConstructor(Node::class.java, Int::class.java)

                val node: Node = ctor.newInstance(parent, getOffset(left, parent)) as Node
                node.left = left
                parent.children.add(node)

                // Take the first packed child that has not been consumed yet.
                val packedNode: PackedSppfNode<*> = sppf.children.firstOrNull { pn -> pn !in used }
                    ?: throw NoSuchElementException(
                        "No unused packed node left for symbol ${sppf.symbol}"
                    )
                used.add(packedNode)

                extract(packedNode, node, null)
                // The subtree is complete now, so its length can be added to the parent.
                parent.length += node.length
                node
            }

            is TerminalSppfNode<*> -> {
                val node = TerminalNode(parent, getOffset(left, parent), sppf.terminal, left)
                parent.children.add(node)
                parent.length += sppf.terminal.toString().length
                node
            }

            null -> null
            else -> throw GeneratorException("Unknown sppf node type : $sppf")
        }
    }

    /** Resolves the node class for [nt], loading it by name on first use and caching it. */
    private fun getNodeClass(nt: Nonterminal): Class<*> {
        return nonterminalToClass.getOrPut(nt) {
            Class.forName("$pkg.${NodeClassesGenerator.getClassName(nt)}")
        }
    }
}
76 changes: 76 additions & 0 deletions generator/src/main/kotlin/org/ucfs/ast/DotWriter.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package org.ucfs.ast

import java.nio.file.Files
import java.nio.file.Path

/**
 * Renders a tree of [Node]s as a Graphviz DOT digraph and writes it below the `gen/ast` directory.
 */
class DotWriter {
    private var lastId = 0

    /** When true, [handleNode] also emits a blue edge from each node to its `left` node. */
    var showSiblings = true

    /** DOT identifiers handed out so far, keyed by node. The map is never cleared by this class. */
    val ids: HashMap<Node, Int> = HashMap()

    /** Returns the identifier of [node], assigning the next free number on first request. */
    fun getId(node: Node): Int = ids.getOrPut(node) { lastId++ }

    /**
     * Produces the complete DOT text for the tree below [root].
     *
     * @param label graph label; double quotes inside it are escaped
     */
    fun getDotView(root: Node, label: String = "AST"): String = buildString {
        append("digraph g {")
        append("label=\"${escapeQuotes(label)}\"")
        append(handleNode(root))
        append("}")
    }

    /**
     * Escapes double quotes so that [text] can be placed inside a quoted DOT string without
     * terminating it early.
     */
    private fun escapeQuotes(text: String): String = text.replace("\"", "\\\"")

    /**
     * Builds the `label = "..."` attribute of [node]: the terminal text for a [TerminalNode],
     * the simple class name otherwise, followed by the node's offset and length on separate lines.
     */
    private fun getNodeLabel(node: Node): String {
        val title = when (node) {
            is TerminalNode<*> -> node.terminal.toString()
            else -> node.javaClass.simpleName
        }
        return "label = \"${escapeQuotes(title)}\noffset = ${node.offset}\nlength = ${node.length}\""
    }

    /** Builds the DOT node statement for [node]; terminal nodes are coloured green. */
    private fun getNodeView(node: Node): String {
        val color = if (node is TerminalNode<*>) ", color = green" else ""
        return "\n${getId(node)} [ ${getNodeLabel(node)}$color]"
    }

    /**
     * Returns the DOT statements for [node] and everything below it: the node itself, an optional
     * edge to its `left` node, and for each child an edge followed by the child's own statements.
     */
    fun handleNode(node: Node): String {
        val out = StringBuilder()
        appendNode(node, out)
        return out.toString()
    }

    /** Recursive worker of [handleNode]; writes into one shared builder to avoid re-copying text. */
    private fun appendNode(node: Node, out: StringBuilder) {
        val id = getId(node)
        out.append(getNodeView(node))

        // `left` is an open var, so it is copied to a local before the null check.
        val left = node.left
        if (showSiblings && left != null) {
            out.append("\n$id -> ${getId(left)} [color=blue]")
        }

        for (child in node.children) {
            out.append("\n$id -> ${getId(child)}")
            appendNode(child, out)
        }
    }

    /**
     * Writes [view] to [filePath] resolved against the `gen/ast` directory, creating missing
     * directories first.
     */
    fun writeToFile(view: String, filePath: Path) {
        val genPath = Path.of("gen", "ast")
        Files.createDirectories(genPath)
        val target = genPath.resolve(filePath)
        // filePath may bring directories of its own; make sure they exist as well.
        target.parent?.let { Files.createDirectories(it) }
        target.toFile().writeText(view)
    }

    /**
     * Renders the tree below [root] and writes it to `<fileName>.dot` below `gen/ast`.
     *
     * Note that this stores [showSiblings] in the writer, so it also affects later calls.
     */
    fun writeToFile(root: Node, fileName: String, label: String = "AST", showSiblings: Boolean) {
        this.showSiblings = showSiblings
        writeToFile(getDotView(root, label), Path.of("$fileName.dot"))
    }
}
16 changes: 16 additions & 0 deletions generator/src/main/kotlin/org/ucfs/ast/Node.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package org.ucfs.ast

/**
 * Mutable tree node holding a parent link, an offset, a length, left/right links and a child list.
 *
 * TODO add the members below
 *  - sppfNode (internalNode)
 *  - constructor (parent, sppfNode, offset)
 *
 * @property parent owning node, or null when there is none
 * @property offset offset value supplied by whoever creates the node
 */
open class Node(
    var parent: Node?,
    var offset: Int,
) {
    /** Nodes attached below this one; starts out empty. */
    var children: ArrayList<Node> = arrayListOf()

    /** Length of the node; starts at 0. */
    var length: Int = 0

    /** Link to the left; open so that subclasses can override it. Starts as null. */
    open var left: Node? = null

    /** Link to the right. Starts as null. */
    var right: Node? = null
}
94 changes: 94 additions & 0 deletions generator/src/main/kotlin/org/ucfs/ast/NodeClassesGenerator.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package org.ucfs.ast

import com.squareup.kotlinpoet.*
import com.squareup.kotlinpoet.ParameterizedTypeName.Companion.parameterizedBy
import org.ucfs.IGeneratorFromGrammar
import org.ucfs.grammar.combinator.Grammar
import org.ucfs.grammar.combinator.regexp.*
import org.ucfs.rsm.symbol.Nonterminal
import org.ucfs.suppressWarningTypes
import java.nio.file.Path

/**
 * Generates one AST node class per nonterminal of the grammar.
 *
 * Every generated class is named `<nonterminal name>Node`, has the type variable `TerminalType`,
 * extends [Node] and gets a `(parent, offset)` constructor that forwards to the super constructor.
 */
class NodeClassesGenerator(override val grammarClazz: Class<*>) :
    IGeneratorFromGrammar {
    /** Grammar instance built from [grammarClazz]. */
    val grammar: Grammar = buildGrammar(grammarClazz)

    private val superClass: Class<*> = Node::class.java

    /**
     * Writes a source file for each nonterminal of the grammar.
     *
     * @param location directory the files are written to
     * @param pkg package of the generated classes
     */
    override fun generate(location: Path, pkg: String) {
        grammar.nonTerms.forEach { nt ->
            generateClassFile(nt, pkg).writeTo(location)
        }
    }

    /**
     * Builds the file spec holding the node class of the single nonterminal [nt].
     */
    private fun generateClassFile(nt: Nt, pkg: String): FileSpec {
        val parameterizedName = ClassName(pkg, getClassName(nt)).parameterizedBy(terminalType)
        val simpleName = parameterizedName.rawType.simpleName

        val nodeType = TypeSpec.classBuilder(simpleName)
            .addTypeVariable(terminalType)
            .superclass(superClass.asTypeName())
            .addFunction(generateConstructor())
            .build()

        val fileBuilder = FileSpec.builder(pkg, simpleName).addType(nodeType)
        fileBuilder.suppressWarningTypes("RedundantVisibilityModifier")
        return fileBuilder.build()
    }

    /**
     * Builds the `(parent, offset)` constructor that passes both arguments to the super constructor.
     */
    private fun generateConstructor(): FunSpec =
        FunSpec.constructorBuilder()
            .addParameter(PARENT, superClass)
            .addParameter(OFFSET, Int::class)
            .callSuperConstructor(PARENT, OFFSET)
            .build()

    /**
     * Collects a property spec for every derived symbol reachable in [re].
     * Symbols below an alternative or a "many" are always treated as optional; a concatenation
     * hands [isOptional] down unchanged.
     */
    private fun extractChildren(re: Regexp, isOptional: Boolean): List<PropertySpec> = when (re) {
        is Alternative -> {
            val fromLeft = extractChildren(re.left, true)
            val fromRight = extractChildren(re.right, true)
            fromLeft + fromRight
        }

        is Concat -> {
            val fromHead = extractChildren(re.head, isOptional)
            val fromTail = extractChildren(re.tail, isOptional)
            fromHead + fromTail
        }

        is Empty -> emptyList()
        is Epsilon -> emptyList()
        is Many -> extractChildren(re.exp, true)
        is DerivedSymbol -> listOf(generateProperty(re, isOptional))
    }

    /** Not implemented yet. */
    private fun <T> generateProperty(value: T, isOptional: Boolean): PropertySpec = TODO()

    companion object {
        /** Name of the node class generated for [nt]. */
        fun getClassName(nt: Nt): String = getClassName(nt.nonterm)

        /** Name of the node class generated for [nt]: the nonterminal name followed by `Node`. */
        fun getClassName(nt: Nonterminal): String = "${nt.name}Node"

        //TODO add extensions `TerminalType: ITerminal`
        val terminalType = TypeVariableName("TerminalType")
        const val FUN_GET_CHILDREN = "getChildren"
        const val OFFSET = "offset"
        const val PARENT = "parent"
        const val LENGTH = "length"
    }
}
10 changes: 10 additions & 0 deletions generator/src/main/kotlin/org/ucfs/ast/TerminalNode.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package org.ucfs.ast

import org.ucfs.rsm.symbol.ITerminal

/**
 * Leaf node that carries a terminal.
 *
 * @property terminal the terminal held by this node; may be null
 * @property left link to the left, supplied by the creator
 */
class TerminalNode<T : ITerminal>(
    parent: Node,
    offset: Int,
    val terminal: T?,
    override var left: Node?,
) : Node(parent, offset) {
    init {
        // Length is the size of the terminal's string form; a null terminal renders as "null".
        length = "$terminal".length
    }
}
51 changes: 51 additions & 0 deletions generator/src/main/kotlin/org/ucfs/examples/Examples.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package org.ucfs.examples

import org.ucfs.ast.AstExtractor
import org.ucfs.ast.DotWriter
import org.ucfs.ast.NodeClassesGenerator
import org.ucfs.examples.dyck.DyckGrammar
import org.ucfs.examples.golang.SimpleGolang
import org.ucfs.grammar.combinator.Grammar
import org.ucfs.input.LinearInput
import org.ucfs.parser.Gll
import org.ucfs.rsm.writeRsmToDot
import org.ucfs.sppf.writeSppfToDot
import java.nio.file.Path


/**
 * Sample pipeline: generate node classes, parse an input, and dump both the SPPF and the AST as DOT.
 */
object Examples {
    /**
     * Runs the whole pipeline for one [grammar] and one [input].
     *
     * Steps: node classes are generated into `generator/src/main/kotlin` for package [pkg];
     * [input] is parsed with GLL; the SPPF is written to `<name>.dot`; the AST is extracted and
     * written twice through [DotWriter], without and with sibling edges.
     *
     * @param grammar grammar to parse with
     * @param pkg package of the generated node classes, also used to look them up again
     * @param input text to parse
     * @param name base name of the produced DOT files
     * @throws IllegalStateException when parsing [input] yields no SPPF
     */
    fun generateAst(grammar: Grammar, pkg: String, input: String, name: String) {
        val grammarClass = grammar::class.java
        NodeClassesGenerator(grammarClass).generate(Path.of("generator", "src", "main", "kotlin"), pkg)

        val gll = Gll.gll(grammar.rsm, LinearInput.buildFromString(input))
        // Fail with a readable message instead of a bare NullPointerException when nothing was parsed.
        val sppf = checkNotNull(gll.parse().first) {
            "${grammarClass.simpleName} produced no SPPF for input: $input"
        }
        writeSppfToDot(sppf, Path.of("${name}.dot").toString(), "${grammarClass.simpleName} SPPF for $input")

        val ast = AstExtractor(pkg).extract(sppf)
        val label = "${grammarClass.simpleName} AST for $input"
        DotWriter().writeToFile(ast, name, label, showSiblings = false)
        DotWriter().writeToFile(ast, "$name with siblings", label, showSiblings = true)
    }
}


/**
 * Writes the Dyck grammar RSM to `rsm.dot` and runs [Examples.generateAst] on several sample inputs.
 *
 * Every run has its own output name so that a later example does not overwrite the DOT files
 * written by an earlier one.
 */
fun main() {
    writeRsmToDot(DyckGrammar().rsm, "rsm.dot")
    Examples.generateAst(SimpleGolang(), "org.ucfs.examples.golang", "r 1 + 1 ;", "1_simple golang")
    Examples.generateAst(SimpleGolang(), "org.ucfs.examples.golang", "r 1 + 1 ; 1 ; r 1 ;", "2_simple golang")
    Examples.generateAst(DyckGrammar(), "org.ucfs.examples.dyck", "[ ( ) ] ", "1_dyck")
    Examples.generateAst(DyckGrammar(), "org.ucfs.examples.dyck", "[ ( ) ] { }", "2_dyck")
    Examples.generateAst(DyckGrammar(), "org.ucfs.examples.dyck", "[ ] { } [ ( ) ]", "3_dyck")
    Examples.generateAst(DyckGrammar(), "org.ucfs.examples.dyck", " [ { } ( ) ] ", "4_dyck")
    Examples.generateAst(DyckGrammar(), "org.ucfs.examples.dyck", "[ ] { { } ( ) } [ ( ) ]", "5_dyck")
}
Loading

0 comments on commit efe618c

Please sign in to comment.