Skip to content

Commit

Permalink
Syntax tree traversals (#256)
Browse files Browse the repository at this point in the history
* Fix comments and formatting.

* Added missing comments.

* Introduce AstNode.

* scalafmt.

* Move EnhancedBlock into its own file.

* Export implicit + comments.

* Cleanup.

* More cleanup.

* Add AST_NODE base type to (hopefully) all correct child types.

* Cleanup + test.

* Completed AST traversals.

* scalafmt.

* Address review.

* Rename.

* Make constants static.
  • Loading branch information
fabsx00 committed Jul 17, 2019
1 parent 3ca2c64 commit fa586c9
Show file tree
Hide file tree
Showing 24 changed files with 392 additions and 101 deletions.
2 changes: 1 addition & 1 deletion build.sbt
Expand Up @@ -38,7 +38,7 @@ name := "codepropertygraph"
publish / skip := true

// parsed by project/Utils.scala
val fuzzyc2cpgVersion = "0.1.57"
val fuzzyc2cpgVersion = "0.1.70"

lazy val codepropertygraph = Projects.codepropertygraph
lazy val protoBindings = Projects.protoBindings
Expand Down
54 changes: 30 additions & 24 deletions codepropertygraph/src/main/resources/schemas/base.json
Expand Up @@ -26,7 +26,7 @@
{"id" : 6, "name": "FULL_NAME", "comment" : "Full name of an element, e.g., the class name along with its package", "valueType" : "string", "cardinality" : "one"},
{"id": 21, "name": "CODE", "comment": "The code snippet the node represents", "valueType" : "string", "cardinality" : "one"},
{"id": 22, "name": "SIGNATURE", "comment": "Method signature", "valueType" : "string", "cardinality" : "one"},
{ "id": 26, "name" : "MODIFIER_TYPE", "comment" : "Indicates the modifier which is represented by a MODIFIER node. See modifierTypes", "valueType" : "string", "cardinality" : "one"},
{"id": 26, "name" : "MODIFIER_TYPE", "comment" : "Indicates the modifier which is represented by a MODIFIER node. See modifierTypes", "valueType" : "string", "cardinality" : "one"},

// Properties to characterize call-sites

Expand Down Expand Up @@ -69,7 +69,8 @@
"comment": "Node representing a source file. Often also the AST root",
"outEdges": [
{"edgeName": "AST", "inNodes": ["NAMESPACE_BLOCK"]}
]
],
"is" : ["AST_NODE"]
},

// Nodes of method declarations
Expand All @@ -78,7 +79,7 @@
"keys": ["NAME", "FULL_NAME", "SIGNATURE", "AST_PARENT_TYPE", "AST_PARENT_FULL_NAME",
"LINE_NUMBER", "COLUMN_NUMBER"],
"comment" : "A method/function/procedure",
"is": ["DECLARATION", "CFG_NODE"],
"is": ["DECLARATION", "CFG_NODE", "AST_NODE"],
"outEdges" : [
{"edgeName": "AST", "inNodes": ["METHOD_RETURN", "METHOD_PARAMETER_IN",
"MODIFIER", "BLOCK", "TYPE_PARAMETER"]},
Expand All @@ -89,21 +90,19 @@
{"id" : 34, "name" : "METHOD_PARAMETER_IN",
"keys": ["CODE", "ORDER", "NAME", "EVALUATION_STRATEGY", "TYPE_FULL_NAME", "LINE_NUMBER", "COLUMN_NUMBER"],
"comment" : "This node represents a formal parameter going towards the callee side",
"is": ["DECLARATION", "LOCAL_LIKE", "TRACKING_POINT"]
"is": ["DECLARATION", "LOCAL_LIKE", "TRACKING_POINT", "AST_NODE"]
},

{"id" : 3, "name" : "METHOD_RETURN",
"keys": ["CODE", "EVALUATION_STRATEGY", "TYPE_FULL_NAME", "LINE_NUMBER", "COLUMN_NUMBER"],
"comment" : "A formal method return",
"is": ["CFG_NODE", "TRACKING_POINT"]
},

// Modifier

{"id" : 300, "name" : "MODIFIER",
"keys" : ["MODIFIER_TYPE"],
"comment" : "The static-modifier",
"outEdges": []
"comment" : "A modifier, e.g., static, public, private",
"outEdges": [],
"is" : ["AST_NODE"]
},

// Types
Expand All @@ -121,33 +120,37 @@
"outEdges" : [
{"edgeName": "AST", "inNodes": ["TYPE_PARAMETER", "MEMBER", "MODIFIER"]},
{"edgeName": "VTABLE", "inNodes": ["METHOD"]}
]
],
"is" : ["AST_NODE"]
},
{"id" : 47, "name" : "TYPE_PARAMETER",
"keys" : ["NAME", "ORDER"],
"comment" : "Type parameter of TYPE_DECL or METHOD",
"outEdges" : []
"outEdges" : [],
"is" : ["AST_NODE"]
},
{"id" : 48, "name" : "TYPE_ARGUMENT",
"keys" : [],
"comment" : "Argument for a TYPE_PARAMETER that belongs to a TYPE or METHOD_INST. It binds another TYPE to a TYPE_PARAMETER",
"outEdges" : [
{"edgeName": "REF", "inNodes": ["TYPE"]},
{"edgeName": "BINDS_TO", "inNodes": ["TYPE_PARAMETER"]}
]
],
"is" : ["AST_NODE"]
},

{"id" : 9, "name" : "MEMBER",
"keys" : [ "CODE", "NAME", "TYPE_FULL_NAME"],
"comment" : "Member of a class, struct, or union",
"is": ["DECLARATION"]
"is": ["DECLARATION", "AST_NODE"]
},

{
"id":41,
"name": "NAMESPACE_BLOCK",
"keys": ["NAME", "FULL_NAME"],
"comment": "A reference to a namespace"
"comment": "A reference to a namespace",
"is" : ["AST_NODE"]
},

// Nodes that describe method content
Expand All @@ -174,7 +177,7 @@
{"id":23, "name" : "LOCAL",
"keys": ["CODE", "NAME", "CLOSURE_BINDING_ID", "TYPE_FULL_NAME", "LINE_NUMBER", "COLUMN_NUMBER"],
"comment": "A local variable",
"is": ["DECLARATION", "LOCAL_LIKE"]
"is": ["DECLARATION", "LOCAL_LIKE", "AST_NODE"]
},
{"id":27, "name": "IDENTIFIER",
"keys": ["CODE", "NAME", "ORDER", "ARGUMENT_INDEX", "TYPE_FULL_NAME", "LINE_NUMBER", "COLUMN_NUMBER"],
Expand Down Expand Up @@ -208,13 +211,15 @@
"comment":"A method instance which always has to reference a method and may have type argument children if the referred to method is a template",
"outEdges": [
{"edgeName": "AST", "inNodes": ["TYPE_ARGUMENT"]}
]
],
"is" : ["AST_NODE"]
},

{"id" : 14, "name" : "ARRAY_INITIALIZER",
"keys":[],
"outEdges": [],
"comment" : "Initialization construct for arrays"
"comment" : "Initialization construct for arrays",
"is" : ["AST_NODE"]
},

{"id":333, "name":"METHOD_REF",
Expand Down Expand Up @@ -255,17 +260,18 @@
// common base traits for nodes
"nodeBaseTraits" : [
{ "name" : "DECLARATION", "comment" : "", "hasKeys" : ["NAME"]},
{ "name" : "EXPRESSION", "comment" : "Expression as a specialisation of data flow objects", "hasKeys" : ["CODE", "ORDER"], "extends" : ["TRACKING_POINT", "CFG_NODE"]},
{ "name" : "LOCAL_LIKE", "comment" : "", "hasKeys" : ["NAME"]},
{ "name" : "CFG_NODE", "comment" : "", "hasKeys" : ["LINE_NUMBER", "COLUMN_NUMBER"], "extends": ["WITHIN_METHOD"]},
{ "name" : "TRACKING_POINT", "comment" : "", "hasKeys" : [], "extends": ["WITHIN_METHOD"]},
{ "name" : "WITHIN_METHOD", "comment" : "", "hasKeys" : []}
{ "name" : "EXPRESSION", "comment" : "Expression as a specialisation of tracking point", "hasKeys" : ["CODE", "ORDER"], "extends" : ["TRACKING_POINT", "CFG_NODE", "AST_NODE"]},
{ "name" : "LOCAL_LIKE", "comment" : "Formal input parameters, locals, and identifiers", "hasKeys" : ["NAME"]},
{ "name" : "CFG_NODE", "comment" : "Any node that can occur as part of a control flow graph", "hasKeys" : ["LINE_NUMBER", "COLUMN_NUMBER"], "extends": ["WITHIN_METHOD", "AST_NODE"]},
{ "name" : "TRACKING_POINT", "comment" : "Any node that can occur in a data flow", "hasKeys" : [], "extends": ["WITHIN_METHOD"]},
{ "name" : "WITHIN_METHOD", "comment" : "Any node that can exist in a method", "hasKeys" : []},
{ "name" : "AST_NODE", "comment": "Any node that can exist in an abstract syntax tree.", "hasKeys" : []}
],

"edgeTypes" : [

{"id" : 3, "name" : "AST", "comment" : "Syntax child" , "keys" : [] },
{"id" : 19, "name" : "CFG", "comment" : "Control flow", "keys" : [] },
{"id" : 3, "name" : "AST", "comment" : "Syntax tree edge" , "keys" : [] },
{"id" : 19, "name" : "CFG", "comment" : "Control flow edge", "keys" : [] },

{"id" : 9, "name" : "CONTAINS_NODE", "keys" : ["LOCAL_NAME", "INDEX"], "comment" : "Membership relation for a compound object"},
{"id" : 41, "name": "CAPTURED_BY", "comment" : "Connection between a captured LOCAL and the corresponding CLOSURE_BINDING", "keys": []},
Expand Down
Expand Up @@ -21,7 +21,8 @@
"name": "NAMESPACE",
"keys": ["NAME"],
"comment": "This node represents a namespace as a whole, whereas the NAMESPACE_BLOCK is used for each grouping occurrence of a namespace in code. A single representative NAMESPACE node is required for easier navigation in the query language",
"outEdges": []
"is" : ["AST_NODE"],
"outEdges": []
},
{
"name" : "FILE", "outEdges" : [
Expand Down Expand Up @@ -56,7 +57,7 @@
{"id" : 33, "name" : "METHOD_PARAMETER_OUT",
"keys": ["CODE", "ORDER", "NAME", "EVALUATION_STRATEGY", "TYPE_FULL_NAME", "LINE_NUMBER", "COLUMN_NUMBER"],
"comment" : "This node represents a formal parameter going towards the caller side",
"is": ["DECLARATION", "TRACKING_POINT"],
"is": ["DECLARATION", "TRACKING_POINT", "AST_NODE"],
"outEdges" : [
{"edgeName": "TAGGED_BY", "inNodes": ["TAG"]},
{"edgeName": "EVAL_TYPE", "inNodes": ["TYPE"]}
Expand Down
12 changes: 8 additions & 4 deletions codepropertygraph/src/main/resources/schemas/java-specific.json
Expand Up @@ -14,27 +14,31 @@
"comment" : "A method annotation",
"outEdges" : [
{"edgeName": "AST", "inNodes": ["ANNOTATION_PARAMETER_ASSIGN"]}
]
],
"is" : ["AST_NODE"]
},

{"id" : 6, "name" : "ANNOTATION_PARAMETER_ASSIGN",
"keys" : ["CODE"],
"comment" : "Assignment of annotation argument to annotation parameter",
"outEdges" : [
{"edgeName": "AST", "inNodes": ["ANNOTATION_PARAMETER", "ARRAY_INITIALIZER", "ANNOTATION_LITERAL", "ANNOTATION"]}
]
],
"is" : ["AST_NODE"]
},

{"id" : 7, "name" : "ANNOTATION_PARAMETER",
"keys" : ["CODE"],
"comment" : "Formal annotation parameter",
"outEdges": []
"outEdges": [],
"is" : ["AST_NODE"]
},

{"id" : 49, "name" : "ANNOTATION_LITERAL",
"keys" : ["CODE", "NAME", "ORDER"],
"comment" : "A literal value assigned to an ANNOTATION_PARAMETER",
"outEdges" : [ ]
"outEdges" : [ ],
"is" : ["AST_NODE"]
},

{"name" : "ARRAY_INITIALIZER",
Expand Down
Expand Up @@ -52,6 +52,7 @@ object ProtoToOverflowDb extends App {
}

private def importCpgStruct(cpgProto: CpgStruct, overflowDb: OndiskOverflow): Unit = {

/** cpg proto nodes don't know their adjacent edges, but those are required for the OverflowDb serializer,
* so we need to build some helper maps to import the nodes */
val inEdgesByNodeId: JMap[NodeId, JMap[EdgeLabel, TLongSet]] = new JHashMap
Expand Down Expand Up @@ -91,4 +92,4 @@ object ProtoToOverflowDb extends App {

}

case class Config(cpg: File, writeTo: Option[File] = None)
case class Config(cpg: File, writeTo: Option[File] = None)
Expand Up @@ -22,7 +22,8 @@ class ProtoToOverflowDbTest extends WordSpec with Matchers {
referenceEdgeCount.toInt should be > 50
referenceSpecificPropertyCount.toInt should be > 10

val overflowdbFile = ProtoToOverflowDb.run(Config(new File(cpgBinZip), Some(Files.createTempFile("overflowdb", "bin").toFile)))
val overflowdbFile =
ProtoToOverflowDb.run(Config(new File(cpgBinZip), Some(Files.createTempFile("overflowdb", "bin").toFile)))
val fromStorage = Cpg.withStorage(overflowdbFile.getAbsolutePath).scalaGraph
fromStorage.V.count.head shouldBe referenceNodeCount
fromStorage.E.count.head shouldBe referenceEdgeCount
Expand Down
@@ -0,0 +1,83 @@
package io.shiftleft.cpgqueryingtests.steps

import io.shiftleft.cpgqueryingtests.codepropertygraph.{CpgFactory, LanguageFrontend}

/**
  * Query tests for the AST traversal steps, run against a small C function
  * that is turned into a CPG by the Fuzzyc frontend.
  */
class CAstTests extends CpgDataFlowTests {
  val cpgFactory = new CpgFactory(LanguageFrontend.Fuzzyc, "cpgqueryingtests/src/test/resources/default.semantics")

  // Fixture: `foo` contains an if/else whose else branch nests another `if`.
  val code =
    """
      | int foo(int y) {
      | int x = 10;
      | if (x > 10) {
      | return bar(x + 10);
      | } else {
      | if (y > x) {
      | printf("reached");
      | }
      | }
      | }
      |""".stripMargin

  "should identify four blocks" in {
    cpgFactory.buildCpg(code) { cpg =>
      val blocks = cpg.method.name("foo").ast.isBlock.l
      blocks.size shouldBe 4
    }
  }

  "should identify three control structures" in {
    cpgFactory.buildCpg(code) { cpg =>
      // A `def`, so every assertion below builds its own traversal,
      // exactly as the spelled-out queries would.
      def controlStructures = cpg.method.name("foo").ast.isControlStructure

      controlStructures.parserTypeName("IfStatement").l.size shouldBe 2
      controlStructures.parserTypeName("ElseStatement").l.size shouldBe 1
    }
  }

  "should identify conditions" in {
    cpgFactory.buildCpg(code) { cpg =>
      val conditionCodes = cpg.method.name("foo").ast.isControlStructure.condition.code.l
      conditionCodes shouldBe List("x > 10", "", "y > x")
    }
  }

  "should allow filtering on conditions" in {
    cpgFactory.buildCpg(code) { cpg =>
      // A `def`, so each of the three queries starts from a fresh traversal.
      def guardedByX = cpg.method.name("foo").condition(".*x > 10.*")

      guardedByX.l.size shouldBe 1

      val returnsWhenTrue = guardedByX.whenTrue.ast.isReturnNode.code.l
      returnsWhenTrue shouldBe List("return bar(x + 10);")

      val printfWhenFalse = guardedByX.whenFalse.ast.isCall.code(".*printf.*").code.l
      printfWhenFalse shouldBe List("printf(\"reached\")")
    }
  }

}
Expand Up @@ -24,7 +24,7 @@ class CpgDataFlowTests extends WordSpec with Matchers {
}

protected def getLiteralOfType(cpg: Cpg, typeName: String, literalName: String): Literal[HNil] = {
cpg.typeDecl.nameExact(typeName).method.literal.codeExact(literalName)
cpg.typeDecl.nameExact(typeName).method.isLiteral.codeExact(literalName)
}

protected def flowToResultPairs(flow: List[nodes.TrackingPoint]): List[(String, Option[Integer])] = {
Expand Down
Expand Up @@ -7,7 +7,6 @@ import io.shiftleft.codepropertygraph.generated.edges.ContainsNode
import io.shiftleft.diffgraph.DiffGraph
import io.shiftleft.diffgraph.DiffGraph._
import io.shiftleft.queryprimitives.steps.NewNodeSteps
import java.lang.{Long => JLong}

import io.shiftleft.codepropertygraph.generated.ModifierTypes
import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph
Expand Down
Expand Up @@ -96,6 +96,12 @@ import io.shiftleft.codepropertygraph.generated.Operators
*
**/
package object queryprimitives {

/**
* For a given name, determine whether it is the
* name of a "member access" operation, e.g.,
* "<operator>.memberAccess".
* */
def isGenericMemberAccessName(name: String): Boolean = {
(name == Operators.memberAccess) ||
(name == Operators.indirectComputedMemberAccess) ||
Expand Down
@@ -0,0 +1,16 @@
package io.shiftleft.queryprimitives.steps

import io.shiftleft.codepropertygraph.generated.nodes
import io.shiftleft.queryprimitives.steps.types.expressions.generalizations.AstNode
import shapeless.HNil

/**
  * Shorthands that let the `ast` and `children` traversals be run directly
  * on a single `nodes.AstNode` instead of on a traversal.
  */
class AstNodeMethods(val node: nodes.AstNode) extends AnyVal {

  /** Starts a traversal at this node and applies its `ast` step. */
  def ast: AstNode[HNil] = {
    val fromNode = node.start
    fromNode.ast
  }

  /**
    * Starts a traversal at this node and applies its `astMinusRoot` step.
    * NOTE(review): judging by the step name this yields the AST without the
    * start node itself rather than only direct children -- confirm.
    */
  def children: AstNode[HNil] = {
    val fromNode = node.start
    fromNode.astMinusRoot
  }

}
Expand Up @@ -5,21 +5,14 @@ import io.shiftleft.codepropertygraph.generated.nodes.NodeVisitor
import io.shiftleft.queryprimitives.steps.visitormixins.ExpressionGeneralization

class CfgNodeMethods(val node: nodes.CfgNode) extends AnyVal {
def code: String = {
node.accept(CfgNodeToCode)
}
def code: String = node.accept(CfgNodeToCode)
}

private object CfgNodeToCode extends NodeVisitor[String] with ExpressionGeneralization[String] {
override def visit(node: nodes.Expression): String = {
node.code
}

override def visit(node: nodes.Method): String = {
node.name
}
override def visit(node: nodes.Expression): String = node.code

override def visit(node: nodes.MethodReturn): String = {
node.code
}
override def visit(node: nodes.Method): String = node.name

override def visit(node: nodes.MethodReturn): String = node.code
}

0 comments on commit fa586c9

Please sign in to comment.