# LOG6302A - Lab1 - AST & visitor examples

## Load AST

In [68]:
from code_analysis import AST, ASTReader

reader = ASTReader()
ast = reader.read_ast("../example/example_1.php.ast.json")
ast_2 = reader.read_ast("../example/example_2.php.ast.json")

## Access AST information

In [69]:
'''
ast.get_root()       # Return the root node ID
ast.get_type(45)     # Return the type of node 45
ast.get_image(45)    # Return the image of node 45
ast.get_children(45) # Return the list of children
ast.get_parent(45)   # Return the list of parents
ast.get_position(45) # Return the position in source file as an
                     # array [line_begin, line_end, column_begin, column_end, token_begin, token_end]
'''

root = ast.get_root()
print(f"Root node ID is {root}")
print(f"Root type is {ast.get_type(root)}")

print(f"Node root children are {ast.get_children(root)}")
for node_id in ast.get_children(root):
  print(f"Node type of {node_id} is {ast.get_type(node_id)}")


Root node ID is 46
Root type is Start
Node root children are [42, 45]
Node type of 42 is FunctionStatement
Node type of 45 is FunctionCall


## Create a visitor that returns function definition position in source file

In [70]:
class ASTFunctionDefinitionVisitor:
  def __init__(self):
    self.ast = None
  def visit(self, ast: AST):
    self.ast = ast
    print(f"Visit AST from file {self.ast.get_filename()}")
    self.__visit(self.ast.get_root())

  def __visit(self, node_id: int):
    if self.ast.get_type(node_id) == "FunctionStatement":
      print(f"Function '{self.ast.get_image(node_id)}' definition is from "
      f"line {self.ast.get_position(node_id)[0]} to {self.ast.get_position(node_id)[1]}")

    for child_id in self.ast.get_children(node_id):
      self.__visit(child_id)

visitor = ASTFunctionDefinitionVisitor()
visitor.visit(ast)
visitor.visit(ast_2)

Visit AST from file ./example/example_1.php
Function 'f2' definition is from line 3 to 22
Visit AST from file ./example/example_2.php
Function 'f2' definition is from line 2 to 18
Function 'f1' definition is from line 20 to 37


## Create a visitor that returns function call position in source file

In [71]:
class ASTFunctionCallVisitor:
  def __init__(self):
    self.ast = None
  def visit(self, ast: AST):
    self.ast = ast
    print(f"Visit AST from file {self.ast.get_filename()}")
    self.__visit(self.ast.get_root())

  def __visit(self, node_id: int):
    if self.ast.get_type(node_id) == "FunctionCall":
      print(f"Function '{self.ast.get_image(node_id)}' is called "
            f"at line {self.ast.get_position(node_id)[0]}")

    for child_id in self.ast.get_children(node_id):
      self.__visit(child_id)

visitor = ASTFunctionCallVisitor()
visitor.visit(ast)
visitor.visit(ast_2)

Visit AST from file ./example/example_1.php
Function 'f2' is called at line 24
Visit AST from file ./example/example_2.php
Function 'f2' is called at line 32
Function 'f1' is called at line 39


### LOAD PHP WORDPRESS FILEs

In [116]:
from code_analysis import AST, ASTReader

with open('../code_to_analyze/wordpress_ast/filelist', "r") as f :
    filelist = f.readlines()
reader = ASTReader()
ASTFILES = [reader.read_ast("../code_to_analyze/wordpress_ast/" + file.strip()) for file in filelist]



### CREATE A VISITOR THAT RETURNS FUNCTION SQL POSITION IN SOURCE FILE

In [1]:
def find_exec_mysql(ast : AST, node_id) :
    if ast.get_type(node_id) == "MethodCall" and ast.get_image(node_id) == "exec" :
        
        parent = ast.get_parents(node_id)[0]
        children = ast.get_children(parent)
        tuples_children_1 = []
        for child in children :
            tuples_children_1.extend([(ast.get_type(id), ast.get_image(id),) for id in ast.get_children(child)])
        
        if ("Id", "mysql") in tuples_children_1 and ("Id", "exec") in tuples_children_1 :
            return True
    return False

def find_execute_method(ast : AST, node_id) :
    if ast.get_type(node_id) == "MethodCall" and ast.get_image(node_id) == "execute":
        return True
    return False

def find_sql_query(ast : AST, node_id) :
    if ast.get_type(node_id) == "FunctionCall" and ast.get_image(node_id) in ["mysql_query","mysqli_query"] :
        return True
    return False

NameError: name 'AST' is not defined

In [102]:
class CFGFunctionDatabaseVisitor:
  def __init__(self):
    self.ast = None
  def visit(self, ast: AST):
    self.ast = ast
    #print(f"Visit AST from file {self.ast.get_filename()}")
    self.__visit(self.ast.get_root())
    #get all types of nodes
  def __visit(self, node_id: int):
    if find_exec_mysql(self.ast, node_id) or find_execute_method(self.ast, node_id) or find_sql_query(self.ast, node_id):

      for child_id in self.ast.get_children(node_id):
        self.__visit(child_id)
      

for ast in ASTFILES:
    visitor = CFGFunctionDatabaseVisitor()
    visitor.visit(ast)

sql query 'exec' is called at line 130 in file ./wp-includes/SimplePie/Cache/MySQL.php

sql query 'exec' is called at line 139 in file ./wp-includes/SimplePie/Cache/MySQL.php

sql query 'execute' is called at line 168 in file ./wp-includes/SimplePie/Cache/MySQL.php

sql query 'execute' is called at line 188 in file ./wp-includes/SimplePie/Cache/MySQL.php

sql query 'execute' is called at line 200 in file ./wp-includes/SimplePie/Cache/MySQL.php

sql query 'execute' is called at line 217 in file ./wp-includes/SimplePie/Cache/MySQL.php

sql query 'execute' is called at line 239 in file ./wp-includes/SimplePie/Cache/MySQL.php

sql query 'execute' is called at line 257 in file ./wp-includes/SimplePie/Cache/MySQL.php

sql query 'execute' is called at line 265 in file ./wp-includes/SimplePie/Cache/MySQL.php

sql query 'execute' is called at line 276 in file ./wp-includes/SimplePie/Cache/MySQL.php

sql query 'execute' is called at line 300 in file ./wp-includes/SimplePie/Cache/MySQL.php

sql q

In [118]:
from code_analysis import AST

file = reader.read_ast("../code_to_analyze/test_cve/2021_21705.php.ast.json")

In [119]:
def find_CVE_2021_21705(ast : AST, node_id) :
    if ast.get_image(node_id) == "filter_var" :
        print(ast.get_type(node_id))
        children = ast.get_children(node_id)
        print([(ast.get_type(id), ast.get_image(id)) for id in children])
        if len(children) != 2 :
            return False
        argumentlist_children_id = children[1]
        children_of_argumentlist = ast.get_children(argumentlist_children_id)
        
        images = [ast.get_image(id) for id in children_of_argumentlist]
        print(images)
        if "FILTER_VALIDATE_URL" in images :
            return True

    return False

In [120]:
class ASTFunctionCVEVisitor:
  def __init__(self):
    self.ast = None
  def visit(self, ast: AST):
    self.ast = ast
    #print(f"Visit AST from file {self.ast.get_filename()}")
    self.__visit(self.ast.get_root())
    #get all types of nodes
  def __visit(self, node_id: int):
    if find_CVE_2021_21705(self.ast, node_id) :
      print(f"find CVE_2021_21705 '{self.ast.get_image(node_id)}' is called "
            f"at line {self.ast.get_position(node_id)[0]}"
            f" in file {self.ast.get_filename()}")

      print()

    for child_id in self.ast.get_children(node_id):
      self.__visit(child_id)
      


visitor = ASTFunctionCVEVisitor()
visitor.visit(file)

FunctionCall
[('Id', 'filter_var'), ('ArgumentList', None)]
['url', 'FILTER_VALIDATE_URL']
