# AST Analysis Notebook

In [1]:
import ast
import itertools
from typing import Any
import astor
print("Done importing")

Done importing


Show version 1 AST

In [2]:
# Parse version 1 of the sample module and print astor's indented dump of the
# resulting tree (the dump starts at the top-level Module node).
compiled_ast = astor.parse_file("sample_files/funcs_v1.py")
print(astor.dump_tree(compiled_ast))

Module(
    body=[
        FunctionDef(name='assign_numbers',
            args=arguments(posonlyargs=[],
                args=[],
                vararg=None,
                kwonlyargs=[],
                kw_defaults=[],
                kwarg=None,
                defaults=[]),
            body=[
                Assign(targets=[Name(id='x')], value=Constant(value=1, kind=None), type_comment=None),
                Assign(targets=[Name(id='y')],
                    value=BinOp(left=Constant(value=2, kind=None), op=Add, right=Constant(value=3, kind=None)),
                    type_comment=None)],
            decorator_list=[],
            returns=None,
            type_comment=None),
        FunctionDef(name='func_with_params',
            args=arguments(posonlyargs=[],
                args=[arg(arg='a', annotation=None, type_comment=None),
                    arg(arg='b', annotation=None, type_comment=None),
                    arg(arg='c', annotation=None, type_comment=None)],
        

Show version 2 AST

In [3]:
# Parse version 2 of the sample module and print its tree dump.
# NOTE: this rebinds `compiled_ast`, so any later cell that reads
# `compiled_ast` operates on the version 2 tree, not version 1.
compiled_ast = astor.parse_file("sample_files/funcs_v2.py")
print(astor.dump_tree(compiled_ast))

Module(
    body=[
        FunctionDef(name='assign_numbers',
            args=arguments(posonlyargs=[],
                args=[],
                vararg=None,
                kwonlyargs=[],
                kw_defaults=[],
                kwarg=None,
                defaults=[]),
            body=[
                Assign(targets=[Name(id='x')], value=Constant(value=1, kind=None), type_comment=None),
                Assign(targets=[Name(id='y')],
                    value=BinOp(left=Constant(value=2, kind=None), op=Add, right=Constant(value=3, kind=None)),
                    type_comment=None),
                Assign(targets=[Name(id='z')],
                    value=BinOp(
                        left=BinOp(left=Constant(value=4, kind=None), op=Add, right=Constant(value=5, kind=None)),
                        op=Add,
                        right=Constant(value=6, kind=None)),
                    type_comment=None)],
            decorator_list=[],
            returns=None,
            t

Create a class that prints each function's name, return annotation, and parameter names from an AST

In [4]:
class PrintFunction(ast.NodeTransformer):
    """Print a short summary of every function definition found in an AST.

    For each ``FunctionDef`` node the visitor prints, one item per line:
    the function name, the return annotation node (``None`` when the
    function has no annotation), and the name of every entry in
    ``node.args.args``.

    The tree is left unchanged: every visited node is handed back to the
    transformer machinery, so the same tree can be reused afterwards.
    """

    def visit_FunctionDef(self, node):
        """Print the summary for one function definition.

        Args:
            node: The ``ast.FunctionDef`` being visited.

        Returns:
            ``node`` itself, so the definition stays in the tree.
        """
        # Descend first so nested definitions are reported before the
        # function that contains them.
        self.generic_visit(node)
        print(node.name)
        # if return type isn't specified, it defaults to None
        print(node.returns)
        for arg in node.args.args:
            print(arg.arg)
        # NodeTransformer replaces a node with whatever its visitor returns,
        # and a None return deletes the node. Returning the node keeps the
        # function in the tree instead of silently stripping it.
        return node

# Run the printer over whatever tree `compiled_ast` is currently bound to,
# then print a marker so the end of the summary is easy to spot.
PrintFunction().visit(compiled_ast)
print("Done.")

assign_numbers
None
func_with_params
None
a
b
c
same_func
None
a
Done.


Create a node class that stores a summary of one function definition (name, parameter count, return annotation, body)

In [5]:
class GraphNode:
    """Summary of one function definition, usable as a dict key or set member.

    Two nodes are equal when they have the same name, the same number of
    parameters and the same return annotation. The body is stored but is
    deliberately not part of equality or hashing, so the "same" function can
    be looked up across two versions of a file and its bodies compared.

    Attributes are set directly from the constructor arguments:
        name: Function name.
        num_params: Number of parameters counted by the caller.
        returns: Return annotation (an ``ast`` node) or ``None``.
        body: The function body as provided by the caller.
    """

    def __init__(self, name, num_params, returns, body):
        self.name = name
        self.num_params = num_params
        self.returns = returns
        self.body = body

    def _returns_key(self):
        """Return a value-comparable form of the return annotation.

        AST nodes only compare equal by identity, so the same annotation
        parsed from two files would never match. Comparing the structural
        dump instead makes ``-> int`` in one tree equal ``-> int`` in another.
        The leading flag keeps a dumped node from colliding with a plain
        string that happens to contain the same text.
        """
        if isinstance(self.returns, ast.AST):
            return (True, ast.dump(self.returns))
        return (False, self.returns)

    def __eq__(self, other):
        """Compare by name, parameter count and return annotation."""
        if self is other:
            return True
        if not isinstance(other, GraphNode):
            return False
        return (self.name == other.name and
                self.num_params == other.num_params and
                self._returns_key() == other._returns_key())

    def __hash__(self):
        """Hash on the name and parameter count.

        Both fields take part in ``__eq__``, so equal nodes always hash
        alike. Hashing the tuple avoids the collisions that summing two
        hashes produces.
        """
        return hash((self.name, self.num_params))

    def __repr__(self):
        return f"{self.name}, {self.num_params} params -> {self.returns}"

Now make a visitor class that collects a GraphNode for every function definition in an AST

In [6]:
class GraphCreator(ast.NodeTransformer):
    """Collect a ``GraphNode`` for every function definition in an AST.

    After ``visit(tree)`` the ``nodes`` dict maps each ``GraphNode`` to
    itself, which lets callers look up the stored instance (and its body)
    using an equal node obtained from another tree.

    The visited tree is not modified.
    """

    def __init__(self, *args, **kwargs):
        """Create an empty collector. Extra arguments are accepted and ignored."""
        super().__init__()
        self.nodes = {}

    def visit_FunctionDef(self, node: ast.FunctionDef) -> Any:
        """Record one function definition and keep it in the tree.

        Args:
            node: The function definition being visited.

        Returns:
            ``node`` itself, so the definition stays in the tree.
        """
        # Visit children first so nested definitions are recorded too.
        self.generic_visit(node)
        gNode = GraphNode(node.name, len(node.args.args), node.returns, node.body)
        self.nodes[gNode] = gNode
        # NodeTransformer deletes a node whose visitor returns None. Without
        # this return, every function is removed from the tree, and nested
        # definitions are stripped from the enclosing function's body before
        # that body is recorded above.
        return node

    def print_nodes(self):
        """Print every collected node, one per line."""
        for node in self.nodes:
            print(node)

Parse both versions and report only the functions that appear in both files but whose bodies have changed

In [10]:
def nodes_are_same(node1, node2):
    """Return True when both arguments produce the same astor tree dump."""
    dump1 = astor.dump_tree(node1)
    dump2 = astor.dump_tree(node2)
    return dump1 == dump2


# Parse each version of the sample module from disk.
compiled_ast_v1 = astor.parse_file("sample_files/funcs_v1.py")
compiled_ast_v2 = astor.parse_file("sample_files/funcs_v2.py")

# Collect the function summaries of each version with its own collector.
g1 = GraphCreator()
g1.visit(compiled_ast_v1)
g2 = GraphCreator()
g2.visit(compiled_ast_v2)

# Functions present in both versions, matched through GraphNode equality.
same_func_nodes = g1.nodes.keys() & g2.nodes.keys()
print(f"Number of similar nodes: {len(same_func_nodes)}")

# Report only the shared functions whose bodies dump differently.
for node in same_func_nodes:
    if nodes_are_same(g1.nodes[node].body, g2.nodes[node].body):
        continue
    print(node)

print("Done!")

Number of similar nodes: 3
assign_numbers, 0 params -> None
func_with_params, 3 params -> None
Done!
