In [1]:
from analysis.cfg import cfg, cfg_cpp

In [2]:
from typing import Callable, Generic, ParamSpec, TypeVar, Concatenate, cast
from tree_sitter import Node
import functools


In [3]:
from analysis.cfg import lens
import ts_utils
import pathlib
from analysis import data

# Load one sample snippet from disk and parse it with the "cpp" grammar.
# `source` and `tree` are rebound by later cells, so the values produced here
# only describe this particular file.
snippet = pathlib.Path("../data/raw/snippets/graph_utils_c.cc")
source = data.load_file_contents(snippet)
tree = ts_utils.parse(source, "cpp")


  from .autonotebook import tqdm as notebook_tqdm
2023-03-28 11:35:28.511 | DEBUG    | ts_utils.parsing:load_grammar:133 - Building language library, cache_dir=/Users/devjeetroy/.cache/tree-sitter-grammars, library_path=/Users/devjeetroy/.cache/tree-sitter-grammars/language_lib.so


In [4]:
from typing import Optional, Sequence

from toolz import itertoolz

# Tree-sitter query text containing two patterns; each pattern produces two captures:
#   * a `function_declarator` node (@function_declarator) and its `declarator`
#     field (@function.name)
#   * a `class_specifier` node (@class) and its `name` field (@class.name)
BLOCK_DEFINITION_QUERIES = """
    (function_declarator
        declarator: (_) @function.name) @function_declarator
    (class_specifier
        name: (_) @class.name) @class
    """
def find_all_block_definitions(root: Node):
    """Collect the function and class blocks found beneath ``root``.

    Runs ``BLOCK_DEFINITION_QUERIES`` against ``root`` using the "cpp" grammar
    and returns a dict mapping a block name to a syntax node:

    * for a ``function_declarator`` capture, the node is the nearest ancestor
      whose type is ``function_definition``;
    * for any other capture (the class pattern), the node is the captured
      node itself.

    The name is the decoded text of the paired name capture. When a name is
    already present in the result, the new entry is stored under
    ``"<name>_L<n>"`` where ``n`` is ``node.start_point[0]``.
    """
    cpp_grammar = ts_utils.load_grammar("cpp")
    block_query = cpp_grammar.query(BLOCK_DEFINITION_QUERIES)

    blocks_by_name = {}
    # Captures are consumed two at a time: the outer node capture followed by
    # its name capture. Each capture is indexed as (node, capture_name).
    for outer_capture, name_capture in itertoolz.partition(
        2, block_query.captures(root)
    ):
        block_node = outer_capture[0]
        if outer_capture[1] == "function_declarator":
            # Climb from the declarator to the enclosing function definition.
            # NOTE(review): if no ancestor is a function_definition, the walk
            # stops at the topmost ancestor (the one without a parent) and that
            # node is recorded instead -- confirm this is intended for bare
            # declarations.
            while block_node.parent and block_node.type != "function_definition":
                block_node = block_node.parent

        label = name_capture[0].text.decode()
        if label in blocks_by_name:
            # Disambiguate a repeated name using the node's start_point[0].
            label = f"{label}_L{block_node.start_point[0]}"
        blocks_by_name[label] = block_node

    return blocks_by_name


def compute_cyclomatic_complexity(root: Node):
    """Return a cyclomatic-complexity value for every block beneath ``root``.

    Blocks are discovered with ``find_all_block_definitions``. For each block a
    control-flow graph is built with ``cfg.build_cfg`` (using ``cfg_cpp.visit``),
    reduced with ``cfg.build_basic_blocks``, and scored with
    ``cfg.cyclomatic_complexity``.

    Returns:
        dict mapping block name to the value returned by
        ``cfg.cyclomatic_complexity`` for that block.
    """
    complexity_by_name = {}
    for block_name, block_node in find_all_block_definitions(root).items():
        raw_graph = cfg.build_cfg(cfg_cpp.visit, block_node)
        basic_block_graph = cfg.build_basic_blocks(raw_graph)
        complexity_by_name[block_name] = cfg.cyclomatic_complexity(basic_block_graph)
    return complexity_by_name


In [5]:
# Score every annotated snippet by cyclomatic complexity.
annotations = data.load_yaml("../data/raw/annotations.yaml")
# Snippets named here are scored by one specific block (annotation["target_function"]);
# all other snippets are scored by the arithmetic mean over all of their blocks.
old_snippets = {"numbers_hrn", "numbers_hrd", "graph_utils"}
snippet_root = pathlib.Path("../data/raw/snippets")
# snippet name -> the single score reported for that snippet
scores_by_snippet = {}
# snippet name -> {block name: complexity} for every block found in the snippet
all_block_scores_by_snippet = {}
for name, annotation in annotations.items():
    filename = annotation["file"]
    source = data.load_file_contents(snippet_root / filename)
    tree = ts_utils.parse(source, "cpp")
    complexity_by_block = compute_cyclomatic_complexity(tree.root_node)
    all_block_scores_by_snippet[name] = complexity_by_block
    if name in old_snippets:
        # A KeyError here means the annotated target function is not among
        # the block names extracted from the file.
        scores_by_snippet[name] = complexity_by_block[annotation["target_function"]]
    else:
        scores = tuple(complexity_by_block.values())
        # NOTE(review): raises ZeroDivisionError if no blocks were found for
        # this snippet -- confirm every annotated file defines at least one.
        scores_by_snippet[name] = sum(scores) / len(scores)



2023-03-28 11:35:29.602 | DEBUG    | ts_utils.parsing:load_grammar:133 - Building language library, cache_dir=/Users/devjeetroy/.cache/tree-sitter-grammars, library_path=/Users/devjeetroy/.cache/tree-sitter-grammars/language_lib.so
2023-03-28 11:35:29.606 | DEBUG    | ts_utils.parsing:load_grammar:133 - Building language library, cache_dir=/Users/devjeetroy/.cache/tree-sitter-grammars, library_path=/Users/devjeetroy/.cache/tree-sitter-grammars/language_lib.so
2023-03-28 11:35:29.666 | DEBUG    | ts_utils.parsing:load_grammar:133 - Building language library, cache_dir=/Users/devjeetroy/.cache/tree-sitter-grammars, library_path=/Users/devjeetroy/.cache/tree-sitter-grammars/language_lib.so
2023-03-28 11:35:29.671 | DEBUG    | ts_utils.parsing:load_grammar:133 - Building language library, cache_dir=/Users/devjeetroy/.cache/tree-sitter-grammars, library_path=/Users/devjeetroy/.cache/tree-sitter-grammars/language_lib.so
2023-03-28 11:35:29.728 | DEBUG    | ts_utils.parsing:load_grammar:133 -

In [6]:
# Re-parse a single snippet and extract its blocks for closer inspection.
# This rebinds the `source` and `tree` globals left over from the loop above.
source = data.load_file_contents(snippet_root / "insertion-sort-with-bug.cpp")
tree = ts_utils.parse(source, "cpp")
blocks = find_all_block_definitions(tree.root_node)

2023-03-28 11:35:32.668 | DEBUG    | ts_utils.parsing:load_grammar:133 - Building language library, cache_dir=/Users/devjeetroy/.cache/tree-sitter-grammars, library_path=/Users/devjeetroy/.cache/tree-sitter-grammars/language_lib.so
2023-03-28 11:35:32.670 | DEBUG    | ts_utils.parsing:load_grammar:133 - Building language library, cache_dir=/Users/devjeetroy/.cache/tree-sitter-grammars, library_path=/Users/devjeetroy/.cache/tree-sitter-grammars/language_lib.so


In [7]:
# Build the control-flow graph for the block stored under "sort_L14". The
# "_L14" suffix is the form find_all_block_definitions gives to a repeated
# block name (the node's start_point[0]).
# NOTE(review): the result is named `cdg`; `cfg` is already the imported module.
cdg = cfg.build_cfg(cfg_cpp.visit, blocks["sort_L14"])

In [10]:
# Display the per-snippet scores computed by the scoring loop.
scores_by_snippet

{'numbers_hrn': 7,
 'numbers_hrd': 9,
 'graph_utils': 2,
 'calculation': 1.5,
 'insertion_sort': 3.5,
 'money_class': 1.0,
 'number_checker': 2.0,
 'rectangle': 1.0}