In [1]:
#hide
#default_exp test_ratio
from nbdev.showdoc import show_doc

In [108]:
# Read the notebook whose quality metrics are computed in the cells below.
nb = read_nb('test/test_clustering.ipynb')

In [109]:
# NOTE(review): NoExportPreprocessor is defined outside this view and `ep` is not
# referenced in the visible cells — confirm it is still needed.
ep = NoExportPreprocessor([], timeout=600, kernel_name='python3')

In [110]:
# Wrap the loaded notebook with nbformat; later cells read its cells via `pnb.cells`.
pnb = nbformat.from_dict(nb)

In [165]:
# Fixture: source text of a fake exported cell, kept as a string. It holds a stub
# `Topics` class, one module-level function and a single call to that function.
class_test_code = """
# export


class Topics:
    def __init__(self, documents, workers, speed):
        pass

    def get_num_topics(self):
        return 6

    def get_topic_sizes(self):
        return [1, 2, 3, 4, 5, 6], [1, 2, 3, 4, 5, 6]

    def get_topics(self, num_topics):
        return (
            ["cat", "sat", "mat", "mouse", "house", "grouse"],
            np.asarray([1, 1, 1, 1, 1, 1]),
            [1, 2, 3, 4, 5, 6],
        )

    def search_documents_by_topic(self, topic_num, num_docs):
        return (
            ["cat", "sat", "mat", "mouse", "house", "grouse"],
            np.asarray([1, 1, 1, 1, 1, 1]),
            [1, 2, 3, 4, 5, 6],
        )

    def generate_topic_wordcloud(self, topic_num):
        print("wordcloud")

def hierarchical_topic_reduction(num_topics):
    return ["cat", "sat", "mat"]
    
hierarchical_topic_reduction(7)
"""

# Quality relevant data extraction

## Definitions
* Function ($f$) = function in `# export` block
* Test ($\tau$) = call of exported function outside `# export` block

## Metrics
* Tests per Function: $\mathrm{TpF} = \dfrac{|\tau|}{|f|}$; when $|f| = 0$, $\mathrm{TpF} = 0$
* In-function Percentage: $\mathrm{IP} = $\# $\mathrm{lines-in-function}:$ \# $\mathrm{all-code-lines}$ 
* Code to MD Ratio: $\mathrm{CMR}$ = \# $\mathrm{code-cells}:$ \# $ \mathrm{markdown-cells}$
* Total lines: \# $\mathrm{all-code-lines}$ 

In [111]:
# Keep only the markdown cells of the notebook.
md_cells = [cell for cell in pnb.cells if cell['cell_type'] == 'markdown']

In [112]:
# Number of markdown cells: the denominator of the code-to-markdown ratio.
num_md_cells = len(md_cells)

In [113]:
# Keep only the code cells of the notebook.
code_cells = [cell for cell in pnb.cells if cell['cell_type'] == 'code']

In [114]:
# Number of code cells: the numerator of the code-to-markdown ratio.
num_code_cells = len(code_cells)

In [166]:
# Code-to-markdown ratio (CMR). A notebook without any markdown cell would raise
# ZeroDivisionError here, so the ratio is reported as unbounded in that case.
cmr = num_code_cells / num_md_cells if num_md_cells else float("inf")

In [219]:
def count_func_calls(code, func_defs):
    """Count how often each name in `func_defs` is called inside `code`.

    Plain calls (``name(...)``) and attribute calls (``obj.name(...)``) are both
    counted under ``name``. Calls whose callee is neither a plain name nor an
    attribute access (e.g. ``f()()`` or ``handlers[0]()``) carry no name to
    match and are skipped instead of raising ``AttributeError``.

    Args:
        code: Python source code as a string.
        func_defs: Collection of function names to look for.

    Returns:
        A ``Counter`` with one entry per name in `func_defs` (0 if never called).

    Raises:
        SyntaxError: If `code` cannot be parsed by ``ast.parse``.
    """
    func_calls = Counter({name: 0 for name in func_defs})
    for node in ast.walk(ast.parse(code)):
        if not isinstance(node, ast.Call):
            continue
        callee = node.func
        if isinstance(callee, ast.Name):
            func_name = callee.id
        elif isinstance(callee, ast.Attribute):
            func_name = callee.attr
        else:
            # Callee is itself a call, subscript, lambda, ...: nothing to match on.
            continue
        # Only names seeded from func_defs are counted; others must not be added.
        if func_name in func_calls:
            func_calls[func_name] += 1
    return func_calls

In [250]:
# Fixture for count_func_calls: an attribute call, plain calls, a call inside a
# lambda, a subscript (not a call) and a call to a name missing from test_func_defs.
test_code = """self.hierarchical_topic_reduction(3); 
topic_reduction(3); 
lambda x: topic(x); 
hierarchical_topic_reduction[4]; 
hierarchical_topic_reduction(4); 
blabla()
"""
# 'blablabla' is never called in test_code, so its expected count is 0.
test_func_defs = ['topic', 'topic_reduction', 'blablabla', 'hierarchical_topic_reduction']

In [259]:
# Attribute calls count under the attribute name; subscripts and names that are
# not in test_func_defs are ignored.
assert count_func_calls(test_code, test_func_defs) == Counter(
    topic=1,
    topic_reduction=1,
    blablabla=0,
    hierarchical_topic_reduction=2,
)

In [221]:
# Load the source of the exported module that belongs to the analysed notebook.
test_module = os.path.join(get_config().path("lib_path"), "test", "test_clustering.py")
with open(test_module, "r") as module_file:
    # splitlines() drops the line terminators; readlines() keeps them, so joining
    # with '\n' afterwards doubled every newline (which breaks backslash
    # continuations and changes multi-line strings in the parsed source).
    lines = module_file.read().splitlines()
test_module_code = '\n'.join(lines)

In [223]:
def get_function_defs(code):
    """Return the names of the public functions defined in `code`.

    Every ``ast.FunctionDef`` node reachable through ``ast.walk`` is considered,
    so methods and nested functions are included; names that start with an
    underscore are left out.
    """
    tree = ast.parse(code)
    return [
        node.name
        for node in ast.walk(tree)
        if isinstance(node, ast.FunctionDef) and not node.name.startswith('_')
    ]

In [226]:
# Public functions defined in the module, and how often each of them is called there.
func_defs = get_function_defs(test_module_code)
func_calls = count_func_calls(test_module_code, func_defs)

In [230]:
# One counter entry per distinct function name.
num_funcs = len(func_calls)

In [233]:
# Every counted call of a defined function is treated as one test.
num_tests = sum(func_calls.values())

In [238]:
def tpf(num_tests, num_funcs):
    """Tests per function: ``num_tests / num_funcs``, or 0 when there are no functions."""
    if num_funcs == 0:
        return 0
    return num_tests / num_funcs

In [240]:
# Plain ratios.
assert tpf(1,1) == 1
assert tpf(2,1) == 2
assert tpf(1,2) == 0.5
# No tests at all gives a ratio of 0.
assert tpf(0,1) == 0
# No functions: defined as 0 instead of dividing by zero.
assert tpf(1,0) == 0
assert tpf(10,1) == 10

True

# TODO

* Handle tags