<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->

# Read-in Data

In [6]:
# Directory holding the example notebooks used as fixtures in this notebook.
# Resolved once so every path below shares the same absolute base.
EXAMPLE_NBS_DIR = Path(".").resolve() / "example_nbs"

# One path per example notebook.
nbdev_path = EXAMPLE_NBS_DIR / "nbdev.ipynb"
nbdev_hq_path = EXAMPLE_NBS_DIR / "nbdev_high_quality.ipynb"
non_nbdev_path = EXAMPLE_NBS_DIR / "non_nbdev.ipynb"
non_nbdev_lq_path = EXAMPLE_NBS_DIR / "non_nbdev_low_quality.ipynb"

# Parsed notebooks, loaded once and reused by the metric checks further down.
nbdev_nb = read_nb(nbdev_path)
nbdev_hq_nb = read_nb(nbdev_hq_path)
non_nbdev_nb = read_nb(non_nbdev_path)
non_nbdev_lq_nb = read_nb(non_nbdev_lq_path)

# NB Code Style

In [1]:
#| echo: false
#| output: asis
show_doc(run_nbqa_cmd)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L29){target="_blank" style="float:right; font-size:smaller"}

### run_nbqa_cmd

>      run_nbqa_cmd (cmd)

In [8]:
# The resolved current working directory is handed over as a one-element tuple.
project_root: Path = find_project_root((str(Path(".").resolve()),))
# The directory that comes back must be the "scilint" project directory.
assert os.path.basename(project_root) == "scilint"

In [2]:
#| echo: false
#| output: asis
show_doc(tidy)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L38){target="_blank" style="float:right; font-size:smaller"}

### tidy

>      tidy ()

In [3]:
#| echo: false
#| output: asis
show_doc(scilint_tidy)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L44){target="_blank" style="float:right; font-size:smaller"}

### scilint_tidy

>      scilint_tidy ()

# Quality relevant data extraction

## Definitions
* Function ($f$) = function in `# export` block
* Test ($\tau$) = call of exported function outside `# export` block

## Metrics
1. Tests per Function: $\mathrm{TpF} = \dfrac{|\tau|}{|f|}$; when $|f| = 0$, $\mathrm{TpF} = 0$
2. In-function Percentage: $\mathrm{IP} = \mathrm{statementsInFunction} : \mathrm{allStatements}$
3. Markdown to Code Ratio: $\mathrm{MCR} = \mathrm{markdownCells} : \mathrm{codeCells}$
4. Total Code Lines: $\mathrm{TCL}$ = $\mathrm{allCodeLines}$ 

## Helpers

In [4]:
#| echo: false
#| output: asis
show_doc(get_function_defs)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L48){target="_blank" style="float:right; font-size:smaller"}

### get_function_defs

>      get_function_defs (code, ignore_private_prefix=True)

In [11]:
# TODO: add a test for get_function_defs

In [5]:
#| echo: false
#| output: asis
show_doc(count_func_calls)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L60){target="_blank" style="float:right; font-size:smaller"}

### count_func_calls

>      count_func_calls (code, func_defs)

In [13]:
# Fixture source for count_func_calls. Per the expected counts asserted in the
# next cell, the method-style call and the plain call of
# `hierarchical_topic_reduction` both count, the subscript
# `hierarchical_topic_reduction[4]` does not, and `blabla()` is not a call of
# `blablabla`.
test_code = """self.hierarchical_topic_reduction(3); 
topic_reduction(3); 
lambda x: topic(x); 
hierarchical_topic_reduction[4]; 
hierarchical_topic_reduction(4); 
blabla()
"""
# Function names whose calls are counted in `test_code`; includes names that
# are prefixes or suffixes of one another.
test_func_defs = [
    "topic",
    "topic_reduction",
    "blablabla",
    "hierarchical_topic_reduction",
]

In [14]:
# Each name must be matched as a whole identifier: the never-called name is
# expected at zero and the twice-called name at two.
assert count_func_calls(test_code, test_func_defs) == Counter(
    topic=1,
    topic_reduction=1,
    blablabla=0,
    hierarchical_topic_reduction=2,
)

In [15]:
# Fixture: raw source of a notebook cell. It mixes function definitions,
# top-level statements, `#| export` directives and an IPython shell escape
# (`!ls -l`), so it is not valid Python as written. The `# in` / `# out` notes
# inside the string appear on statements inside / outside a function body.
nb_cell_code = r"""
def something():
    pass; pass # in x 2



!ls -l
if 1!= 2:
    print(4)
#| export

import pandas as pd # out
from sciflow.utils import lib_path, odbc_connect, query # out

#| export

def nb_to_sagemaker_pipeline(
    nb_path: Path,
    silent: bool = True,
):
    nb = read_nb(nb_path)  # in
    lib_name = get_config().get("lib_name")  # in
    module_name = find_default_export(nb["cells"])  # in
    
x = [1,2,3] # out
nb_to_sagemaker_pipeline() # out
"""

In [6]:
#| echo: false
#| output: asis
show_doc(replace_ipython_magics)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L71){target="_blank" style="float:right; font-size:smaller"}

### replace_ipython_magics

>      replace_ipython_magics (code)

In [17]:
# The raw cell source contains an IPython shell escape (`!ls -l`), which is
# not valid Python, so parsing it unmodified must raise SyntaxError.
try:
    ast.parse(nb_cell_code)
except SyntaxError:
    throws = True
else:
    throws = False
assert throws

# Once the magics have been replaced, the same source must parse to a module.
assert isinstance(ast.parse(replace_ipython_magics(nb_cell_code)), ast.Module)

In [7]:
#| echo: false
#| output: asis
show_doc(safe_div)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L78){target="_blank" style="float:right; font-size:smaller"}

### safe_div

>      safe_div (numer, denom)

In [19]:
# (numerator, denominator, expected quotient); a zero denominator yields 0.
safe_div_cases = (
    (1, 1, 1),
    (2, 1, 2),
    (1, 2, 0.5),
    (0, 1, 0),
    (1, 0, 0),
    (10, 1, 10),
)
for case_numer, case_denom, case_quotient in safe_div_cases:
    assert safe_div(case_numer, case_denom) == case_quotient

In [8]:
#| echo: false
#| output: asis
show_doc(get_cell_code)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L82){target="_blank" style="float:right; font-size:smaller"}

### get_cell_code

>      get_cell_code (nb)

## 1. Calls-per-Function

In [9]:
#| echo: false
#| output: asis
show_doc(calls_per_func)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L94){target="_blank" style="float:right; font-size:smaller"}

### calls_per_func

>      calls_per_func (nb)

In [10]:
#| echo: false
#| output: asis
show_doc(mean_cpf)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L101){target="_blank" style="float:right; font-size:smaller"}

### mean_cpf

>      mean_cpf (nb)

In [11]:
#| echo: false
#| output: asis
show_doc(median_cpf)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L105){target="_blank" style="float:right; font-size:smaller"}

### median_cpf

>      median_cpf (nb)

In [24]:
# Spot-check the calls-per-function mean and median on the nbdev example.
assert mean_cpf(nbdev_nb).round(2) == 2.23
assert median_cpf(nbdev_nb) == 1

In [25]:
# Expected mean calls-per-function (rounded to 2 dp) for each example notebook,
# each read fresh from disk.
for example_path, expected_mean_cpf in (
    (nbdev_path, 2.23),
    (nbdev_hq_path, 2.5),
    (non_nbdev_path, 1.0),
    (non_nbdev_lq_path, 1.62),
):
    assert mean_cpf(read_nb(example_path)).round(2) == expected_mean_cpf

In [26]:
# The nbdev example is compared without rounding; the others at 2 dp.
assert median_cpf(read_nb(nbdev_path)) == 1.0
for example_path, expected_median_cpf in (
    (nbdev_hq_path, 1.5),
    (non_nbdev_path, 1.0),
    (non_nbdev_lq_path, 1.0),
):
    assert median_cpf(read_nb(example_path)).round(2) == expected_median_cpf

## 2. Asserts-to-Function Ratio

In [27]:
# Fixture: cell source with function definitions interleaved with top-level
# asserts. The `# <name> +1` notes inside the string flag the asserts that are
# expected to count towards that function; asserts without a note call none of
# the defined functions.
asserted_code = r"""
def something():
    pass; pass # in x 2
    
assert True

#| export

def nb_to_sagemaker_pipeline(
    nb_path: Path,
    silent: bool = True,
):
    nb = read_nb(nb_path)  # in
    lib_name = get_config().get("lib_name")  # in
    module_name = find_default_export(nb["cells"])  # in
    
x = [1,2,3] # out
assert len(x) > 2
assert something() is None # something +1

def tr():
    return True
    
def get_seg(num):
    return 2
    
assert(tr)
assert(tr()) # tr +1
assert(tr() == 4) # tr +1
assert(4 ==tr()) # tr +1
assert 0 != 0
assert "' '".join(tr(1)) == "00" # tr +1
assert len(get_seg(50)) == 50 # get_seg +1
assert max([int(x) for x in get_seg(100)]) == 99 # get_seg +1
"""

In [28]:
import nbformat as nbf

In [29]:
# Wrap the fixture source in a single code cell of an otherwise empty notebook.
asserted_cell = nbf.v4.new_code_cell(asserted_code)
asserted_nb = nbf.v4.new_notebook()
asserted_nb["cells"] = [asserted_cell]

In [12]:
#| echo: false
#| output: asis
show_doc(afr)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L109){target="_blank" style="float:right; font-size:smaller"}

### afr

>      afr (nb)

In [31]:
afr(nbdev_nb)

1.3076923076923077

In [32]:
afr(nbdev_hq_nb)

1.6666666666666667

In [33]:
afr(non_nbdev_nb)

0.0

In [34]:
afr(non_nbdev_lq_nb)

0.0

## 3. In-line Asserts Per Function

In [13]:
#| echo: false
#| output: asis
show_doc(count_inline_asserts)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L122){target="_blank" style="float:right; font-size:smaller"}

### count_inline_asserts

>      count_inline_asserts (code, func_defs)

In [14]:
#| echo: false
#| output: asis
show_doc(iaf)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L139){target="_blank" style="float:right; font-size:smaller"}

### iaf

>      iaf (nb)

In [37]:
# Expected number of asserts that call each function defined in the fixture.
inline_asserts_expected = Counter(
    something=1, tr=4, get_seg=2, nb_to_sagemaker_pipeline=0
)
# Actual counts, computed for the functions discovered in the same source.
func_defs = get_function_defs(asserted_code)
inline_asserts_actual = count_inline_asserts(asserted_code, func_defs)

In [38]:
# The per-function assert counts must match the hand-counted expectation.
assert inline_asserts_actual == inline_asserts_expected

In [39]:
# The median of the per-function inline assert counts is zero for every
# example notebook.
for example_nb in (nbdev_nb, nbdev_hq_nb, non_nbdev_nb, non_nbdev_lq_nb):
    assert 0.0 == pd.Series(iaf(example_nb)).median()

In [40]:
iaf(non_nbdev_nb)

Counter({'scalar': 0, 'py_advanced': 0, 'pandas': 0})

In [41]:
iaf(non_nbdev_lq_nb)

Counter({'get_traffic_text': 0,
         'get_experiment_segment': 0,
         'evaluate': 0,
         'serve_num_topics': 0,
         'get_num_topics': 0,
         'get_topic_sizes': 0,
         'get_topics': 0,
         'plot_wordcloud': 0})

In [42]:
# iaf on a notebook wrapping the same fixture source must give the same counts.
assert inline_asserts_expected == iaf(asserted_nb)

In [15]:
#| echo: false
#| output: asis
show_doc(mean_iaf)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L145){target="_blank" style="float:right; font-size:smaller"}

### mean_iaf

>      mean_iaf (nb)

In [16]:
#| echo: false
#| output: asis
show_doc(median_iaf)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L149){target="_blank" style="float:right; font-size:smaller"}

### median_iaf

>      median_iaf (nb)

## Full Code Coverage?

How does pytest-cov do it?

## 2. In-function Percentage

In [17]:
#| echo: false
#| output: asis
show_doc(calc_ifp)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L153){target="_blank" style="float:right; font-size:smaller"}

### calc_ifp

>      calc_ifp (nb_cell_code)

In [46]:
# Expected in-function percentage for the fixture: 5 out of (5 + 5).
assert calc_ifp(nb_cell_code) == 100 * (5 / (5 + 5))

In [18]:
#| echo: false
#| output: asis
show_doc(ifp)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L171){target="_blank" style="float:right; font-size:smaller"}

### ifp

>      ifp (nb)

In [48]:
# Sanity check: the in-function percentage is never negative.
for example_nb in (nbdev_nb, nbdev_hq_nb, non_nbdev_nb, non_nbdev_lq_nb):
    assert ifp(example_nb) >= 0

## 3. Markdown to Code Percent

In [19]:
#| echo: false
#| output: asis
show_doc(mcp)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L182){target="_blank" style="float:right; font-size:smaller"}

### mcp

>      mcp (nb)

In [50]:
# Sanity check: the markdown-to-code percentage is never negative.
for example_nb in (nbdev_nb, nbdev_hq_nb, non_nbdev_nb, non_nbdev_lq_nb):
    assert mcp(example_nb) >= 0

## 4. Total Code Length

In [20]:
#| echo: false
#| output: asis
show_doc(tcl)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L194){target="_blank" style="float:right; font-size:smaller"}

### tcl

>      tcl (nb)

In [52]:
# Sanity check: every example notebook has a total code length of at least 50.
for example_nb in (nbdev_nb, nbdev_hq_nb, non_nbdev_nb, non_nbdev_lq_nb):
    assert tcl(example_nb) >= 50

In [21]:
#| echo: false
#| output: asis
show_doc(lint_nb)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L198){target="_blank" style="float:right; font-size:smaller"}

### lint_nb

>      lint_nb (nb_path, tpf_warn_thresh=None, ifp_warn_thresh=None,
>               afr_warn_thresh=1, iaf_med_warn_thresh=0,
>               iaf_mean_warn_thresh=0.5, mcp_warn_thresh=None,
>               tcl_warn_thresh=None, rounding_precision=3)

In [22]:
#| echo: false
#| output: asis
show_doc(format_quality_warning)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L234){target="_blank" style="float:right; font-size:smaller"}

### format_quality_warning

>      format_quality_warning (metric, warning_data, warn_thresh, direction)

In [23]:
#| echo: false
#| output: asis
show_doc(lint_nbs)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L239){target="_blank" style="float:right; font-size:smaller"}

### lint_nbs

>      lint_nbs (cpf_med_warn_thresh=1, cpf_mean_warn_thresh=1,
>                ifp_warn_thresh=20, afr_warn_thresh=1, iaf_med_warn_thresh=0,
>                iaf_mean_warn_thresh=0.5, mcp_warn_thresh=5,
>                tcl_warn_thresh=20000, rounding_precision=3,
>                csv_out_path='/tmp/lint.csv')

In [56]:
# Run the linter with its default thresholds and keep the returned report,
# which is displayed in the next cell.
lint_report = lint_nbs()


*********************Begin Scilint Report*********************
"index" has: in_function_pct < 20
"non_nbdev_low_quality" has: asserts_function_ratio < 1
"non_nbdev" has: asserts_function_ratio < 1
"index" has: asserts_function_ratio < 1
"non_nbdev_low_quality" has: inline_asserts_per_function_mean < 0.5
"non_nbdev" has: inline_asserts_per_function_mean < 0.5
"non_nbdev" has: markdown_code_pct < 5
*********************End Scilint Report***********************


In [57]:
lint_report

Unnamed: 0,calls_per_function_median,calls_per_function_mean,in_function_pct,asserts_function_ratio,inline_asserts_per_function_median,inline_asserts_per_function_mean,markdown_code_pct,total_code_len
scilint,2.0,3.208,51.445,1.667,0.0,1.5,17.143,14294
nbdev_high_quality,1.5,2.5,44.118,1.667,0.0,1.0,30.769,4978
nbdev,1.0,2.231,50.725,1.308,0.0,0.846,30.769,4918
non_nbdev_low_quality,1.0,1.625,45.0,0.0,0.0,0.0,11.111,2955
non_nbdev,1.0,1.0,35.714,0.0,0.0,0.0,0.0,1233
index,,,0.0,0.0,,,77.778,8


In [24]:
#| echo: false
#| output: asis
show_doc(scilint_lint)

---

[source](https://github.com/newday-data/scilint/tree/{branch}/blob/main/scilint/scilint.py#L323){target="_blank" style="float:right; font-size:smaller"}

### scilint_lint

>      scilint_lint ()