In [None]:
#|default_exp cli

In [None]:
#|export
from functools import partial

from fastcore.script import call_parse
from nbdev.processors import NBProcessor

In [None]:
from execnb.nbio import read_nb
from nbdev.export import nb_export

In [None]:
# Sample notebook used as input when trying out the processors in the cells below.
nb_path = "../nbs/favorita.ipynb"
nb = read_nb(nb_path)

In [None]:
# Inspect the cells of the loaded notebook.
nb.cells

[{'cell_type': 'code',
  'execution_count': None,
  'metadata': {},
  'outputs': [],
  'source': '#| default_exp favorita',
  'idx_': 0},
 {'attachments': {},
  'cell_type': 'markdown',
  'metadata': {},
  'source': '# Favorita\n\n## Description\nThe 2018 Kaggle competition was organized by Corporación Favorita, a major Ecuatorian grocery retailer. The Favorita dataset is comprised of item sales history and promotions information, with additional information of items and stores,regional and national holidays, among other. \n\nThe competition task consisted on forecasting sixteen days for the log-sales of particular item store combinations, for 210,654 series. \nThe original dataset is available in the [Kaggle Competition url](https://www.kaggle.com/c/favorita-grocery-sales-forecasting/).\n\nDuring the model\'s optimization we consider a balanced dataset of items and stores, for 217,944 bottom level series (4,036 items * 54 stores). We consider a geographical hierarchical structure of 4

In [None]:
#|export
# Directive names that `get_all_tests` looks for (together with 'hide') on a code cell.
tst_flags = ['datasets', 'distributed', 'matplotlib', 'polars', 'pyarrow', 'scipy']

# Substrings that `get_all_tests` checks a cell's source against.
# NOTE(review): 'showdoc' matches the module path `nbdev.showdoc` but not a
# `show_doc(...)` call -- confirm this is the intended filter.
to_skip = ['showdoc', 'load_ext', 'from nbdev']


def print_execs(cell):
    """Print the source of `cell` when it contains the substring 'exec'."""
    src = cell.source
    if 'exec' not in src:
        return
    print(src)

def print_hide(cell):
    """Print the source of `cell` when 'hide' is among its directives."""
    is_hidden = 'hide' in cell.directives_
    if is_hidden:
        print(cell.source)

def other_tests(cell):
    """Print the source of `cell` when it carries no directives at all."""
    n_directives = len(cell.directives_)
    if n_directives != 0:
        return
    print(cell.source)

def get_markdown(cell):
    """Print the source of `cell` if its type is "markdown"."""
    if cell.cell_type != "markdown":
        return
    print(cell.source)

def get_code(cell):
    """Print the source of `cell` if its type is "code"."""
    is_code = cell.cell_type == "code"
    if is_code:
        print(cell.source)

def extract_dir(cell, dirs):
    """Print the source of `cell` if it carries any of the directives in `dirs`.

    `dirs` is a comma-separated string of directive names, e.g. 'hide,polars'.
    Whitespace around each name is ignored. The source is printed at most once,
    even when the cell matches several of the requested directives.
    """
    wanted = [name.strip() for name in dirs.split(',')]
    # `any` stops at the first match, so a cell is never printed twice.
    if any(name in cell.directives_ for name in wanted):
        print(cell.source)

def no_dir_and_dir(cell, dir):
    """Print the source of `cell` when it has no directives, and when it has `dir`."""
    directives = cell.directives_

    # Case 1: the cell carries no directives at all.
    without_directives = len(directives) == 0
    if without_directives:
        print(cell.source)

    # Case 2: the requested directive is present on the cell.
    has_requested = dir in directives
    if has_requested:
        print(cell.source)

def get_all_tests(cell):
    """Print the source of code cells that look like tests.

    A code cell is printed when either
    * it has no directives, or
    * at least one of its directives is in `tst_flags` or is 'hide', and its
      source contains none of the substrings listed in `to_skip`.

    Non-code cells, and code cells whose directives are all outside that set,
    are ignored.
    """
    if cell.cell_type != "code":
        return

    if len(cell.directives_) == 0:
        print(cell.source)
        return

    test_directives = tst_flags + ['hide']
    is_flagged = any(d in test_directives for d in cell.directives_)
    # `any(...)` is required here: a bare generator expression is always truthy,
    # so `not (<generator>)` would be False for every cell and nothing would print.
    is_skipped = any(pattern in cell.source for pattern in to_skip)
    if is_flagged and not is_skipped:
        print(cell.source)


In [None]:
# Grab the first cell of the loaded notebook for inspection.
tst_cell = nb.cells[0]

In [None]:
# Display the selected cell.
tst_cell

```json
{ 'cell_type': 'code',
  'execution_count': None,
  'idx_': 0,
  'metadata': {},
  'outputs': [],
  'source': '#| default_exp favorita'}
```

In [None]:
# The directive names `get_all_tests` checks a cell's directives against.
tst_flags + ['hide']

['datasets', 'distributed', 'matplotlib', 'polars', 'pyarrow', 'scipy', 'hide']

In [None]:
#|export
# Registry of cell processors, keyed by the name accepted as `dir_name`
# by `print_dir_in_nb`. Every processor defined in this module is listed.
mapper = {
    'print_execs': print_execs,
    'print_hide': print_hide,
    'other_tests': other_tests,
    'get_markdown': get_markdown,
    'get_code': get_code,
    'extract_dir': extract_dir,
    'no_dir_and_dir': no_dir_and_dir,
    'get_all_tests': get_all_tests,
}

In [None]:
#|export
@call_parse
def print_dir_in_nb(nb_path:str,        # Path of the notebook to process
                    dir:str=None,       # Directive(s) passed to `extract_dir` (comma-separated) or `no_dir_and_dir`
                    dir_name:str=None,  # Name of the processor to run; must be a key of `mapper`
                    ):
    "Run the processor named `dir_name` on the notebook at `nb_path`."
    if dir_name not in mapper:
        raise ValueError(f'Choose processor from the following: {list(mapper)}')

    if dir_name == 'extract_dir':
        if dir is None:
            # `extract_dir` splits its argument, so it cannot work without one.
            raise ValueError("`dir` is required when `dir_name` is 'extract_dir'")
        # `extract_dir`'s parameter is called `dirs`, not `dir`.
        proc = partial(extract_dir, dirs=dir)
    elif dir_name == 'no_dir_and_dir':
        proc = partial(no_dir_and_dir, dir=dir)
    else:
        proc = mapper[dir_name]

    NBProcessor(nb_path, proc).process()


In [None]:
# Try `extract_dir` on the sample notebook, asking for the `hide` and `polars` directives.
NBProcessor(nb_path, procs=partial(extract_dir, dirs='hide,polars')).process()

#| eval: false
import matplotlib.pyplot as plt
from fastcore.test import test_eq
from nbdev.showdoc import show_doc
#| eval: false
verbose = True
group = 'Favorita200'
# group = 'Favorita500'
# group = 'FavoritaComplete'
directory = './data/favorita'
# directory = f's3://favorita'
#| eval: false
filter_items, filter_stores, filter_dates, raw_group_data = \
    FavoritaRawData._load_raw_group_data(directory=directory, group=group, verbose=verbose)

S_df, item_store_df, static_bottom, static_agg = \
                    FavoritaData._get_static_data(filter_items=filter_items, 
                                                  filter_stores=filter_stores,
                                                  items=raw_group_data['items'], 
                                                  store_info=raw_group_data['store_info'], 
                                                  temporal=raw_group_data['temporal'], 
                                                  verbose=verbose)
#| eval: fa

In [None]:
# Try `get_all_tests` on the sample notebook.
NBProcessor(nb_path, procs=get_all_tests).process()

show_doc(numpy_balance, title_level=4)
show_doc(numpy_ffill, title_level=4)
show_doc(numpy_bfill, title_level=4)
show_doc(one_hot_encoding, title_level=4)
show_doc(nested_one_hot_encoding, title_level=4)
show_doc(get_levels_from_S_df, title_level=4)
show_doc(FavoritaRawData, title_level=4)
show_doc(FavoritaRawData._load_raw_group_data, title_level=4)
show_doc(FavoritaData, title_level=4)
show_doc(FavoritaData.load_preprocessed, title_level=4)
show_doc(FavoritaData.load, title_level=4)
# #| hide
# #| eval: false
# # Test the equality of created and loaded datasets columns and rows
# static_agg1, static_bottom1, temporal_agg1, temporal_bottom1, S_df1 = \
#                         FavoritaData.load_preprocessed(directory=directory, group=group, cache=False)

# static_agg2, static_bottom2, temporal_agg2, temporal_bottom2, S_df2 = \
#                         FavoritaData.load_preprocessed(directory=directory, group=group)

# test_eq(len(static_agg1)+len(static_agg1.columns), 
#         len(

In [None]:
# Build (without running yet) a processor that applies `extract_dir` for the `distributed` directive.
processor = NBProcessor(nb_path, partial(extract_dir, dirs='distributed'))

In [None]:
# Run the processor built in the previous cell.
processor.process()

In [None]:
# Export this notebook as module `cli` under the current directory
# (presumably only the `#|export` cells are written -- confirm against the nbdev docs).
nb_export('cli.ipynb', lib_path='.', name='cli')