# cell2func

> Convert desired notebook cells to functions. 

Function inputs are detected automatically, and function outputs semi-automatically: for outputs, hints are provided so that the developer can refine the list of outputs for each cell.

In [None]:
#| default_exp core.cell2func

In [None]:
#| export
import os
import re
import argparse
import shlex
from dataclasses import dataclass
from functools import reduce
from pathlib import Path
import sys
import ast
from IPython import get_ipython
from IPython.core.magic import (Magics, magics_class, line_magic,
                                cell_magic, line_cell_magic)
from IPython.core.magic_arguments import (argument, magic_arguments, parse_argstring)
import ipynbname
from sklearn.utils import Bunch
from fastcore.all import argnames
import nbdev

## FunctionProcessor

In [None]:
#|export
class FunctionProcessor (Bunch):
    """
    Holds the source code and metadata of a function generated from a cell.

    The fields used by the methods below are `name`, `original_code`,
    `arguments`, `return_values` and `code`.
    """
    def to_file (self, file_path, mode='w'):
        """
        Write the generated code to `file_path`, opened with `mode`.
        """
        with open (file_path, mode=mode) as out:
            out.write (self.code)

    def write (self, file):
        """
        Write the generated code to the already opened `file`.
        """
        file.write (self.code)

    def print (self):
        """
        Print the generated code.
        """
        print (self.code)

    def update_code (
        self,
        arguments=None,
        return_values=None,
        tab_size=4,
        display=True
    ) -> None:
        """
        Rebuild the function source from `original_code` and run it.

        `arguments` and `return_values`, when given, replace the stored
        lists; otherwise the stored ones are reused. The original code is
        indented by `tab_size` spaces under a `def` line, followed by a
        `return` line when there is anything to return. The result is
        stored in `code`, executed in the IPython shell, and printed if
        `display` is true.
        """
        if arguments is not None:
            self.arguments = arguments
        signature_args = ', '.join (self.arguments)
        if return_values is not None:
            self.return_values = return_values
        returned = ','.join (self.return_values)

        body = ''.join (
            f'{" " * tab_size}{line}\n'
            for line in self.original_code.splitlines()
        )
        # No return statement at all when there is nothing to return.
        footer = f'{" " * tab_size}return {returned}\n' if returned != '' else ''

        source = f'def {self.name}({signature_args}):\n' + body + footer
        self.code = source
        get_ipython().run_cell(source)
        if display:
            print (source)

## CellProcessor

In [None]:
#| export
class CellProcessor():
    """
    Processes the cell's code according to the magic command.

    One `FunctionProcessor` is kept per converted cell, both by name
    (`function_info`) and in order of creation (`function_list`).
    """
    def __init__(self, **kwargs):
        """
        Set up the function registry, the output path and the signature parser.

        `**kwargs` is accepted but not used.
        """
        self.function_info = Bunch()
        self.function_list = []
        # NOTE(review): `ipynbname.name()` appears to return the name without
        # the `.ipynb` extension, which would make this replace a no-op -- confirm.
        self.file_name = ipynbname.name().replace ('.ipynb', '.py')
        self.nbs_folder = self.get_nbs_path ()
        self.lib_folder = self.get_lib_path ()
        nb_path = ipynbname.path ()
        index = nb_path.parts.index(self.nbs_folder.name)
        # Mirror the notebook's location below the notebooks folder inside the
        # library folder, but as a Python file: the generated functions are
        # Python source, so they must not be written to an `.ipynb` path.
        self.file_path = (
            (self.nbs_folder.parent / self.lib_folder.name)
            .joinpath (*nb_path.parts[index+1:])
            .with_suffix ('.py')
        )

        self.parser = argparse.ArgumentParser(description='Process some integers.')
        self.parser.add_argument('-i', '--input', type=str, nargs='+', help='input')
        self.parser.add_argument('-o', '--output', type=str, nargs='+', help='output')

    def cell2file (self, folder, cell):
        """
        Write `cell` to `folder/module.py` (creating `folder`) and run it.
        """
        folder = Path(folder)
        folder.mkdir(parents=True, exist_ok=True)

        with open(folder / "module.py", "w") as file_handle:
            file_handle.write(cell)

        get_ipython().run_cell(cell)

    @staticmethod
    def _is_callable_name (name, namespace):
        """
        Tell whether `name` resolves to a callable in `namespace` or in the builtins.

        Names that cannot be resolved are reported as not callable.
        """
        import builtins
        if name in namespace:
            return callable (namespace[name])
        return callable (getattr (builtins, name, None))

    def _variables_before (self, idx):
        """
        Names in `variables_here` of the functions registered before position `idx`.

        Duplicates are dropped and the order of first appearance is kept.
        """
        names = []
        for earlier in self.function_list[:idx]:
            # `.get` tolerates a function whose registration failed half-way.
            for name in earlier.get ('variables_here', []):
                if name not in names:
                    names.append (name)
        return names

    def function (
        self,
        func,
        cell,
        input=None,
        unknown_input=True,
        output=None,
        unknown_output=True,
        collect_variables_values=True,
        make_function=True,
        tab_size=4,
        update_previous_functions=True
    ) -> None:
        """
        Register the code in `cell` as a function called `func`.

        Parameters
        ----------
        func : name of the function to create.
        cell : source code of the cell.
        input, output : explicit lists of argument / return names, used when
            `unknown_input` / `unknown_output` are false.
        unknown_input, unknown_output : when true, arguments / return values
            are derived from the names found in this and previous cells.
        collect_variables_values : run the cell and record, through
            `keep_variables`, the values it adds to the namespace.
        make_function : build and run the function's source code.
        tab_size : indentation used in the generated source.
        update_previous_functions : also regenerate the previously
            registered functions so that they return `variables_after`.
        """
        # Re-running a cell replaces the existing entry in place. Creating an
        # unregistered object would make `keep_variables` store the values in
        # the old entry and the lookups below fail.
        previous = self.function_info.get (func)
        idx = len (self.function_list) if previous is None else previous.idx
        this_function = FunctionProcessor (
            idx=idx,
            original_code=cell,
            name=func,
            values_before=[]
        )
        self.function_info[func] = this_function
        if previous is None:
            self.function_list.append (this_function)
            variables_after = []
        else:
            self.function_list[idx] = this_function
            variables_after = list (previous.get ('variables_after', []))

        shell = get_ipython ()

        # get variables specific about this function
        if collect_variables_values:
            shell.run_cell (f'\nkeep_variables ("{func}", "values_before", locals ())')
            shell.run_cell (cell + f'\nkeep_variables ("{func}", "values_here", locals ())')
            values_before, values_here = this_function['values_before'], this_function['values_here']
            # Keep only what the cell added to the namespace.
            values_here = {k: v for k, v in values_here.items() if k not in values_before}
            this_function['values_here'] = values_here
            print (values_here)

        root = ast.parse (cell)
        # Resolve names in the user namespace: evaluating them in this method
        # would see its own locals (`cell`, `idx`, `input`, ...) and fail on
        # every user variable once this class is imported from a module.
        namespace = shell.user_ns
        variables_here = {
            node.id
            for node in ast.walk (root)
            if isinstance (node, ast.Name)
            and not self._is_callable_name (node.id, namespace)
        }
        print (variables_here)
        variables_before = self._variables_before (idx)
        variables_here = sorted (variables_here.difference (variables_before))
        print (variables_here)
        this_function.update (
            variables_here=variables_here,
            variables_before=variables_here+variables_before,
            variables_after=variables_after
        )

        if make_function:
            this_function.update_code (
                arguments=variables_before if unknown_input else input,
                return_values=(variables_here+variables_before) if unknown_output else output,
                tab_size=tab_size
            )

        # NOTE(review): `variables_after` collects the names introduced by
        # later cells, and earlier functions are regenerated to return them --
        # confirm that this is the intended meaning.
        for earlier in self.function_list[:idx]:
            known_after = earlier.setdefault ('variables_after', [])
            known_after += [v for v in this_function.variables_here if v not in known_after]
            if update_previous_functions and unknown_output:
                # Update the earlier function itself, keeping its arguments.
                earlier.update_code (return_values=earlier.variables_after, tab_size=tab_size)

    def parse_signature (self, line):
        """
        Split the magic's `line` into the function name and its signature options.

        The first token is the function name. Everything from the first
        option-like token (starting with `-`, other than `-` and `->`) is
        parsed for `-i/--input` and `-o/--output`.

        Returns
        -------
        (function_name, signature), where `signature` has the keys `input`,
        `unknown_input`, `output` and `unknown_output`. An option that is not
        given keeps its value `None` and its `unknown_*` flag `True`.
        """
        argv = shlex.split(line, posix=(os.name == 'posix'))

        function_name = argv[0]
        signature = dict(
            input=None,
            unknown_input=True,
            output=None,
            unknown_output=True
        )
        first_option = next (
            (
                idx for idx, arg in enumerate(argv[1:], 1)
                if arg.startswith('-') and arg not in ('-', '->')
            ),
            None
        )
        if first_option is not None:
            pars = self.parser.parse_args(argv[first_option:])
            # argparse always defines both attributes, leaving them as None
            # when the option is absent, so test the value and not the presence.
            if pars.input is not None:
                signature.update (input=pars.input, unknown_input=False)
            if pars.output is not None:
                signature.update (output=pars.output, unknown_output=False)

        print (function_name, signature)
        return function_name, signature

    def write (self):
        """
        Write the code of all registered functions, in order, to `file_path`.
        """
        # The library sub-folder may not exist yet.
        Path (self.file_path).parent.mkdir (parents=True, exist_ok=True)
        with open (str(self.file_path), 'w') as file:
            for function in self.function_list:
                function.write (file)

    def print (self):
        """
        Print the code of all registered functions, in order.
        """
        for function in self.function_list:
            function.print ()

    def get_lib_path (self):
        """
        Return the `lib_path` entry of the nbdev configuration.
        """
        return nbdev.config.get_config()['lib_path']

    def get_nbs_path (self):
        """
        Return the `nbs_path` entry of the nbdev configuration.
        """
        return nbdev.config.get_config()['nbs_path']

## CellProcessorMagic

In [None]:
#| export
@magics_class
class CellProcessorMagic (Magics):
    """
    Base magic class for converting cells to modular functions.

    Every magic delegates to a single `CellProcessor` held in `processor`.
    """
    def __init__(self, shell, **kwargs):
        """
        Create the magics for `shell`; `**kwargs` is passed to `CellProcessor`.
        """
        super().__init__(shell)
        self.processor = CellProcessor (magic=self, **kwargs)

    @cell_magic
    def cell2file (self, folder, cell):
        "Writes the cell to `module.py` inside `folder` and runs it"
        self.processor.cell2file (folder, cell)

    @cell_magic
    def function (self, line, cell):
        "Converts cell to function"
        function_name, signature = self.processor.parse_signature (line)
        self.processor.function (function_name, cell, **signature)

    @line_magic
    def write (self, line):
        "Writes the code of all the functions to file"
        return self.processor.write ()

    @line_magic
    def print (self, line):
        "Prints the code of all the functions"
        return self.processor.print ()

    @line_magic
    def cell_processor (self, line):
        "Returns the underlying `CellProcessor`"
        return self.processor

    @line_magic
    def match (self, line):
        "Prints the `(inputs)` and `(outputs)` groups of a `name (...) -> (...)` signature"
        # Raw string: `\S` and `\s` are invalid escapes in a normal literal.
        # The two groups are captured from the match itself, so any other
        # parenthesised text in the line cannot break the unpacking.
        signature_pattern = r'[a-zA-Z]\S*\s*(\([^-()]*\))\s*->\s*(\([^-()]*\))'
        m = re.search (signature_pattern, line)
        if m is not None:
            inp, out = m.groups ()
            print (inp)
            print (out)

## load_ipython_extension

In [None]:
#| export
#| hide
def load_ipython_extension(ipython):
    """
    Register the `CellProcessorMagic` magics with the `ipython` shell.

    This module can be loaded via `%load_ext core.cell2func` or be configured to be autoloaded by IPython at startup time.
    """
    ipython.register_magics(CellProcessorMagic(ipython))

## keep_variables

In [None]:
#| export
def keep_variables (function, field, variable_values, self=None):
    """
    Store a filtered copy of `variable_values` in `self.function_info[function][field]`.

    Entries whose name starts with `_` or whose value is callable are left out.
    When `self` is not a `CellProcessor`, the call stack is walked upwards and,
    for every frame for which `argnames` reports names, the local variable
    named by the first of them is tried, until a `CellProcessor` is found.
    """
    depth = 1
    while not isinstance (self, CellProcessor):
        frame = sys._getframe (depth)
        frame_args = argnames (frame, True)
        if len (frame_args) > 0:
            self = frame.f_locals[frame_args[0]]
        depth += 1

    kept = {}
    for name in variable_values:
        if name.startswith ('_') or callable (variable_values[name]):
            continue
        kept[name] = variable_values[name]

    self.function_info[function][field] = kept

## Usage

In [None]:
# Register the magics with the running IPython shell so that they can be used below.
load_ipython_extension (get_ipython())

In [None]:
%%other fun (a, b, c) -> (d, e, f) -i a c -o b d
print ('yes')

['fun', '(a,', 'b,', 'c)', '->', '(d,', 'e,', 'f)', '-i', 'a', 'c', '-o', 'b', 'd']
fun
Namespace(input=['a', 'c'], output=['b', 'd'])


In [None]:
%%function my_final -i a b
a = 1
b = 2
print (f'a + b is {a+b}')
c = a*b
print (f'a * b is {c}')

my_final {'input': ['a', 'b'], 'unknown_input': False, 'output': None, 'unknown_output': False}
a + b is 3
a * b is 2
{}
{'b', 'a', 'c'}
['a', 'b', 'c']


TypeError: can only concatenate list (not "FunctionProcessor") to list

In [None]:
%debug

> [0;32m/tmp/ipykernel_388/2481124064.py[0m(77)[0;36mfunction[0;34m()[0m
[0;32m     75 [0;31m        [0mvariables_here[0m [0;34m=[0m [0msorted[0m [0;34m([0m[0mvariables_here[0m[0;34m.[0m[0mdifference[0m[0;34m([0m[0mvariables_before[0m[0;34m)[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     76 [0;31m        [0mprint[0m [0;34m([0m[0mvariables_here[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m---> 77 [0;31m        [0mthis_function[0m[0;34m.[0m[0mupdate[0m [0;34m([0m[0mvariables_here[0m[0;34m=[0m[0mvariables_here[0m[0;34m,[0m [0mvariables_before[0m[0;34m=[0m[0mvariables_here[0m[0;34m+[0m[0mvariables_before[0m[0;34m,[0m [0mvariables_after[0m[0;34m=[0m[0;34m[[0m[0;34m][0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     78 [0;31m[0;34m[0m[0m
[0m[0;32m     79 [0;31m        [0;32mif[0m [0mmake_function[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  variables_here


['a', 'b', 'c']


ipdb>  variables_before


{'idx': 0, 'original_code': "a = 1\nb = 2\nprint (f'a + b is {a+b}')\nc = a*b\nprint (f'a * b is {c}')\n", 'name': 'final', 'values_before': {'In': ['', '#| default_exp core.cell2func', '#| export\nimport os\nimport argparse\nimport shlex\nfrom dataclasses import dataclass\nfrom functools import reduce\nfrom pathlib import Path\nimport sys\nimport ast\nfrom IPython import get_ipython\nfrom IPython.core.magic import (Magics, magics_class, line_magic,\n                                cell_magic, line_cell_magic)\nfrom IPython.core.magic_arguments import (argument, magic_arguments, parse_argstring)\nimport ipynbname\nfrom sklearn.utils import Bunch\nfrom fastcore.all import argnames\nimport nbdev', '#|export\nclass FunctionProcessor (Bunch):\n    def to_file (self, file_path, mode=\'w\'):\n        with open (file_path, mode=mode) as file:\n            file.write (self.code)\n    \n    def write (self, file):\n        file.write (self.code)\n        \n    def print (self):\n        print (

In [None]:
%print all

def final():
    a = 1
    b = 2
    print (f'a + b is {a+b}')
    c = a*b
    print (f'a * b is {c}')
    return a,b,c



In [None]:
a = %cell_processor 1

In [None]:
a.file_name

'cell2func'

In [None]:
final ()

a + b is 3
a * b is 2


(1, 2, 2)

In [None]:
#| hide
# Run nbdev's export step (presumably writes the `#| export` cells to the library module -- confirm).
import nbdev; nbdev.nbdev_export()