In [None]:
#default_exp translator

# Translator
> Translate Excel formulas on sheets to Python code.

In [None]:
#export
from ebb.parser import parse, parse_value, parse_formula
import ebb.types as t
from ebb.util import colname_to_num, num_to_colname
from ebb.pyframes import PyFrame

In [None]:
%load_ext autoreload
%autoreload 2

# Translation
Evaluation is useful, but in the end the core seems to be translation.

Goal of this part: given a table-like grid of cells, write an equivalent Python program that can be run on the input data (cells without any dependencies) to generate the output (cells in the last column, or cells that no other cell depends on).

In [None]:
#export

from collections.abc import Iterable

def construct_tuple(x, y):
    """Join two translated operands into one flat tuple.

    A string or a non-iterable value counts as a single element; any other
    iterable contributes its items in order.
    """
    def _as_tuple(value):
        # Strings are iterable too, but they must stay whole.
        if isinstance(value, str):
            return (value,)
        if isinstance(value, Iterable):
            return tuple(value)
        return (value,)

    return _as_tuple(x) + _as_tuple(y)

def _translate_addition(x, y):
    """Render the Python source for adding two translated operands."""
    return f'{x} + {y}'


def _translate_multiplication(x, y):
    """Render the Python source for multiplying two translated operands."""
    return f'{x} * {y}'


# Operator token -> callable that combines the two translated operands.
# NOTE(review): the '+' and '*' keys are backslash-escaped, presumably to
# match the operator patterns used by the parser -- confirm.
infix_translate_map = {
    '\\+': _translate_addition,
    '\\*': _translate_multiplication,
    ',': construct_tuple,
}
# No prefix or postfix operators are registered yet.
prefix_translate_map = dict()
postfix_translate_map = dict()

def choose(index, *values):
    """Return the `index`-th entry of `values`, counting from 1.

    Args:
        index: 1-based position of the value to pick.
        *values: The candidates to pick from.

    Returns:
        `values[index - 1]`.

    Raises:
        ValueError: If `index` is smaller than 1 or larger than `len(values)`.
    """
    if not 1 <= index <= len(values):
        raise ValueError(
            f'CHOOSE index {index!r} is out of range; '
            f'expected a value between 1 and {len(values)}'
        )
    return values[index - 1]
        
# Function name -> Python callable applied to the translated arguments.
function_translate_map = dict(CHOOSE=choose)

# We save the variables in a tuple because it's convenient to use ordering
# to compare formulas. If they have the same AST structure we can just check
# them for consistency one by one.
def translate(tree, variables=tuple()):  # Todo(Rik): str might translate to ref.
    """Translate a parsed formula tree into its Python equivalent.

    Args:
        tree: A literal (`int`, `float`, `bool`, `str`) or a node of type
            `t.InfixOp`, `t.PrefixOp`, `t.PostfixOp`, `t.Function` or `t.Ref`.
        variables: Refs collected so far. Every `t.Ref` met during the
            traversal is appended, in left-to-right order.

    Returns:
        A `(translation, variables)` pair. Literals translate to themselves,
        a `t.Ref` translates to `tree.to_string()`, and operators/functions
        translate to whatever the callable registered in the corresponding
        `*_translate_map` returns.

    Raises:
        NotImplementedError: For range expressions (infix operator `:`).
        TypeError: If `tree` is not one of the supported node types.
        KeyError: If an operator or function has no entry in its map.
    """
    if isinstance(tree, (int, float, bool, str)):
        return tree, variables
    elif isinstance(tree, t.InfixOp) and tree.op == ':':
        # Todo(Rik): handle ranges in formulas
        raise NotImplementedError('ranges (":") in formulas are not supported yet')
    elif isinstance(tree, t.InfixOp):
        # Thread the tuple through both operands so that refs collected
        # before this call are carried over exactly once.
        t_left, variables = translate(tree.left, variables)
        t_right, variables = translate(tree.right, variables)
        return infix_translate_map[tree.op](t_left, t_right), variables
    elif isinstance(tree, t.PrefixOp):
        t_arg, var_arg = translate(tree.arg, variables)
        return prefix_translate_map[tree.op](t_arg), var_arg
    elif isinstance(tree, t.PostfixOp):
        t_arg, var_arg = translate(tree.arg, variables)
        return postfix_translate_map[tree.op](t_arg), var_arg
    elif isinstance(tree, t.Function):
        t_args, var_args = translate(tree.args, variables)
        # Gymnastics to handle one-argument functions
        # Todo(Rik): zero-argument functions?
        if not isinstance(t_args, tuple): t_args = (t_args,)
        return function_translate_map[tree.name](*t_args), var_args
    elif isinstance(tree, t.Ref):  # Create and return variable name
        return tree.to_string(), variables + (tree,)
    # Fail loudly instead of returning None, which would only surface later
    # as an unpacking error at the call site.
    raise TypeError(f'cannot translate node of type {type(tree).__name__}')

In [None]:
# Smoke test: translate a parsed CHOOSE call and display the resulting
# (translation, variables) pair.
translate(parse('= CHOOSE(1, 1, 2, 3)'))

1 (1, 2, 3)


(1, ())

Next, chaining functions. One cell = one line of code, I suppose. :)

In [None]:
def print_args(func):
    """Debugging decorator that prints the call arguments when `func` raises.

    The positional and keyword arguments of the failing call are printed and
    the original exception is re-raised unchanged. The wrapper keeps the
    name and docstring of `func`.
    """
    import functools  # local import so the cell does not depend on another cell

    @functools.wraps(func)
    def wrapped(*a, **kw):
        try:
            return func(*a, **kw)
        except Exception:
            # Only report ordinary errors; KeyboardInterrupt and friends
            # pass through silently.
            print(a, kw)
            raise
    return wrapped

In [None]:
@print_args
def cell_to_lines(ref, sheet, depth=None, stop_at=None):
    """Translate cell `ref` and the cells it references into assignment lines.

    Args:
        ref: The cell to translate; its formula is read from
            `sheet[ref.row, ref.column]`.
        sheet: The grid holding the cell contents.
        depth: How many levels of references to expand. `None` means no
            limit; at depth 0 the cell itself is reported as a leaf.
        stop_at: Refs that must not be expanded. The set passed in is not
            modified.

    Returns:
        A `(lines, visited, leaves)` triple. `lines` holds one
        `'<name> = <code>'` string per translated cell, dependencies first.
        `visited` is the set of refs that were handled (for a leaf it is just
        `{ref}`). `leaves` is the set of refs that were not expanded because
        their formula references no other cell or the depth limit was hit.
    """
    # Work on a private copy so the caller's set is never mutated.
    visited = set() if stop_at is None else set(stop_at)
    if depth == 0:
        return [], {ref}, {ref}
    child_depth = None if depth is None else depth - 1

    code, variables = translate(parse(sheet[ref.row, ref.column]))
    if not variables:
        return [], {ref}, {ref}

    result = []
    leaves = set()
    for var in variables:
        # Skip refs that were already handled (or explicitly excluded).
        if var in visited:
            continue
        var_lines, var_visited, var_leaves = cell_to_lines(var, sheet, child_depth, visited)
        result += var_lines
        visited |= var_visited
        leaves |= var_leaves

    # The cell's own assignment comes after everything it depends on.
    result.append(f'{ref.to_string()} = {code}')
    return result, visited | {ref}, leaves

# Demo sheet with two rows and the columns A-D; columns C and D hold formulas
# that reference other cells.
# NOTE(review): PyFrame is imported directly from ebb.pyframes at the top of
# the notebook but is accessed through `t` here -- confirm `t.PyFrame` exists.
sheet = t.PyFrame.from_list_of_rows([
    ['3', '4', '=A1+D2', '0'],
    ['4', '5', '=A2+D3', '=A1+D1'],
], ['A', 'B', 'C', 'D'])

# Translate the cell in column C of the first row; depth=2 limits how many
# levels of references are expanded.
lines, cells, leaves = cell_to_lines(t.Ref(row=0, column='C'), sheet, depth=2)
print('\n'.join(lines))
# print(get_dependency_tree(t.Ref(row=0, column='A'), sheet))

D2 = A1 + D1
C1 = A1 + D2


In [None]:
#export
def cell_to_function(name, ref, sheet, depth=None, stop_at=None):
    """Generate the source of a Python function that computes cell `ref`.

    Args:
        name: Name of the generated function.
        ref: The cell whose value the generated function returns.
        sheet: The grid holding the cell contents.
        depth: Passed on to `cell_to_lines`.
        stop_at: Passed on to `cell_to_lines`.

    Returns:
        A `(source, leaves)` pair: the source code of the function as one
        string, and the set of leaf refs that became its parameters.
    """
    # NOTE(review): `cell_to_lines` lives in a notebook cell without an
    # `#export` marker, so the exported module may not define it -- confirm.
    lines, _, leaves = cell_to_lines(ref, sheet, depth, stop_at)
    # `leaves` is a set; sort the names so the generated signature does not
    # depend on set iteration order.
    parameters = ', '.join(sorted(leaf.to_string() for leaf in leaves))
    source = [f'def {name}({parameters}):']
    source += [f'    {line}' for line in lines]
    source.append(f'    return {ref.to_string()}')
    return '\n'.join(source), leaves

In [None]:
# Print only the generated source (element 0 of the returned pair); element 1
# is the set of leaf refs.
print(cell_to_function('foo', t.Ref(row=0, column='C'), sheet)[0])

def foo(C1):
    return C1


In this case, we would expect some kind of "start-up" rows, and then an eventually stable set of cell formulas. Especially gnarly when there are two regimes.

# Making a small demo
Leverage ipydatagrid (from Bloomberg) to show a small demo with a sheet on the left and code on the right. Next up probably for loops. :)

We could also think about a backwards connection, i.e. editing the Python code and updating the Excel sheet as necessary. It is not clear yet what should happen when a new line of code is inserted, though... Something to think about.

For now though, I think continuing the row-interpretation work and making it go in the demo might be good!

In [None]:
# Demo sheet for the widget below: columns A-C hold plain numbers, columns D
# and E hold formulas; from the third row on, column E also references the E
# cell two rows up.
sheet = t.PyFrame.from_list_of_rows([
    [3, 4, 0, '=A1+B1', '=C1*D1'],
    [4, 5, 3, '=A2+B2', '=C2*D2'],
    [7, 8, 2, '=A3+B3', '=E1*C3*D3'],
    [7, 8, 2, '=A4+B4', '=E2*C4*D4'],
], ['A', 'B', 'C', 'D', 'E'])

# Convert every cell value to a string.
# NOTE(review): presumably because `parse` expects string input -- confirm.
for col in sheet.columns:
    sheet.col[col] = [str(value) for value in sheet.col[col]]

In [None]:
from ipydatagrid import DataGrid
import ipywidgets as widgets
import pandas as pd

# Editable grid view of the sheet.
datagrid = DataGrid(pd.DataFrame(sheet), editable=True, base_column_size=128, base_row_size=30)
#                     layout={'height': '200px'})#, 'width': '800px'})
# Output area that receives the generated code.
code_output = widgets.Output(layout={'border': '1px solid black'})
# Slider whose value is passed as the `depth` argument when generating code.
depth_widget = widgets.IntSlider(value=1, min=0, max=5)

def update_df(cell):
    """Cell-change callback: store the edited value and regenerate the code.

    `cell` is read through the keys 'row', 'column' and 'value'. The function
    relies on the notebook globals `sheet`, `ref`, `code_output` and
    `depth_widget`.
    """
    # NOTE(review): this writes via sheet[row][column] while cell_to_lines
    # reads via sheet[row, column] -- confirm both address the same cell.
    sheet[cell['row']][cell['column']] = cell['value']
    with code_output:
        # Replace the previously shown code instead of appending to it.
        code_output.clear_output()        
        update_code(ref, sheet, depth_widget.value)
    
def update_code(ref, sheet, depth):
    """Print the function generated for `ref` and execute its definition."""
    function_name = f'calculate_{ref.to_string()}'
    source, _leaves = cell_to_function(function_name, ref, sheet, depth=depth)
    print(source)
    exec(source)
        

# The cell whose generated code is shown: row index 3 of column E.
ref = t.Ref(row=3, column='E')
# Regenerate the code whenever a grid cell is edited.
datagrid.on_cell_change(update_df)
# Render the initial code once, before any edit happens.
with code_output:
    update_code(ref, sheet, depth_widget.value)
    
# Layout: grid and depth slider stacked on the left, generated code on the right.
widgets.HBox(children=[widgets.VBox(children=[datagrid, depth_widget]), code_output])

HBox(children=(VBox(children=(DataGrid(auto_fit_params={'area': 'all', 'padding': 30, 'numCols': None}, base_c…

In [None]:
# NOTE(review): this cell only displays the `widgets` module; it looks like
# leftover debugging and is a candidate for removal.
widgets

In [None]:
# Convert the project's notebooks into Python modules with nbdev.
from nbdev.export import notebook2script; notebook2script()

Converted evaluator.ipynb.
Converted parser.ipynb.
Converted prototype.ipynb.
Converted translator.ipynb.
Converted types.ipynb.
Converted util.ipynb.
