In [1]:
# Build a formula evaluator

In [2]:
import pickle

def load_pickled_object(file_path):
    """Open ``file_path`` in binary mode and return the object unpickled from it.

    Args:
        file_path: Path of the pickle file to read.

    Returns:
        Whatever object ``pickle.load`` reconstructs from the file.

    Caution: unpickling can execute arbitrary code, so only call this on
    files from a trusted source.
    """
    with open(file_path, 'rb') as pickle_stream:
        return pickle.load(pickle_stream)

In [3]:
# Restore two previously pickled objects; the relative paths resolve against
# the current working directory.
# NOTE(review): presumably both files were written by an earlier step of this
# project — confirm they are trusted, since unpickling can run arbitrary code.
# NOTE: `series_dict` is rebound to a hard-coded dict in a later cell, so the
# object loaded here is only visible until that cell runs.
generic_formula_dictionary = load_pickled_object('generic_formula_dictionary.pkl')
series_dict = load_pickled_object('series_dict.pkl')

In [4]:
# Display the entry stored under the "Sheet1" key of the loaded dictionary.
series_dict["Sheet1"]

[Series(series_id=SeriesId(sheet_name='Sheet1', series_header='col_1', series_header_cell_row=2, series_header_cell_column=2), worksheet=Worksheet(sheet_name='Sheet1', workbook_file_path=None, worksheet=None), series_header='col_1', formulas=[None, None], values=[1, 3], header_location=<HeaderLocation.TOP: 'top'>, series_starting_cell=Cell(column=2, row=3, coordinate='B3', sheet_name=None, value=None, value_type=None, formula=None), series_length=2, series_data_type=<SeriesDataType.INT: 'int'>),
 Series(series_id=SeriesId(sheet_name='Sheet1', series_header='col_2', series_header_cell_row=2, series_header_cell_column=3), worksheet=Worksheet(sheet_name='Sheet1', workbook_file_path=None, worksheet=None), series_header='col_2', formulas=[None, None], values=[2, 4], header_location=<HeaderLocation.TOP: 'top'>, series_starting_cell=Cell(column=3, row=3, coordinate='C3', sheet_name=None, value=None, value_type=None, formula=None), series_length=2, series_data_type=<SeriesDataType.INT: 'int'>)

In [5]:
import pandas as pd
import xlcalculator
import ast
from typing import List

import xlcalculator.parser

import copy
import ast
import xlcalculator.ast_nodes as ast_nodes
import xlcalculator.tokenizer as tokenizer
from typing import List

# Hard-coded sample series used by the evaluator functions below.
# Keys appear to follow "sheet|header|header_row|header_column" — confirm.
# NOTE: this rebinds `series_dict`, replacing the object unpickled earlier in
# the notebook with a plain dict of pandas Series.
series_dict = {
    series_key: pd.Series(sample_values)
    for series_key, sample_values in (
        ("Sheet1|col_1|2|2", [1, 2, 3, 4, 5]),
        ("Sheet1|col_2|2|3", [6, 7, 8, 9, 10]),
    )
}

def get_values_from_series(series_tuple):
    """Return the slice of a stored series described by ``series_tuple``.

    Args:
        series_tuple: A 3-item sequence ``(series_ids, indexes, deltas)``.
            Only the first entry of ``series_ids`` is looked up in the
            module-level ``series_dict``; ``indexes`` is a ``(start, end)``
            pair used as slice bounds; ``deltas`` is unpacked but not used.

    Returns:
        ``series_dict[series_ids[0]][start:end]``.
    """
    series_ids, index_bounds, _deltas = series_tuple
    selected_series = series_dict[series_ids[0]]
    lower, upper = index_bounds
    return selected_series[slice(lower, upper)]

def SUM(series_tuple):
    """Add up the values that ``get_values_from_series`` selects for ``series_tuple``."""
    selected_values = get_values_from_series(series_tuple)
    return sum(selected_values)

def evaluate_sum(sum_formula: str):
    """Evaluate a formula string as a Python expression in which only ``SUM`` is available.

    The string is parsed in ``eval`` mode, compiled, and evaluated with an
    empty ``__builtins__`` mapping as globals and ``{"SUM": SUM}`` as locals.

    Args:
        sum_formula: Expression text, e.g. a sum of ``SUM(...)`` calls.

    Returns:
        The value the expression evaluates to.

    Caution: this still goes through ``eval``; emptying ``__builtins__`` should
    not be relied on as a security boundary for untrusted formula text.
    """
    expression_tree = ast.parse(sum_formula, mode="eval")
    # Names the expression is allowed to resolve.
    allowed_names = {"SUM": SUM}
    code_object = compile(expression_tree, filename="<ast>", mode="eval")
    restricted_globals = {"__builtins__": {}}
    return eval(code_object, restricted_globals, allowed_names)

# Expand one formula AST into a list of copies whose series index windows are shifted.
def _offset_series_operand(operand, offset):
    """Return a new operand with its index window shifted by ``offset``.

    The operand's ``tvalue`` is expected to be the text of a
    ``(series_ids, (start, end), deltas)`` tuple. Operands whose ``tvalue``
    does not parse into that shape are returned unchanged, so that other
    literal arguments of a function do not abort the expansion.
    """
    try:
        series_ids, indexes, deltas = ast.literal_eval(operand.tvalue)
        shifted_indexes = (indexes[0] + offset, indexes[1] + offset)
    except (ValueError, SyntaxError, TypeError, IndexError):
        # Not a series reference: leave the operand exactly as it was.
        return operand

    # repr() of the rebuilt tuple keeps every series id, not only the first.
    # NOTE(review): `deltas` is carried through unchanged and does not scale the
    # shift, although the original notes say it determines the step — confirm.
    shifted_tvalue = repr((series_ids, shifted_indexes, deltas))
    return ast_nodes.OperandNode(tokenizer.f_token(
        tvalue=shifted_tvalue, ttype="operand", tsubtype="text"
    ))


def _offset_series_indexes(node, offset):
    """Shift, in place, every series operand found among function arguments under ``node``."""
    if isinstance(node, ast_nodes.FunctionNode):
        if not node.args:
            # Nothing to rewrite (also covers a function node whose args is None).
            return
        shifted_args = []
        for arg in node.args:
            if isinstance(arg, ast_nodes.OperandNode):
                shifted_args.append(_offset_series_operand(arg, offset))
            else:
                # Nested functions / operators may contain series operands too.
                _offset_series_indexes(arg, offset)
                shifted_args.append(arg)
        node.args = shifted_args
    elif hasattr(node, 'left') and hasattr(node, 'right'):
        _offset_series_indexes(node.left, offset)
        _offset_series_indexes(node.right, offset)


def generate_formulas_list(formula_ast: xlcalculator.ast_nodes.ASTNode, start_index: int, end_index: int) -> List[xlcalculator.ast_nodes.ASTNode]:
    """Build one shifted copy of ``formula_ast`` per offset in ``start_index..end_index``.

    For every offset ``i`` in ``range(start_index, end_index + 1)`` the AST is
    deep-copied and each series operand found among a function's arguments has
    ``i`` added to both of its index bounds. ``formula_ast`` itself is never
    modified.

    Example (operand tvalues shown without quoting):
        formula_ast ~ SUM((('Sheet1|col_1|2|2',), (0, 1), (1, 1))) + SUM((('Sheet1|col_2|2|3',), (0, 1), (1, 1)))
        start_index = 0
        end_index = 3

        The result is a list of 4 ASTs:
        [
            SUM((('Sheet1|col_1|2|2',), (0, 1), (1, 1))) + SUM((('Sheet1|col_2|2|3',), (0, 1), (1, 1))),
            SUM((('Sheet1|col_1|2|2',), (1, 2), (1, 1))) + SUM((('Sheet1|col_2|2|3',), (1, 2), (1, 1))),
            SUM((('Sheet1|col_1|2|2',), (2, 3), (1, 1))) + SUM((('Sheet1|col_2|2|3',), (2, 3), (1, 1))),
            SUM((('Sheet1|col_1|2|2',), (3, 4), (1, 1))) + SUM((('Sheet1|col_2|2|3',), (3, 4), (1, 1)))
        ]

    Args:
        formula_ast: Root node of the formula to expand.
        start_index: First offset to apply (inclusive).
        end_index: Last offset to apply (inclusive).

    Returns:
        A list with ``end_index - start_index + 1`` independent AST copies, in
        ascending offset order (empty when ``end_index < start_index``).
    """
    formula_list = []
    for offset in range(start_index, end_index + 1):
        # Deep copy so that shifting never touches the caller's AST.
        shifted_ast = copy.deepcopy(formula_ast)
        _offset_series_indexes(shifted_ast, offset)
        formula_list.append(shifted_ast)

    return formula_list

In [6]:
import xlcalculator.ast_nodes as ast_nodes
import xlcalculator.tokenizer as tokenizer

# Define the formula
# This is the string form of the un-shifted formula (identical to the first
# expected string in the assertion below). NOTE: `formula` is not referenced
# again in this cell.
formula = '(SUM("((\'Sheet1|col_1|2|2\',), (0, 1), (1, 1))")) + (SUM("((\'Sheet1|col_2|2|3\',), (0, 1), (1, 1))"))'

# Hand-build the AST for that formula: a '+' operator node ...
formula_ast = ast_nodes.OperatorNode(tokenizer.f_token(
    tvalue='+', ttype='operator-infix', tsubtype=''
))

# ... whose left child is a SUM function node ...
formula_ast.left = ast_nodes.FunctionNode(tokenizer.f_token(
    tvalue="SUM", ttype="function", tsubtype=""
))

# ... taking one text operand: the col_1 series tuple with index window (0, 1).
formula_ast.left.args = [
    ast_nodes.OperandNode(tokenizer.f_token(
        tvalue="(('Sheet1|col_1|2|2',), (0, 1), (1, 1))", ttype="operand", tsubtype="text"
    ))
]

# The right child is a second SUM function node ...
formula_ast.right = ast_nodes.FunctionNode(tokenizer.f_token(
    tvalue="SUM", ttype="function", tsubtype=""
))

# ... over the col_2 series tuple with the same index window.
formula_ast.right.args = [
    ast_nodes.OperandNode(tokenizer.f_token(
        tvalue="(('Sheet1|col_2|2|3',), (0, 1), (1, 1))", ttype="operand", tsubtype="text"
    ))
]

# Offsets 0..3 must yield four formulas whose index windows slide from (0, 1)
# to (3, 4) while the series ids and the (1, 1) deltas stay unchanged.
assert [str(item) for item in generate_formulas_list(formula_ast, 0 , 3)] == ['(SUM("((\'Sheet1|col_1|2|2\',), (0, 1), (1, 1))")) + (SUM("((\'Sheet1|col_2|2|3\',), (0, 1), (1, 1))"))',
                                                                              '(SUM("((\'Sheet1|col_1|2|2\',), (1, 2), (1, 1))")) + (SUM("((\'Sheet1|col_2|2|3\',), (1, 2), (1, 1))"))',
                                                                              '(SUM("((\'Sheet1|col_1|2|2\',), (2, 3), (1, 1))")) + (SUM("((\'Sheet1|col_2|2|3\',), (2, 3), (1, 1))"))',
                                                                              '(SUM("((\'Sheet1|col_1|2|2\',), (3, 4), (1, 1))")) + (SUM("((\'Sheet1|col_2|2|3\',), (3, 4), (1, 1))"))']