In [1]:
import os

from series_extraction.excel_loader import ExcelLoader
from series_extraction.excel_cleaner import ExcelCleaner
from series_extraction.table_extractor import TableExtractor
from series_extraction.series_extractor import SeriesExtractor
from series_extraction.excel_compatibility_checker import ExcelCompatibilityChecker
from series_extraction.series_iterator import SeriesIterator
from series_extraction.series_mapper import SeriesMapper
from series_extraction.excel_validator import ExcelValidator

from ast_transformation.formula_generator import FormulaGenerator

from ast_building.formula_parser import FormulaParser

from excel_builder import ExcelBuilder

In [2]:
data_directory = "/Users/chrislittle/GitHub/speedsheet/excel-2-python/data"

project_name = 'test_excel_2'

excel_raw_file_path = os.path.join(data_directory, "excel_files_raw", f"{project_name}_raw.xlsx")
excel_reduced_filepath = os.path.join(data_directory, "excel_files_reduced", f"{project_name}_reduced.xlsx")
excel_reduced_clean_filepath = os.path.join(data_directory, "excel_files_reduced_clean", f"{project_name}_reduced_clean.xlsx")
excel_raw_clean_filepath = os.path.join(data_directory, "excel_files_raw_clean", f"{project_name}_raw_clean.xlsx")
excel_reduced_clean_series_filepath = os.path.join(data_directory, "excel_files_reduced_clean_series", f"{project_name}_reduced_clean_series.xlsx")
excel_reduced_clean_series_python_filepath = os.path.join(data_directory, "excel_files_reduced_clean_series_python", f"{project_name}_reduced_clean_series_python.xlsx")
excel_raw_clean_series_python_filepath = os.path.join(data_directory, "excel_files_raw_clean_series_python", f"{project_name}_raw_clean_series_python.xlsx")

In [3]:
excel_raw= ExcelLoader.load_file(excel_raw_file_path)
excel_reduced = ExcelLoader.load_file(excel_reduced_filepath)
is_valid = ExcelValidator.validate_excel(excel_reduced)
if not is_valid:
    raise Exception("Excel file is not valid")

excel_reduced_clean = ExcelCleaner.clean_excel(excel_reduced)
excel_raw_clean = ExcelCleaner.clean_excel(excel_raw)
ExcelBuilder.create_excel_from_workbook(excel_reduced_clean.workbook_with_formulas, excel_reduced_clean_filepath)
ExcelBuilder.create_excel_from_workbook(excel_raw_clean.workbook_with_formulas,  excel_raw_clean_filepath)

extracted_tables, workbook_data = TableExtractor.extract_tables(excel_reduced_clean)
is_compatible = ExcelCompatibilityChecker.check_file(excel_raw, excel_reduced, extracted_tables)
if not is_compatible:
    raise Exception("Excel file is not compatible")

series_dict = SeriesExtractor.extract_series(extracted_tables=extracted_tables, workbook_data=workbook_data)
series_mapping = SeriesMapper.map_series(series_dict)
series_iterator = SeriesIterator.iterate_series(series_dict)

series_list = [series for series in series_iterator]

series_list_with_formulas = [
    series for series in series_list if series.formulas != [None, None]
]
series_list_with_values = [
    series for series in series_list if series.formulas == [None, None]
]

generic_formula_dictionary = {}

for series in series_list_with_formulas:
    formula_1, formula_2 = series.formulas
    
    if formula_1 is not None and formula_2 is not None:

        formula_1_ast_elements, _ = FormulaParser.parse_formula(formula_1)
        formula_2_ast_elements, _ = FormulaParser.parse_formula(formula_2)

        generic_formula = FormulaGenerator.generate_generic_formula(formula_1_ast_elements, formula_2_ast_elements)

        sheet_name = series.series_id.sheet_name

        generic_formula_dictionary[series.series_id] = (formula_1_ast_elements, generic_formula)

In [4]:
# formula_1_ast_elements, generic_formula = list(generic_formula_dictionary.values())[5]

In [5]:
# formula_1_ast_elements

In [6]:
from excel_utils import ExcelUtils
from formulas.tokens.operand import Range

In [7]:
def get_sheet_name_from_range(range: Range):
    """Get sheet name from range"""
    if "!" in range.name:
        return range.name.split("!")[0], range.name.split("!")[1]
    return None, range.name

def apply_delta_to_range(range: Range, delta):
    """Apply delta to range"""

    print(f"range: {range}")

    sheet_name, range_string = get_sheet_name_from_range(range)

    cell_start_column_delta, cell_start_row_delta, cell_end_column_delta, cell_end_row_delta = delta

    cell_start_column, cell_start_row, cell_end_column, cell_end_row, is_column_range = ExcelUtils.get_coordinates_from_range(range_string)

    if not is_column_range:
        cell_start_column += cell_start_column_delta
        cell_start_row += cell_start_row_delta
        cell_end_column += cell_end_column_delta
        cell_end_row += cell_end_row_delta

        new_range_string = ExcelUtils.get_range_from_coordinates(cell_start_column, cell_start_row, cell_end_column, cell_end_row, is_column_range)
        print(f"new_range_string: {new_range_string}")
        if sheet_name:
            return Range(f"{sheet_name}!{new_range_string}")
        return Range(new_range_string)
    return range


In [8]:
# generic_formula

In [9]:
# formula_1_ast_elements


def join_elements(formula_1_ast_elements):
    return "".join(element.name for element in formula_1_ast_elements)


In [10]:
def generate_formula_list(formula_1_ast_elements, generic_formula):

    list_length = 2

    formula_list = [join_elements(formula_1_ast_elements)]

    for _ in range(1, list_length):
        for index, element in enumerate(formula_1_ast_elements):
            if isinstance(element, Range):
                delta = generic_formula[index]
                print(f"delta: {delta}")
                new_element = apply_delta_to_range(element, delta)
                print(f"new_element: {new_element}")
                formula_1_ast_elements[index] = new_element
                print(f"formula_1_ast_elements: {formula_1_ast_elements}")
                formula_list.append(join_elements(formula_1_ast_elements))

    return formula_list



In [11]:
formula_list = generate_formula_list(formula_1_ast_elements, generic_formula)

delta: (0, 1, 0, 1)
range: E2 <Range>
new_range_string: E3:E3
new_element: E3 <Range>
formula_1_ast_elements: [E3 <Range>, * <Operator>, B2 <Range>]
delta: (0, 1, 0, 1)
range: B2 <Range>
new_range_string: B3:B3
new_element: B3 <Range>
formula_1_ast_elements: [E3 <Range>, * <Operator>, B3 <Range>]


In [12]:
formula_list

['E2*B2', 'E3*B2', 'E3*B3']

In [13]:
import formulas

formula = '=B2+SUM(A:A)'
ast = formulas.Parser().ast(formula)
pipe = ast[1].compile()
ranges = [item[0] for item in list(pipe.inputs.items())]

cell_value_mapping = {"B2": [1], "A:A": [2,3]}
inputs = [cell_value_mapping[range] for range in ranges]
result = pipe(*inputs)

result

Array([6.0], dtype=object)

In [14]:
import formulas

def evaluate_formula(formula: str, cell_value_mapping: dict):
    if "=" not in formula:
        formula = f"={formula}"
    ast = formulas.Parser().ast(formula)
    pipe = ast[1].compile()
    ranges = [item[0] for item in list(pipe.inputs.items())]
    inputs = [cell_value_mapping[range] for range in ranges]
    result = pipe(*inputs)
    return result

In [15]:
cell_value_mapping = {"B2": [1], "A:A": [2,3]}
evaluate_formula('=B2+SUM(A:A)', cell_value_mapping)



Array([6.0], dtype=object)

In [16]:
formula_list[0]

'E2*B2'

In [17]:
evaluate_formula(formula_list[0], cell_value_mapping)

KeyError: 'E2'