In [1]:
import os
import xlcalculator
import re


from series_extraction.excel_loader import ExcelLoader
from series_extraction.excel_validator import ExcelValidator
from series_extraction.excel_cleaner import ExcelCleaner
from series_extraction.table_finder import TableFinder
from series_extraction.series_extractor import SeriesExtractor
from series_extraction.excel_compatibility_checker import ExcelCompatibilityChecker
from series_extraction.series_iterator import SeriesIterator
from series_extraction.series_mapper import SeriesMapper

from ast_building.formula_parser import FormulaParser
from ast_building.series_implementer import SeriesImplementer

from formula_visualiser import FormulaVisualiser


In [2]:
# Resolve all data locations relative to the current working directory.
current_directory = os.getcwd()

# The data folder is looked up as `data/` inside the parent of the working directory.
parent_directory = os.path.abspath(os.path.join(current_directory, os.pardir))
data_directory = os.path.join(parent_directory, 'data')

# Base name shared by the raw and the reduced workbook file names.
project_name = 'test_excel_1'

# <data>/excel_files_raw/<project>_raw.xlsx and <data>/excel_files_reduced/<project>_reduced.xlsx
excel_raw_file_path = os.path.join(data_directory, "excel_files_raw", f"{project_name}_raw.xlsx")
excel_reduced_filepath = os.path.join(data_directory, "excel_files_reduced", f"{project_name}_reduced.xlsx")

In [3]:
# Load both workbooks with ExcelLoader.load_file: the raw file and its reduced counterpart.
excel_raw= ExcelLoader.load_file(excel_raw_file_path)
excel_reduced = ExcelLoader.load_file(excel_reduced_filepath)

In [4]:
# Validate the reduced workbook; a falsy `is_valid` aborts the notebook in the following cell.
is_valid = ExcelValidator.validate_excel(excel_reduced)

In [5]:
# Stop the run as soon as validation has failed so later cells never work on a
# workbook that did not pass. ValueError is raised instead of a bare Exception:
# it is more specific and is still caught by any `except Exception` handler.
if not is_valid:
    raise ValueError("Excel file is not valid")

In [6]:
# Run ExcelCleaner.clean_excel on the reduced workbook and keep the result under a new name;
# `excel_reduced` itself is still passed to the compatibility check further down.
excel_reduced_clean = ExcelCleaner.clean_excel(excel_reduced)

In [7]:
# TableFinder.find_tables returns a pair, unpacked here into `extracted_tables` and `data`;
# both are passed on to SeriesExtractor in later cells.
extracted_tables, data = TableFinder.find_tables(excel_reduced_clean)

In [8]:
# Extract table details from the tables found above.
# NOTE(review): `series_data` is not referenced by any other cell shown in this notebook - confirm it is still needed.
series_data = SeriesExtractor.extract_table_details(extracted_tables, data)

In [9]:
# Check the raw workbook against the reduced one, given the extracted tables.
# NOTE(review): this passes `excel_reduced`, not `excel_reduced_clean` - confirm that is intended.
is_compatible = ExcelCompatibilityChecker.check_file(excel_raw, excel_reduced, extracted_tables)

In [10]:
# Stop the run when the compatibility check has failed. ValueError is raised
# instead of a bare Exception: it is more specific and is still caught by any
# `except Exception` handler.
if not is_compatible:
    raise ValueError("Excel file is not compatible")

In [11]:
# Build `series_dict` from the extracted tables and their data; it feeds both the
# series mapping and the series iterator created in the next two cells.
series_dict = SeriesExtractor.extract_series(extracted_tables=extracted_tables, data=data)

In [12]:
# Mapping derived from `series_dict`; it is handed to SeriesImplementer in the per-series loop below.
series_mapping = SeriesMapper.map_series(series_dict)

In [13]:
# Iterable over the series in `series_dict`; it is consumed by the filtering comprehension in the next cell.
# NOTE(review): if this is a one-shot iterator, re-running the next cell alone would yield an empty list - confirm.
series_iterator = SeriesIterator.iterate_series(series_dict)

In [14]:
# Keep every series except those whose `formulas` attribute equals [None, None].
series_list = [
    candidate
    for candidate in series_iterator
    if candidate.formulas != [None, None]
]

In [15]:
# For each remaining series: parse its first two formulas into ASTs and pass each AST
# through SeriesImplementer.replace_range_nodes.
# NOTE(review): every name assigned in the loop body is overwritten on each iteration, so after
# the loop `formula_1_ast_series` / `formula_2_ast_series` hold the values of the LAST series only;
# the later cells print and use exactly those. If `series_list` is empty the names are never defined.
for series in series_list:
    formula_1 = series.formulas[0]
    formula_1_ast = FormulaParser.parse_formula(formula_1)
    # One implementer per series, bound to the mapping and the sheet name of the series' worksheet.
    series_implementer = SeriesImplementer(series_mapping, sheet_name = series.worksheet.sheet_name)
    formula_1_ast_series = series_implementer.replace_range_nodes(formula_1_ast)

    # The second formula reuses the same implementer instance.
    # NOTE(review): the filter above only drops `formulas == [None, None]`, so a single None entry
    # could still reach parse_formula - confirm that cannot happen.
    formula_2 = series.formulas[1]
    formula_2_ast = FormulaParser.parse_formula(formula_2)
    formula_2_ast_series = series_implementer.replace_range_nodes(formula_2_ast)

In [25]:
class ASTGenerator:
    """Combine the series ASTs of two formulas into one AST annotated with index deltas.

    Range nodes are expected to carry a ``tvalue`` of the form
    ``<prefix>_<start>_<end>``, where ``<start>`` and ``<end>`` are non-negative
    integers with any number of digits. Wherever the two ASTs hold range nodes
    with the same prefix but different indices, the node from the first AST is
    replaced by a new range node named
    ``<prefix>_<start delta>_<end delta>_<start index of the first node>``.

    Type annotations are written as strings so they are not evaluated when the
    class is defined.
    """

    # "<prefix>_<start>_<end>". The greedy prefix makes the LAST two underscore-separated
    # integer fields the indices, which matches the previous fixed-width slicing for
    # single-digit indices while also accepting multi-digit ones.
    _SERIES_REFERENCE = re.compile(r"(.*)_(\d+)_(\d+)", re.DOTALL)

    def __init__(self, formula_1_ast_series: "xlcalculator.ast_nodes.ASTNode", formula_2_ast_series: "xlcalculator.ast_nodes.ASTNode"):
        """Store the two ASTs that ``get_ast_with_deltas`` compares."""
        self.formula_1_ast_series = formula_1_ast_series
        self.formula_2_ast_series = formula_2_ast_series

    @staticmethod
    def _split_series_reference(value: str):
        """Split ``<prefix>_<start>_<end>`` into ``(prefix, start, end)`` strings.

        Returns ``None`` when ``value`` does not end in two underscore-separated
        integer fields.
        """
        match = ASTGenerator._SERIES_REFERENCE.fullmatch(value)
        return match.groups() if match else None

    @staticmethod
    def get_delta_between_nodes(node1_value: str, node2_value: str):
        """Return the index shift from ``node1_value`` to ``node2_value``.

        Args:
            node1_value: Series reference of the node in the first formula.
            node2_value: Series reference of the node in the second formula.

        Returns:
            ``(start_index_delta, end_index_delta)`` (second minus first) when both
            values are series references with the same prefix and different
            indices; ``None`` when the values are equal, when either one is not a
            series reference, or when the prefixes differ.
        """
        if node1_value == node2_value:
            return None

        parts_1 = ASTGenerator._split_series_reference(node1_value)
        parts_2 = ASTGenerator._split_series_reference(node2_value)
        if parts_1 is None or parts_2 is None or parts_1[0] != parts_2[0]:
            return None

        start_index_delta = int(parts_2[1]) - int(parts_1[1])
        end_index_delta = int(parts_2[2]) - int(parts_1[2])
        # Textually different but numerically identical indices carry no shift.
        if start_index_delta == 0 and end_index_delta == 0:
            return None
        return (start_index_delta, end_index_delta)

    def apply_deltas_to_range_nodes(self, node1: "xlcalculator.ast_nodes.ASTNode", node2: "xlcalculator.ast_nodes.ASTNode"):
        """Walk both ASTs in parallel and return a copy of ``node1`` with deltas applied.

        Function and operator nodes are rebuilt from ``node1``'s token with
        recursively processed children. Range nodes that differ by an index
        shift are replaced by a new range node (see the class docstring for the
        name format). Every other node is returned as-is, i.e. the very same
        object as ``node1``.

        NOTE(review): ``node2`` is assumed to have the same shape as ``node1``;
        a function node with fewer arguments, or a different node kind on the
        ``node2`` side of a function/operator node, still raises
        (IndexError / AttributeError) as before.
        """
        if isinstance(node1, xlcalculator.ast_nodes.RangeNode) and isinstance(node2, xlcalculator.ast_nodes.RangeNode):
            deltas = self.get_delta_between_nodes(node1.tvalue, node2.tvalue)
            if not deltas:
                return node1
            start_index_delta, end_index_delta = deltas
            # A non-None delta guarantees that node1.tvalue is a series reference.
            prefix, start_index, _ = self._split_series_reference(node1.tvalue)
            new_tvalue = f"{prefix}_{start_index_delta}_{end_index_delta}_{start_index}"
            return xlcalculator.ast_nodes.RangeNode(xlcalculator.tokenizer.f_token(tvalue=new_tvalue, ttype="operand", tsubtype="range"))

        elif hasattr(node1, 'args') and isinstance(node1, xlcalculator.ast_nodes.FunctionNode):
            # Arguments are paired by position with those of node2.
            modified_args = [self.apply_deltas_to_range_nodes(arg, node2.args[i]) for i, arg in enumerate(node1.args)]
            modified_node = xlcalculator.ast_nodes.FunctionNode(node1.token)
            modified_node.args = modified_args
            return modified_node

        elif hasattr(node1, 'left') and isinstance(node1, xlcalculator.ast_nodes.OperatorNode):
            # A falsy operand on node1 is replaced by None in the rebuilt node.
            modified_left = self.apply_deltas_to_range_nodes(node1.left, node2.left) if node1.left else None
            modified_right = self.apply_deltas_to_range_nodes(node1.right, node2.right) if node1.right else None
            modified_node = xlcalculator.ast_nodes.OperatorNode(node1.token)
            modified_node.left = modified_left
            modified_node.right = modified_right
            return modified_node

        else:
            return node1

    def get_ast_with_deltas(self):
        """Return the first formula's AST with deltas relative to the second applied."""
        return self.apply_deltas_to_range_nodes(self.formula_1_ast_series, self.formula_2_ast_series)



class FormulaGenerator:
    """Factory for ASTGenerator objects built from a pair of formula ASTs."""

    @staticmethod
    def get_ast_generator(
        formula_1_ast_series: xlcalculator.ast_nodes.ASTNode, formula_2_ast_series: xlcalculator.ast_nodes.ASTNode
    ) -> ASTGenerator:
        """Wrap the two given formula ASTs in a new ASTGenerator and return it."""
        return ASTGenerator(
            formula_1_ast_series=formula_1_ast_series,
            formula_2_ast_series=formula_2_ast_series,
        )
        


In [26]:
# Show the two series ASTs that are compared below. These names were last assigned in the
# per-series loop, so they belong to the final series that loop processed.
print(formula_1_ast_series)
print(formula_2_ast_series)

(fd3cfc4496014015af0b9c339f29dc7c_0_0) * (9106ff7eb22146ab9b810c0aa0b81814_0_0)
(fd3cfc4496014015af0b9c339f29dc7c_1_1) * (9106ff7eb22146ab9b810c0aa0b81814_1_1)


In [27]:
# Build an ASTGenerator for the pair of series ASTs printed above.
ast_generator = FormulaGenerator.get_ast_generator(formula_1_ast_series, formula_2_ast_series)

In [28]:
# AST of the first formula in which range nodes that shift between the two formulas
# have been replaced by delta-annotated range nodes.
ast_delta = ast_generator.get_ast_with_deltas()

In [30]:
# Serialise the delta AST back to a formula string; as the last expression of the cell
# the result is displayed as the cell output.
SeriesImplementer.serialise_ast_to_formula(ast_delta)

'(fd3cfc4496014015af0b9c339f29dc7c_1_1_0 * 9106ff7eb22146ab9b810c0aa0b81814_1_1_0)'