In [40]:
import os
import xlcalculator
import re


from series_extraction.excel_loader import ExcelLoader
from series_extraction.excel_validator import ExcelValidator
from series_extraction.excel_cleaner import ExcelCleaner
from series_extraction.table_finder import TableFinder
from series_extraction.series_extractor import SeriesExtractor
from series_extraction.excel_compatibility_checker import ExcelCompatibilityChecker
from series_extraction.series_iterator import SeriesIterator
from series_extraction.series_mapper import SeriesMapper

from ast_building.formula_parser import FormulaParser
from ast_building.series_implementer import SeriesImplementer

from formula_visualiser import FormulaVisualiser


In [41]:
# Name of the workbook pair processed by this notebook.
project_name = 'test_excel_1'

# The data folder is looked up next to (not inside) the current working directory.
current_directory = os.getcwd()
parent_directory = os.path.abspath(os.path.join(current_directory, os.pardir))
data_directory = os.path.join(parent_directory, 'data')

# Raw and reduced workbooks live in sibling sub-folders of the data folder.
excel_raw_file_path = os.path.join(
    data_directory, "excel_files_raw", f"{project_name}_raw.xlsx"
)
excel_reduced_filepath = os.path.join(
    data_directory, "excel_files_reduced", f"{project_name}_reduced.xlsx"
)

In [42]:
# Load the raw workbook first, then its reduced counterpart.
excel_raw = ExcelLoader.load_file(excel_raw_file_path)
excel_reduced = ExcelLoader.load_file(excel_reduced_filepath)

In [43]:
# Validate the reduced workbook; the result is checked in the next cell.
is_valid = ExcelValidator.validate_excel(excel_reduced)

In [44]:
# Abort before any extraction work when validation failed.
# ValueError is a subclass of Exception, so handlers catching Exception still work.
if not is_valid:
    raise ValueError("Excel file is not valid")

In [45]:
# Run the reduced workbook through ExcelCleaner; table detection below uses the cleaned result.
excel_reduced_clean = ExcelCleaner.clean_excel(excel_reduced)

In [46]:
# Locate tables in the cleaned workbook; both return values feed the SeriesExtractor calls below.
extracted_tables, data = TableFinder.find_tables(excel_reduced_clean)

In [47]:
# Extract per-table details from the detected tables.
# NOTE(review): `series_data` is not referenced by any later cell visible here — confirm it is still needed.
series_data = SeriesExtractor.extract_table_details(extracted_tables, data)

In [48]:
# Check the raw and reduced workbooks against the extracted tables; the result is checked in the next cell.
# NOTE(review): this passes `excel_reduced`, not `excel_reduced_clean` — confirm that is intended.
is_compatible = ExcelCompatibilityChecker.check_file(excel_raw, excel_reduced, extracted_tables)

In [49]:
# Abort before series extraction when the compatibility check failed.
# ValueError is a subclass of Exception, so handlers catching Exception still work.
if not is_compatible:
    raise ValueError("Excel file is not compatible")

In [50]:
# Extract the series from the detected tables; the result feeds both the mapper and the iterator below.
series_dict = SeriesExtractor.extract_series(extracted_tables=extracted_tables, data=data)

In [51]:
# Build the series mapping that SeriesImplementer is constructed with later on.
series_mapping = SeriesMapper.map_series(series_dict)

In [52]:
# Obtain an iterable over the extracted series; it is consumed by the filter in the next cell.
series_iterator = SeriesIterator.iterate_series(series_dict)

In [53]:
# Keep only the series whose `formulas` attribute is not the placeholder [None, None].
series_list = [
    candidate
    for candidate in series_iterator
    if candidate.formulas != [None, None]
]

In [54]:
# Parse the first two formulas of every series and replace their range nodes
# with series references.
#
# Each series' (formula 1, formula 2) AST pair is stored in
# `formula_ast_series_pairs`. Previously every iteration overwrote the same
# names, so only the last series' ASTs were kept. The original names are still
# assigned inside the loop, so later cells that read `formula_1_ast_series` /
# `formula_2_ast_series` keep seeing the ASTs of the last series.
formula_ast_series_pairs = []
for series in series_list:
    formula_1 = series.formulas[0]
    formula_1_ast = FormulaParser.parse_formula(formula_1)
    # One implementer per series, bound to the sheet that series lives on.
    series_implementer = SeriesImplementer(series_mapping, sheet_name=series.worksheet.sheet_name)
    formula_1_ast_series = series_implementer.replace_range_nodes(formula_1_ast)

    formula_2 = series.formulas[1]
    formula_2_ast = FormulaParser.parse_formula(formula_2)
    formula_2_ast_series = series_implementer.replace_range_nodes(formula_2_ast)

    formula_ast_series_pairs.append((formula_1_ast_series, formula_2_ast_series))

In [55]:
class ASTGenerator:
    """Compares two series-substituted formula ASTs node by node.

    Range nodes are expected to carry values of the form
    ``<series id>_<start index>_<end index>``; the comparison reports how the
    two indices shift between the first and the second AST.
    """

    # "<series id>_<start>_<end>", where both indices may have several digits.
    _SERIES_REF_PATTERN = re.compile(
        r"(?P<series_id>.*)_(?P<start>\d+)_(?P<end>\d+)", re.DOTALL
    )

    def __init__(self, formula_1_ast_series: "xlcalculator.ast_nodes.ASTNode", formula_2_ast_series: "xlcalculator.ast_nodes.ASTNode"):
        """Store the two ASTs that later calls compare.

        Parameters:
        - formula_1_ast_series: AST of the first formula.
        - formula_2_ast_series: AST of the second formula.
        """
        self.formula_1_ast_series = formula_1_ast_series
        self.formula_2_ast_series = formula_2_ast_series

    @staticmethod
    def get_delta_between_nodes(node1_value: str, node2_value: str):
        """Return how the start/end indices change from one node value to the other.

        Parameters:
        - node1_value: Value of the node in the first AST.
        - node2_value: Value of the node in the second AST.

        Returns:
        - ``(start_index_delta, end_index_delta)``, each computed as
          ``node2 - node1``, when the values differ and both reference the
          same series id.
        - ``None`` when the values are identical, when either value does not
          end in ``_<int>_<int>``, or when the series ids differ.
        """
        if node1_value == node2_value:
            return None

        match_1 = ASTGenerator._SERIES_REF_PATTERN.fullmatch(node1_value)
        match_2 = ASTGenerator._SERIES_REF_PATTERN.fullmatch(node2_value)
        if match_1 is None or match_2 is None:
            return None

        # Deltas are only meaningful between references to the same series.
        if match_1.group("series_id") != match_2.group("series_id"):
            return None

        start_index_delta = int(match_2.group("start")) - int(match_1.group("start"))
        end_index_delta = int(match_2.group("end")) - int(match_1.group("end"))
        return (start_index_delta, end_index_delta)

    @staticmethod
    def compare_range_nodes(node1, node2, differing_nodes):
        """
        Recursively compare RangeNode objects in two AST nodes.

        Every pair of RangeNode objects has its delta printed (``None`` when
        there is none). Pairs with a delta are also recorded in
        ``differing_nodes``. Recursion follows the ``left``, ``right`` and
        ``args`` attributes when both nodes have them and stops at a pair of
        nodes whose types differ.

        Parameters:
        - node1: The current node in the first AST.
        - node2: The current node in the second AST.
        - differing_nodes: A list that receives one
          ``(node1.tvalue, node2.tvalue, delta)`` tuple per differing
          RangeNode pair. ``None`` is accepted and disables collection.
        """
        if type(node1) is not type(node2):
            return

        # The types are identical here, so checking node1 covers node2 as well.
        if isinstance(node1, xlcalculator.ast_nodes.RangeNode):
            delta = ASTGenerator.get_delta_between_nodes(node1.tvalue, node2.tvalue)
            print(delta)
            if delta is not None and differing_nodes is not None:
                differing_nodes.append((node1.tvalue, node2.tvalue, delta))

        if hasattr(node1, 'left') and hasattr(node2, 'left'):
            ASTGenerator.compare_range_nodes(node1.left, node2.left, differing_nodes)

        if hasattr(node1, 'right') and hasattr(node2, 'right'):
            ASTGenerator.compare_range_nodes(node1.right, node2.right, differing_nodes)

        if hasattr(node1, 'args') and hasattr(node2, 'args'):
            # zip stops at the shorter argument list; surplus arguments are not compared.
            for child1, child2 in zip(node1.args, node2.args):
                ASTGenerator.compare_range_nodes(child1, child2, differing_nodes)

    def generate_ast(self, n: int) -> "xlcalculator.ast_nodes.ASTNode":
        """Generates the Nth AST from the two formula ASTs, where the first AST provided is N=1

        NOTE(review): generation is not implemented yet. This currently only
        runs compare_range_nodes on the two stored ASTs (printing the deltas)
        and returns its result, which is always None; ``n`` is unused.
        """
        differing_nodes = []
        return ASTGenerator.compare_range_nodes(self.formula_1_ast_series, self.formula_2_ast_series, differing_nodes)
        




class FormulaGenerator:
    """Factory that wraps a pair of formula ASTs in an ASTGenerator."""

    @staticmethod
    def get_ast_generator(
        formula_1_ast_series: xlcalculator.ast_nodes.ASTNode, formula_2_ast_series: xlcalculator.ast_nodes.ASTNode
    ) -> ASTGenerator:
        """Build and return an ASTGenerator holding the two given ASTs.

        Parameters:
        - formula_1_ast_series: AST of the first formula.
        - formula_2_ast_series: AST of the second formula.
        """
        return ASTGenerator(formula_1_ast_series, formula_2_ast_series)
        


In [56]:
# Show both series-substituted ASTs, one per line, for a visual comparison.
print(formula_1_ast_series, formula_2_ast_series, sep="\n")

(c0a7fc24674646a48ffceca85c88dc3e_0_0) * (d53bfd7a798f4fa682b033cd9ade23b2_0_0)
(c0a7fc24674646a48ffceca85c88dc3e_1_1) * (d53bfd7a798f4fa682b033cd9ade23b2_1_1)


In [57]:
# Wrap the two ASTs left over from the loop above (those of the last series) in an ASTGenerator.
ast_generator = FormulaGenerator.get_ast_generator(formula_1_ast_series, formula_2_ast_series)

In [58]:
# Runs the node-by-node comparison, which prints one delta per pair of range nodes (the output below).
ast_generator.generate_ast(1)

(1, 1)
(1, 1)
