In [1]:
import os

from series_extraction.excel_loader import ExcelLoader
from series_extraction.excel_validator import ExcelValidator
from series_extraction.excel_cleaner import ExcelCleaner
from series_extraction.table_finder import TableFinder
from series_extraction.series_extractor import SeriesExtractor
from series_extraction.excel_compatibility_checker import ExcelCompatibilityChecker
from series_extraction.series_iterator import SeriesIterator
from series_extraction.series_mapper import SeriesMapper

from ast_building.formula_parser import FormulaParser
from ast_building.series_implementer import SeriesImplementer

from ast_transformation.formula_generator import FormulaGenerator

In [2]:
# The data folder is looked up one level above the current working directory,
# in a sub-folder named 'data'.
current_directory = os.getcwd()
parent_directory = os.path.abspath(
    os.path.join(current_directory, os.pardir)
)
data_directory = os.path.join(parent_directory, 'data')

# Stem shared by the raw and the reduced workbook file names.
project_name = 'test_excel_1'

# The raw and reduced workbooks live in parallel sub-folders of the data folder.
raw_workbook_name = f"{project_name}_raw.xlsx"
reduced_workbook_name = f"{project_name}_reduced.xlsx"
excel_raw_file_path = os.path.join(data_directory, "excel_files_raw", raw_workbook_name)
excel_reduced_filepath = os.path.join(data_directory, "excel_files_reduced", reduced_workbook_name)

In [3]:
# Load both variants of the project workbook: the raw file first, then the
# reduced one. Both are needed later by the compatibility check.
excel_raw = ExcelLoader.load_file(
    excel_raw_file_path
)
excel_reduced = ExcelLoader.load_file(
    excel_reduced_filepath
)

In [4]:
# Validate the reduced workbook. The result is checked in the next cell, which
# raises if it is falsy.
is_valid = ExcelValidator.validate_excel(excel_reduced)

In [5]:
# Abort before cleaning and table extraction when validation failed.
# ValueError (a subclass of Exception) is raised instead of a bare Exception so
# the failure type is specific while `except Exception` handlers keep working.
if not is_valid:
    raise ValueError("Excel file is not valid")

In [6]:
# Clean the reduced workbook; table discovery in the next cell runs on this
# cleaned result.
excel_reduced_clean = ExcelCleaner.clean_excel(excel_reduced)

In [7]:
# find_tables returns a pair: the extracted tables and a companion `data`
# object. Both are passed together to SeriesExtractor in later cells.
extracted_tables, data = TableFinder.find_tables(excel_reduced_clean)

In [8]:
# NOTE(review): `series_data` is not referenced by any later cell visible in
# this notebook — confirm whether this call is still needed (e.g. for side
# effects on `extracted_tables`/`data`) or can be dropped.
series_data = SeriesExtractor.extract_table_details(extracted_tables, data)

In [9]:
# Compare the raw workbook against the reduced one using the extracted tables.
# NOTE(review): the un-cleaned `excel_reduced` is passed here, not
# `excel_reduced_clean` — confirm this is intentional.
is_compatible = ExcelCompatibilityChecker.check_file(excel_raw, excel_reduced, extracted_tables)

In [10]:
# Abort before series extraction when the compatibility check failed.
# ValueError (a subclass of Exception) is raised instead of a bare Exception so
# the failure type is specific while `except Exception` handlers keep working.
if not is_compatible:
    raise ValueError("Excel file is not compatible")

In [11]:
# Build the series collection from the tables found earlier; `series_dict`
# feeds both the series mapping and the series iterator below.
series_dict = SeriesExtractor.extract_series(extracted_tables=extracted_tables, data=data)

In [12]:
# Mapping derived from `series_dict`; it is handed to SeriesImplementer when
# range nodes in parsed formulas are replaced.
series_mapping = SeriesMapper.map_series(series_dict)

In [13]:
# Iterable over the individual series in `series_dict`; it is consumed by the
# list comprehension in the next cell.
# NOTE(review): if this is a one-shot iterator, re-running only the next cell
# would yield an empty list — confirm.
series_iterator = SeriesIterator.iterate_series(series_dict)

In [14]:
# Keep only the series whose `formulas` attribute is not exactly [None, None].
series_list = [
    candidate
    for candidate in series_iterator
    if candidate.formulas != [None, None]
]

In [26]:
# For every series that carries formulas: parse its first two formulas,
# replace their range nodes with series references, and derive the delta AST.
#
# The serialised formula used to be computed and then discarded on every
# iteration; it is now collected as (series, serialised formula) pairs so the
# results for all series stay available after the loop.
ast_deltas_serialised = []
for series in series_list:
    formula_1 = series.formulas[0]
    formula_1_ast = FormulaParser.parse_formula(formula_1)
    # Range nodes are replaced using the mapping and the series' own sheet name.
    series_implementer = SeriesImplementer(series_mapping, sheet_name=series.worksheet.sheet_name)
    formula_1_ast_series = series_implementer.replace_range_nodes(formula_1_ast)

    # NOTE(review): the filter that builds `series_list` only excludes
    # [None, None]; confirm parse_formula copes with a single None formula.
    formula_2 = series.formulas[1]
    formula_2_ast = FormulaParser.parse_formula(formula_2)
    formula_2_ast_series = series_implementer.replace_range_nodes(formula_2_ast)

    ast_generator = FormulaGenerator.get_ast_generator(formula_1_ast_series, formula_2_ast_series)
    ast_delta = ast_generator.get_ast_with_deltas()
    ast_delta_serialised = SeriesImplementer.serialise_ast_to_formula(ast_delta)
    ast_deltas_serialised.append((series, ast_delta_serialised))
    # The raw delta AST is still printed so the cell output stays the same.
    print(ast_delta)

a6631b59dc7d4f28a6bb9ad9ba7c7a5d_0_0
a6631b59dc7d4f28a6bb9ad9ba7c7a5d_1_1
(a6631b59dc7d4f28a6bb9ad9ba7c7a5d_1_1_0) + (574a8624f3c64d76bd9730eeb92496ad_1_1_0)
SUM(6a70585f7ea346bcbb3f97a6b7e9c8f3_574a8624f3c64d76bd9730eeb92496ad_a6631b59dc7d4f28a6bb9ad9ba7c7a5d_1_1_0)
(0fcde356b6064dec8b63a4aa8850cd97_1_1_0) + (2846de20349f461387e90f74a7c45f13_1_1_0)
SUM(2846de20349f461387e90f74a7c45f13_0fcde356b6064dec8b63a4aa8850cd97_8284bf439cbf4c4899e5d4327e78d973_1_1_0)
9daa891129f24fd99811553d29807d4e_0_0
9daa891129f24fd99811553d29807d4e_1_1
(9daa891129f24fd99811553d29807d4e_1_1_0) + (cde7614edcb04e3cb1b678e0d6f1ade0_1_1_0)
SUM(baca3a3df08d4f0fb16065dfd457d1c9_0_1)
SUM(a17cc5d1cf444bc9be6fc5483926d220_0_1)
SUM(d24ea4c662d248f98a27138b619bb4b4_0_1)
(ec45176615e74da39cdfab313771d1b0_1_1_0) * (baca3a3df08d4f0fb16065dfd457d1c9_1_1_0)


In [17]:
# `formula_1_ast_series` and `formula_2_ast_series` are not assigned in this
# cell; in the visible notebook they are only assigned inside the
# `for series in series_list` loop, so this uses the values left over from the
# loop's final iteration.
# NOTE(review): on a fresh kernel with an empty `series_list` these names would
# be undefined — confirm.
ast_generator = FormulaGenerator.get_ast_generator(formula_1_ast_series, formula_2_ast_series)

In [18]:
# Same call the loop cell makes per series; this rebinds `ast_delta` from the
# current `ast_generator`.
ast_delta = ast_generator.get_ast_with_deltas()

In [19]:
# Bare expression: the serialised formula for the current `ast_delta` is shown
# as the cell's output rather than being stored.
SeriesImplementer.serialise_ast_to_formula(ast_delta)

'(ec45176615e74da39cdfab313771d1b0_1_1_0 * baca3a3df08d4f0fb16065dfd457d1c9_1_1_0)'