In [1]:
import os

import xlcalculator

from series_extraction.excel_loader import ExcelLoader
from series_extraction.excel_validator import ExcelValidator
from series_extraction.excel_cleaner import ExcelCleaner
from series_extraction.table_finder import TableFinder
from series_extraction.series_extractor import SeriesExtractor
from series_extraction.excel_compatibility_checker import ExcelCompatibilityChecker
from series_extraction.series_iterator import SeriesIterator
from series_extraction.series_mapper import SeriesMapper

from ast_building.formula_parser import FormulaParser
from ast_building.series_implementer import SeriesImplementer

from formula_visualiser import FormulaVisualiser
from objects import Worksheet


In [2]:
# Configuration: name of the workbook pair this notebook processes.
project_name = 'test_excel_1'

# The `data` folder is resolved as a sibling of the directory the kernel
# was started in (one level up from the working directory, then `data`).
current_directory = os.getcwd()
parent_directory = os.path.abspath(os.path.join(current_directory, os.pardir))
data_directory = os.path.join(parent_directory, 'data')

# The raw and reduced variants of the project live in separate sub-folders
# and differ only by their file-name suffix.
excel_raw_file_path = os.path.join(
    data_directory,
    "excel_files_raw",
    f"{project_name}_raw.xlsx",
)
excel_reduced_filepath = os.path.join(
    data_directory,
    "excel_files_reduced",
    f"{project_name}_reduced.xlsx",
)

In [3]:
# Load both variants of the workbook: the raw file first, then the reduced
# one. Both objects are needed later by the compatibility check.
excel_raw = ExcelLoader.load_file(excel_raw_file_path)
excel_reduced = ExcelLoader.load_file(excel_reduced_filepath)

In [4]:
# Run the validator on the reduced workbook. The result is only tested for
# truthiness (`if not is_valid`) in the following cell.
is_valid = ExcelValidator.validate_excel(excel_reduced)

In [5]:
# Abort the notebook run here when the reduced workbook did not validate,
# so none of the extraction cells below operate on an invalid file.
if not is_valid:
    raise Exception("Excel file is not valid")

In [6]:
# Clean the reduced workbook; `excel_reduced_clean` is what table detection
# receives in the next cell.
# NOTE(review): it is not visible from here whether clean_excel also mutates
# `excel_reduced`, which is passed again to the compatibility check later — confirm.
excel_reduced_clean = ExcelCleaner.clean_excel(excel_reduced)

In [7]:
# Locate tables in the cleaned workbook. find_tables returns two values,
# unpacked as `extracted_tables` and `data`; both are passed on to the
# SeriesExtractor calls further down.
extracted_tables, data = TableFinder.find_tables(excel_reduced_clean)

In [8]:
# Extract table details from the detected tables.
# NOTE(review): `series_data` is not referenced by any later cell visible in
# this notebook — confirm whether this call is still needed (e.g. for side
# effects on `extracted_tables` / `data`) or can be removed.
series_data = SeriesExtractor.extract_table_details(extracted_tables, data)

In [9]:
# Check the raw workbook against the reduced one, passing along the detected
# tables. Note that the un-cleaned `excel_reduced` is used here, not
# `excel_reduced_clean`. The result is tested for truthiness in the next cell.
is_compatible = ExcelCompatibilityChecker.check_file(excel_raw, excel_reduced, extracted_tables)

In [10]:
# Abort the notebook run when the compatibility check did not pass, before
# any series are extracted.
if not is_compatible:
    raise Exception("Excel file is not compatible")

In [11]:
# Extract the series from the detected tables. The result is indexed by
# sheet name in the next cell (e.g. series_dict["Sheet2"]).
series_dict = SeriesExtractor.extract_series(extracted_tables=extracted_tables, data=data)

In [12]:
# Inspect the series extracted for the worksheet named "Sheet2"
# (the sheet name is hard-coded here).
series_dict["Sheet2"]

[Series(series_id=UUID('8280d017-7cfd-4f5f-a55a-4bb097f36764'), worksheet=Worksheet(sheet_name='Sheet2', workbook_file_path=None, worksheet=None), series_header='col1', formulas=['=B2', '=B3'], values=[1, 4], header_location=<HeaderLocation.TOP: 'top'>, series_starting_cell=Cell(column=1, row=11, coordinate='A11', value=None, value_type=None), series_length=2, series_data_type=<SeriesDataType.INT: 'int'>),
 Series(series_id=UUID('edd04b9e-379f-40f3-bc68-2d7a91803414'), worksheet=Worksheet(sheet_name='Sheet2', workbook_file_path=None, worksheet=None), series_header='col2', formulas=['=C2', '=C3'], values=[2, 5], header_location=<HeaderLocation.TOP: 'top'>, series_starting_cell=Cell(column=2, row=11, coordinate='B11', value=None, value_type=None), series_length=2, series_data_type=<SeriesDataType.INT: 'int'>),
 Series(series_id=UUID('c26e65c8-395d-4852-965e-11a2cc3c6e86'), worksheet=Worksheet(sheet_name='Sheet2', workbook_file_path=None, worksheet=None), series_header='horizontal_col_1',

In [13]:
# Build an iterable over the extracted series; the next cell loops over it.
# NOTE(review): if this is a one-shot iterator/generator, the loop in the next
# cell partially consumes it and re-running that cell alone would resume
# mid-way — confirm and re-run this cell first if so.
series_iterator = SeriesIterator.iterate_series(series_dict)

In [14]:
# Select the first series that carries at least one formula; `series` stays
# None when no series qualifies.
#
# The check is length-independent: a series counts as "formula-less" when its
# `formulas` is missing/empty or contains only None, whatever its length.
# (Comparing against the literal [None, None] only recognised formula-less
# series of exactly two cells.)
series = None
for item in series_iterator:
    # `or ()` guards against `formulas` being None or empty.
    if any(formula is not None for formula in (item.formulas or ())):
        series = item
        break


In [15]:
# Display the series selected by the loop in the previous cell
# (nothing is shown if it is still None).
series

Series(series_id=UUID('337ff33f-3a83-49b8-8c3d-3b299c2a3995'), worksheet=Worksheet(sheet_name='Sheet1', workbook_file_path=None, worksheet=None), series_header='horizontal_column_1', formulas=['=B3', '=C3'], values=[1, 2], header_location=<HeaderLocation.LEFT: 'left'>, series_starting_cell=Cell(column=3, row=12, coordinate='C12', value=None, value_type=None), series_length=2, series_data_type=<SeriesDataType.INT: 'int'>)

In [16]:
# Build the lookup used by SeriesImplementer further down. Going by the output
# displayed in the next cell, it is keyed by Worksheet and then by Cell, with
# (index, Series) tuples as values.
series_mapping = SeriesMapper.map_series(series_dict)

In [17]:
# Inspect the cell -> (index, series) mapping for the worksheet named 'Sheet2'.
# The key is built by hand with only `sheet_name` set; the other two fields are
# None, matching the Worksheet reprs shown in the recorded outputs.
# `Worksheet` is imported in the notebook's import cell at the top.
series_mapping[Worksheet(sheet_name='Sheet2', workbook_file_path=None, worksheet=None)]

{Cell(column=1, row=11, coordinate=None, value=None, value_type=None): (0,
  Series(series_id=UUID('8280d017-7cfd-4f5f-a55a-4bb097f36764'), worksheet=Worksheet(sheet_name='Sheet2', workbook_file_path=None, worksheet=None), series_header='col1', formulas=['=B2', '=B3'], values=[1, 4], header_location=<HeaderLocation.TOP: 'top'>, series_starting_cell=Cell(column=1, row=11, coordinate='A11', value=None, value_type=None), series_length=2, series_data_type=<SeriesDataType.INT: 'int'>)),
 Cell(column=1, row=12, coordinate=None, value=None, value_type=None): (1,
  Series(series_id=UUID('8280d017-7cfd-4f5f-a55a-4bb097f36764'), worksheet=Worksheet(sheet_name='Sheet2', workbook_file_path=None, worksheet=None), series_header='col1', formulas=['=B2', '=B3'], values=[1, 4], header_location=<HeaderLocation.TOP: 'top'>, series_starting_cell=Cell(column=1, row=11, coordinate='A11', value=None, value_type=None), series_length=2, series_data_type=<SeriesDataType.INT: 'int'>)),
 Cell(column=2, row=11, co

In [18]:
# Take the first formula of the selected series and parse it; the parsed
# result is kept in `formula_1_ast` for the range-replacement step below.
formula_1 = series.formulas[0]
formula_1_ast = FormulaParser.parse_formula(formula_1)

In [19]:
# Show the formula string that was parsed in the previous cell.
print(formula_1)

=B3


In [20]:
# Create an implementer bound to the series mapping and to the sheet the
# selected series lives on, then rewrite the range nodes of the parsed formula.
# The cell_range / worksheet / cell lines in the recorded output appear during
# this cell (presumably debug prints inside the project code — confirm).
series_implementer = SeriesImplementer(
    series_mapping,
    sheet_name=series.worksheet.sheet_name,
)
formula_1_ast_new = series_implementer.replace_range_nodes(formula_1_ast)

cell_range: B3
worksheet: Worksheet(sheet_name='Sheet1', workbook_file_path=None, worksheet=None)
cell: [Cell(column=2, row=3, coordinate=None, value=None, value_type=None)]


In [21]:
# Turn the rewritten AST back into a formula string. The method is called on
# the SeriesImplementer class itself, not on the `series_implementer` instance.
formula_1_new = SeriesImplementer.serialise_ast_to_formula(formula_1_ast_new)

In [22]:
# Display the serialised formula. In the recorded run this is a string of the
# form '<32 hex chars>_0_0' with no leading '='.
formula_1_new

'45bd573e70954092ba5b8430f2efb61e_0_0'

In [23]:
# Re-parse the serialised formula, with the leading '=' added back.
# NOTE(review): this rebinds `formula_1_ast_new`, which was first assigned by
# the replace_range_nodes cell. Re-running the serialisation cell after this
# one would therefore operate on the re-parsed AST; a distinct name would make
# the cells idempotent.
formula_1_ast_new = FormulaParser.parse_formula(f'={formula_1_new}')

In [24]:
# Visualise the re-parsed AST and render it under the base name
# 'formula_1_ast_new'. In the recorded run `render` returned
# 'formula_1_ast_new.png'; the file location is relative to wherever the
# graph object writes (presumably the working directory — confirm).
formula_visualiser = FormulaVisualiser()
ast_graph = formula_visualiser.visualise(formula_1_ast_new)
ast_graph.render('formula_1_ast_new')

'formula_1_ast_new.png'