In [52]:
import os

from series_extraction.excel_loader import ExcelLoader
from series_extraction.excel_validator import ExcelValidator
from series_extraction.excel_cleaner import ExcelCleaner
from series_extraction.table_finder import TableFinder
from series_extraction.series_extractor import SeriesExtractor
from series_extraction.excel_compatibility_checker import ExcelCompatibilityChecker
from series_extraction.series_iterator import SeriesIterator
from series_extraction.series_mapper import SeriesMapper

from ast_building.formula_parser import FormulaParser
from ast_building.series_implementer import SeriesImplementer

from ast_transformation.formula_generator import FormulaGenerator

In [53]:
# Name of the workbook pair processed by this notebook.
project_name = 'test_excel_1'

# The data folder is resolved relative to the kernel's working directory:
# <cwd>/../data
current_directory = os.getcwd()
parent_directory = os.path.abspath(
    os.path.join(current_directory, os.pardir)
)
data_directory = os.path.join(parent_directory, 'data')

# Raw and reduced workbooks live in sibling sub-folders of the data folder.
excel_raw_file_path = os.path.join(
    data_directory,
    "excel_files_raw",
    f"{project_name}_raw.xlsx",
)
excel_reduced_filepath = os.path.join(
    data_directory,
    "excel_files_reduced",
    f"{project_name}_reduced.xlsx",
)

In [54]:
# Load both workbooks with the same loader: first the raw file, then the
# reduced file derived from it.
excel_raw = ExcelLoader.load_file(excel_raw_file_path)
excel_reduced = ExcelLoader.load_file(excel_reduced_filepath)

In [55]:
# Run the validator on the reduced workbook; the result gates the guard in
# the next cell.
is_valid = ExcelValidator.validate_excel(excel_reduced)

In [56]:
# Abort the pipeline when the validator rejected the reduced workbook.
# ValueError is a subclass of Exception, so existing `except Exception`
# handlers still catch it, while callers can now tell bad input apart from
# unrelated failures.
if not is_valid:
    raise ValueError("Excel file is not valid")

In [57]:
# Clean the reduced workbook; table detection in the next cell uses the
# cleaned result rather than `excel_reduced` itself.
excel_reduced_clean = ExcelCleaner.clean_excel(excel_reduced)

In [58]:
# find_tables returns a pair: the detected tables and an accompanying `data`
# object. Both are handed to SeriesExtractor in later cells.
extracted_tables, data = TableFinder.find_tables(excel_reduced_clean)

In [59]:
# NOTE(review): `series_data` is not referenced again in the visible cells —
# confirm whether this call is still needed (or is kept for its side effects).
series_data = SeriesExtractor.extract_table_details(extracted_tables, data)

In [60]:
# The compatibility check receives the raw workbook, the reduced workbook as
# loaded (not the cleaned version) and the detected tables; the result gates
# the guard in the next cell.
is_compatible = ExcelCompatibilityChecker.check_file(excel_raw, excel_reduced, extracted_tables)

In [61]:
# Abort the pipeline when the raw and reduced workbooks were reported as
# incompatible. ValueError is a subclass of Exception, so existing
# `except Exception` handlers still catch it, while callers can now tell bad
# input apart from unrelated failures.
if not is_compatible:
    raise ValueError("Excel file is not compatible")

In [62]:
# Extract the series from the detected tables. The result (displayed in the
# next cell) is a dict keyed by sheet name whose values are lists of Series.
series_dict = SeriesExtractor.extract_series(extracted_tables=extracted_tables, data=data)

In [63]:
# Inspect the extracted series: sheet name -> list of Series objects.
series_dict

{'Sheet1': [Series(series_id='Sheet1|horizontal_column_1|2|12', worksheet=Worksheet(sheet_name='Sheet1', workbook_file_path=None, worksheet=None), series_header='horizontal_column_1', formulas=['=B3', '=C3'], values=[1, 2], header_location=<HeaderLocation.LEFT: 'left'>, series_starting_cell=Cell(column=3, row=12, coordinate='C12', value=None, value_type=None), series_length=2, series_data_type=<SeriesDataType.INT: 'int'>),
  Series(series_id='Sheet1|horizontal_column_2|2|13', worksheet=Worksheet(sheet_name='Sheet1', workbook_file_path=None, worksheet=None), series_header='horizontal_column_2', formulas=['=B4', '=C4'], values=[3, 4], header_location=<HeaderLocation.LEFT: 'left'>, series_starting_cell=Cell(column=3, row=13, coordinate='C13', value=None, value_type=None), series_length=2, series_data_type=<SeriesDataType.INT: 'int'>),
  Series(series_id='Sheet1|col_1|2|2', worksheet=Worksheet(sheet_name='Sheet1', workbook_file_path=None, worksheet=None), series_header='col_1', formulas=[N

In [64]:
# Build the lookup used later by SeriesImplementer; its structure is shown by
# the display in the next cell.
series_mapping = SeriesMapper.map_series(series_dict)

In [65]:
# Inspect the mapping: Worksheet -> Cell -> (index, Series). The index appears
# to be the cell's position within its series — TODO confirm.
series_mapping

{Worksheet(sheet_name='Sheet1', workbook_file_path=None, worksheet=None): {Cell(column=3, row=12, coordinate=None, value=None, value_type=None): (0,
   Series(series_id='Sheet1|horizontal_column_1|2|12', worksheet=Worksheet(sheet_name='Sheet1', workbook_file_path=None, worksheet=None), series_header='horizontal_column_1', formulas=['=B3', '=C3'], values=[1, 2], header_location=<HeaderLocation.LEFT: 'left'>, series_starting_cell=Cell(column=3, row=12, coordinate='C12', value=None, value_type=None), series_length=2, series_data_type=<SeriesDataType.INT: 'int'>)),
  Cell(column=4, row=12, coordinate=None, value=None, value_type=None): (1,
   Series(series_id='Sheet1|horizontal_column_1|2|12', worksheet=Worksheet(sheet_name='Sheet1', workbook_file_path=None, worksheet=None), series_header='horizontal_column_1', formulas=['=B3', '=C3'], values=[1, 2], header_location=<HeaderLocation.LEFT: 'left'>, series_starting_cell=Cell(column=3, row=12, coordinate='C12', value=None, value_type=None), se

In [66]:
# Obtain an iterable over the individual series in `series_dict`; the next
# cell filters it into a list.
# NOTE(review): if this is a one-shot generator, re-running the next cell
# without re-running this one would yield an empty list — confirm.
series_iterator = SeriesIterator.iterate_series(series_dict)

In [67]:
# Keep only the series that carry at least one formula. Each entry is checked
# individually instead of comparing against a fixed two-element list, so
# formula-less series of any length (or stored as a tuple, or with no formulas
# at all) are dropped as well. For two-element lists the result is the same
# as comparing with [None, None].
series_list = [
    series
    for series in series_iterator
    if any(formula is not None for formula in (series.formulas or ()))
]

A series ID is built by concatenating `sheet_name`, `header_name`, `header_column_index` and `header_row_index`, separated by `|` — for example `Sheet1|col_1|2|2`.

In [68]:
# For every series that has formulas: parse its first two formulas, replace
# their range nodes using the series mapping, combine the two resulting ASTs
# and print the serialised delta form.
for series in series_list:
    # First formula: parse, then rewrite through an implementer that is
    # configured with the sheet name of this series.
    first_ast = FormulaParser.parse_formula(series.formulas[0])
    implementer = SeriesImplementer(
        series_mapping,
        sheet_name=series.worksheet.sheet_name,
    )
    first_series_ast = implementer.replace_range_nodes(first_ast)

    # Second formula: same parse-and-rewrite, reusing the implementer.
    second_ast = FormulaParser.parse_formula(series.formulas[1])
    second_series_ast = implementer.replace_range_nodes(second_ast)

    # Build the generator from both rewritten ASTs and ask it for the AST
    # with deltas.
    generator = FormulaGenerator.get_ast_generator(
        first_series_ast,
        second_series_ast,
    )
    delta_ast = generator.get_ast_with_deltas()

    # Serialisation is invoked on the class, not on the per-series instance.
    print(SeriesImplementer.serialise_ast_to_formula(delta_ast))
    

('Sheet1|col_1|2|2',), (0, 0, 1, 1), 0
('Sheet1|col_1|2|2',), (0, 0, 1, 1), 1
(('Sheet1|col_1|2|2',), (1, 1, 0, 0), 0 + ('Sheet1|col_2|3|2',), (1, 1, 0, 0), 0)
SUM(('Sheet1|col_1|2|2', 'Sheet1|col_2|3|2', 'Sheet1|col_3|4|2'), (1, 1, 0, 0), 0)
(('Sheet1|col_1|7|13',), (1, 1, 0, 0), 0 + ('Sheet1|col_2|8|13',), (1, 1, 0, 0), 0)
SUM(('Sheet1|col_1|7|13', 'Sheet1|col_2|8|13', 'Sheet1|col_3|9|13'), (1, 1, 0, 0), 0)
('Sheet2|horizontal_col_1|1|2',), (0, 0, 0, 0), 0
('Sheet2|horizontal_col_1|1|2',), (0, 0, 0, 0), 1
(('Sheet2|horizontal_col_1|1|2',), (1, 1, 0, 0), 0 + ('Sheet2|horizontal_col_2|1|3',), (1, 1, 0, 0), 0)
SUM(('Sheet3|col_2|2|1',), (0, 0, 0, 0), 0)
SUM(('Sheet3|col_3|3|1',), (0, 0, 0, 0), 0)
SUM(('Sheet3|col_4|4|1',), (0, 0, 0, 0), 0)
(('Sheet3|col_1|1|1',), (1, 1, 0, 0), 0 * ('Sheet3|col_2|2|1',), (1, 1, 0, 0), 0)
