In [16]:
import os
import xlcalculator


from series_extraction.excel_loader import ExcelLoader
from series_extraction.excel_validator import ExcelValidator
from series_extraction.excel_cleaner import ExcelCleaner
from series_extraction.table_finder import TableFinder
from series_extraction.series_extractor import SeriesExtractor
from series_extraction.excel_compatibility_checker import ExcelCompatibilityChecker
from series_extraction.series_iterator import SeriesIterator
from series_extraction.series_mapper import SeriesMapper

from ast_building.formula_parser import FormulaParser
from ast_building.series_implementer import SeriesImplementer

from formula_visualiser import FormulaVisualiser


In [17]:
# Workbook pair to process; the name selects both input files below.
project_name = 'test_excel_1'

# The data folder is resolved as <parent of the current working directory>/data.
current_directory = os.getcwd()
parent_directory = os.path.abspath(os.path.join(current_directory, os.pardir))
data_directory = os.path.join(parent_directory, 'data')

# Raw and reduced variants of the same workbook live in sibling sub-folders.
excel_raw_file_path = os.path.join(
    data_directory,
    "excel_files_raw",
    f"{project_name}_raw.xlsx",
)
excel_reduced_filepath = os.path.join(
    data_directory,
    "excel_files_reduced",
    f"{project_name}_reduced.xlsx",
)

In [18]:
# Load both variants of the workbook: the raw file and its reduced counterpart.
excel_reduced = ExcelLoader.load_file(excel_reduced_filepath)
excel_raw = ExcelLoader.load_file(excel_raw_file_path)

In [19]:
# Validate the reduced workbook; the result gates the rest of the pipeline
# (the following cell aborts when it is falsy).
is_valid = ExcelValidator.validate_excel(excel_reduced)

In [20]:
# Stop the notebook early when validation failed. ValueError is raised
# instead of the bare Exception base class so the failure is specific while
# still being caught by any existing `except Exception` handler.
if not is_valid:
    raise ValueError("Excel file is not valid")

In [21]:
# Clean the reduced workbook; table detection below runs on this cleaned result.
excel_reduced_clean = ExcelCleaner.clean_excel(excel_reduced)

In [22]:
# find_tables returns a pair: the detected tables and an accompanying `data`
# object. Both are passed on to SeriesExtractor in later cells.
extracted_tables, data = TableFinder.find_tables(excel_reduced_clean)

In [23]:
# Extract per-table details from the detected tables.
# NOTE(review): `series_data` is not read again in the visible cells —
# confirm it is still needed further down or drop the call.
series_data = SeriesExtractor.extract_table_details(extracted_tables, data)

In [24]:
# Check the raw and reduced workbooks against the detected tables; the result
# gates the rest of the pipeline (the following cell aborts when it is falsy).
# NOTE(review): this passes `excel_reduced`, not `excel_reduced_clean`, although
# the tables were found on the cleaned workbook — confirm that is intended.
is_compatible = ExcelCompatibilityChecker.check_file(excel_raw, excel_reduced, extracted_tables)

In [25]:
# Stop the notebook early when the compatibility check failed. ValueError is
# raised instead of the bare Exception base class so the failure is specific
# while still being caught by any existing `except Exception` handler.
if not is_compatible:
    raise ValueError("Excel file is not compatible")

In [26]:
# Build the series collection from the detected tables; it feeds both the
# series mapping and the series iterator created in the next cells.
series_dict = SeriesExtractor.extract_series(extracted_tables=extracted_tables, data=data)

In [27]:
# Derive the series mapping; it is later handed to SeriesImplementer when the
# parsed formulas are rewritten.
series_mapping = SeriesMapper.map_series(series_dict)

In [28]:
# Obtain an iterable over the individual series in series_dict.
# NOTE(review): if iterate_series returns a one-shot iterator/generator, the
# filtering cell below exhausts it, and re-running that cell without re-running
# this one would yield an empty list — confirm.
series_iterator = SeriesIterator.iterate_series(series_dict)

In [29]:
# Keep every series except those whose `formulas` equals [None, None].
series_list = [
    candidate
    for candidate in series_iterator
    if candidate.formulas != [None, None]
]

In [30]:
# Parse the first formula of every retained series and rewrite its range nodes
# against the series mapping. The results are accumulated in `series_asts` as
# (series, rewritten_ast) pairs so that every series' AST stays available after
# the loop, not only the one from the final iteration.
series_asts = []
for series in series_list:
    # NOTE(review): series_list only excludes formulas == [None, None], so
    # formulas[0] may still be None here — confirm parse_formula copes with it.
    formula_1 = series.formulas[0]
    formula_1_ast = FormulaParser.parse_formula(formula_1)
    # A new implementer is built per series because it is constructed with the
    # name of the sheet that series belongs to.
    series_implementer = SeriesImplementer(series_mapping, sheet_name=series.worksheet.sheet_name)
    formula_1_ast_new = series_implementer.replace_range_nodes(formula_1_ast)
    series_asts.append((series, formula_1_ast_new))

formula: =B3
formula: =B4
formula: =Sheet1!B3+Sheet1!C3
formula: =SUM(Sheet1!B3:D3)
formula: =Sheet1!G14+Sheet1!H14
formula: =SUM(Sheet1!G14:Sheet1!I14)
formula: =B2
formula: =C2
formula: =B2+B3
formula: =SUM(Sheet1!B2:B6)
formula: =SUM(Sheet1!C2:C6)
formula: =SUM(Sheet1!D2:D6)
formula: =Sheet3!A2*Sheet3!B2
