In [1]:
import os

from series_extraction.excel_loader import ExcelLoader
from series_extraction.excel_validator import ExcelValidator
from series_extraction.excel_cleaner import ExcelCleaner
from series_extraction.table_finder import TableFinder
from series_extraction.series_extractor import SeriesExtractor
from series_extraction.excel_compatibility_checker import ExcelCompatibilityChecker
from series_extraction.series_iterator import SeriesIterator
from series_extraction.series_mapper import SeriesMapper

from ast_building.formula_parser import FormulaParser
from ast_building.series_implementer import SeriesImplementer

from ast_transformation.formula_generator import FormulaGenerator
from ast_transformation.formula_checker import FormulaChecker
from ast_transformation.cell_range_implementer import CellRangeImplementer

from excel_builder import ExcelBuilder

from excel_checker import ExcelChecker

In [2]:
# --- Configuration -----------------------------------------------------------
# Name of the project whose workbooks are processed; every file name below is
# derived from it.
project_name = 'test_excel_9'

# The data folder is resolved relative to the kernel's working directory:
# <cwd>/../data
current_directory = os.getcwd()
parent_directory = os.path.abspath(os.path.join(current_directory, os.pardir))
data_directory = os.path.join(parent_directory, 'data')

# One sub-folder (and file-name suffix) per processing stage.
excel_raw_file_path = os.path.join(
    data_directory,
    "excel_files_raw",
    f"{project_name}_raw.xlsx",
)
excel_reduced_filepath = os.path.join(
    data_directory,
    "excel_files_reduced",
    f"{project_name}_reduced.xlsx",
)
excel_reduced_clean_filepath = os.path.join(
    data_directory,
    "excel_files_reduced_clean",
    f"{project_name}_reduced_clean.xlsx",
)
excel_reduced_clean_series_filepath = os.path.join(
    data_directory,
    "excel_files_reduced_clean_series",
    f"{project_name}_reduced_clean_series.xlsx",
)

In [3]:
# --- Load and validate -------------------------------------------------------
# Both the raw workbook and its reduced counterpart are loaded; only the
# reduced one is validated and processed further.
excel_raw = ExcelLoader.load_file(excel_raw_file_path)
excel_reduced = ExcelLoader.load_file(excel_reduced_filepath)

is_valid = ExcelValidator.validate_excel(excel_reduced)
if not is_valid:
    raise Exception("Excel file is not valid")

# --- Clean and persist the cleaned workbook ----------------------------------
# The cleaned workbook is written to disk here; the last cell of the notebook
# compares the rebuilt workbook against this file.
excel_reduced_clean = ExcelCleaner.clean_excel(excel_reduced)
ExcelBuilder.create_excel_from_openpyxl_workbook(
    excel_reduced_clean.workbook_with_formulas,
    excel_reduced_clean_filepath,
)

# --- Locate tables and check compatibility -----------------------------------
extracted_tables, data = TableFinder.find_tables(excel_reduced_clean)
# NOTE(review): `series_data` is not read anywhere in the visible cells; the
# call is kept in case it has side effects or later cells rely on the name.
series_data = SeriesExtractor.extract_table_details(extracted_tables, data)

is_compatible = ExcelCompatibilityChecker.check_file(
    excel_raw,
    excel_reduced,
    extracted_tables,
)
if not is_compatible:
    raise Exception("Excel file is not compatible")

# --- Extract series -----------------------------------------------------------
series_dict = SeriesExtractor.extract_series(extracted_tables=extracted_tables, data=data)
series_mapping = SeriesMapper.map_series(series_dict)
series_iterator = SeriesIterator.iterate_series(series_dict)

# Materialise the iterator so the series can be traversed more than once.
series_list = list(series_iterator)

# Partition by whether the series carries formulas: `[None, None]` marks a
# series without formulas.
series_list_with_formulas = [
    candidate for candidate in series_list if candidate.formulas != [None, None]
]
series_list_with_values = [
    candidate for candidate in series_list if candidate.formulas == [None, None]
]


In [4]:
# --- Rewrite the formulas of every formula-bearing series ---------------------
# For each series the two formulas are parsed into ASTs, rewritten by
# SeriesImplementer, passed through FormulaGenerator / FormulaChecker, and
# finally rewritten by CellRangeImplementer before being stored back on the
# series as "=<formula>" strings.
#
# NOTE: this cell overwrites `series.formulas` in place, so running it a second
# time without re-running the extraction cell feeds the already rewritten
# formulas back through the pipeline.
series_list_new = []

for series in series_list_with_formulas:
    formula_1, formula_2 = series.formulas

    # NOTE(review): a series with exactly one missing formula is skipped here
    # and therefore ends up in neither `series_list_new` nor
    # `series_list_with_values` -- confirm that dropping it is intended.
    if formula_1 is None or formula_2 is None:
        continue

    # Read the sheet name once; it is needed both for the implementer and for
    # looking up the sheet's series.
    sheet_name = series.worksheet.sheet_name

    formula_1_ast = FormulaParser.parse_formula(formula_1)
    series_implementer = SeriesImplementer(series_mapping, sheet_name=sheet_name)
    formula_1_ast_series = series_implementer.update_ast(formula_1_ast)

    formula_2_ast = FormulaParser.parse_formula(formula_2)
    formula_2_ast_series = series_implementer.update_ast(formula_2_ast)

    # Use a dedicated name for the per-sheet list so the notebook-level
    # `series_list` (all series, built in the extraction cell) is not clobbered.
    sheet_series_list = series_dict.get(sheet_name)

    ast_generator = FormulaGenerator.get_ast_generator(
        formula_1_ast_series,
        formula_2_ast_series,
        sheet_series_list,
    )

    formulas_are_correct, formula_1_ast_new, formula_2_ast_new = FormulaChecker.check_formulas(ast_generator)

    # Fail before touching the series: a rejected pair must neither overwrite
    # `series.formulas` nor be appended to the result list, and the ASTs
    # returned for a rejected pair should not be processed any further.
    if not formulas_are_correct:
        raise Exception("Formulas are not correct")

    cell_range_implementer = CellRangeImplementer(series_dict)

    formula_1_ast_new_cell_ranges = cell_range_implementer.update_ast(formula_1_ast_new)
    formula_2_ast_new_cell_ranges = cell_range_implementer.update_ast(formula_2_ast_new)

    series.formulas = [f"={formula_1_ast_new_cell_ranges}", f"={formula_2_ast_new_cell_ranges}"]
    series_list_new.append(series)

In [5]:
# --- Rebuild the workbook from series and compare -----------------------------
# Rewritten formula series come first, followed by the untouched value series.
series_list_updated = [*series_list_new, *series_list_with_values]

ExcelBuilder.create_excel_from_series(
    series_list_updated,
    excel_reduced_clean_series_filepath,
)

# Last expression of the cell: its result is displayed as the cell output.
ExcelChecker.excels_are_equivalent(
    excel_reduced_clean_filepath,
    excel_reduced_clean_series_filepath,
)

Values in sheet 'FinancialAnalysis' are different
wb1: [('Date', 'Revenue', 'Expenses', 'Marketing', 'Profit', 'Cumulative Profit', 'Average Expenses', 'Profit Status', 'Rounded Profit', None, 'Summary Name', 'Revenue', 'Expenses', 'Marketing', 'Profit', 'Cumulative Profit', 'Average Expenses'), (45395, 5000, 3200, 800, None, None, None, None, None, None, 'Summary Value', None, None, None, None, None, None), (45394, 4800, 3000, 850, None, None, None, None, None, None, None, None, None, None, None, None, None)]
wb2: [('Date', 'Revenue', 'Expenses', 'Marketing', 'Profit', 'Cumulative Profit', 'Average Expenses', 'Profit Status', 'Rounded Profit', None, 'Summary Name'), (45395, 5000, 3200, 800, None, None, None, None, None, None, 'Summary Value'), (45394, 4800, 3000, 850, None, None, None, None, None, None, None)]


False