In [1]:
import os

from series_extraction.excel_loader import ExcelLoader
from series_extraction.excel_cleaner import ExcelCleaner
from series_extraction.table_extractor import TableExtractor
from series_extraction.series_extractor import SeriesExtractor
from series_extraction.excel_compatibility_checker import ExcelCompatibilityChecker
from series_extraction.series_iterator import SeriesIterator
from series_extraction.series_mapper import SeriesMapper
from series_extraction.excel_validator import ExcelValidator

from ast_transformation.formula_generator import FormulaGenerator

from ast_building.formula_parser import FormulaParser

from excel_builder import ExcelBuilder

In [2]:
# Root folder holding one sub-folder per pipeline stage. Set the
# EXCEL_2_PYTHON_DATA_DIR environment variable to run the notebook against a
# different checkout; the original developer path is kept as the fallback.
data_directory = os.environ.get(
    "EXCEL_2_PYTHON_DATA_DIR",
    "/Users/chrislittle/GitHub/speedsheet/excel-2-python/data",
)

project_name = 'test_excel_2'


def _stage_filepath(stage):
    """Return the workbook path for one pipeline stage.

    Every stage uses the same layout:
    ``<data_directory>/excel_files_<stage>/<project_name>_<stage>.xlsx``.
    """
    return os.path.join(data_directory, f"excel_files_{stage}", f"{project_name}_{stage}.xlsx")


excel_raw_file_path = _stage_filepath("raw")
excel_reduced_filepath = _stage_filepath("reduced")
excel_reduced_clean_filepath = _stage_filepath("reduced_clean")
excel_raw_clean_filepath = _stage_filepath("raw_clean")
excel_reduced_clean_series_filepath = _stage_filepath("reduced_clean_series")
excel_reduced_clean_series_python_filepath = _stage_filepath("reduced_clean_series_python")
excel_raw_clean_series_python_filepath = _stage_filepath("raw_clean_series_python")

In [3]:
# Load both workbooks; only the reduced one is validated here.
excel_raw = ExcelLoader.load_file(excel_raw_file_path)
excel_reduced = ExcelLoader.load_file(excel_reduced_filepath)
is_valid = ExcelValidator.validate_excel(excel_reduced)
if not is_valid:
    raise ValueError("Excel file is not valid")

# Clean both workbooks and write the cleaned copies (with formulas) to disk.
excel_reduced_clean = ExcelCleaner.clean_excel(excel_reduced)
excel_raw_clean = ExcelCleaner.clean_excel(excel_raw)
ExcelBuilder.create_excel_from_workbook(excel_reduced_clean.workbook_with_formulas, excel_reduced_clean_filepath)
ExcelBuilder.create_excel_from_workbook(excel_raw_clean.workbook_with_formulas, excel_raw_clean_filepath)

# Tables come from the cleaned reduced workbook; the compatibility check is run
# against the uncleaned raw/reduced pair.
extracted_tables, workbook_data = TableExtractor.extract_tables(excel_reduced_clean)
is_compatible = ExcelCompatibilityChecker.check_file(excel_raw, excel_reduced, extracted_tables)
if not is_compatible:
    raise ValueError("Excel file is not compatible")

series_dict = SeriesExtractor.extract_series(extracted_tables=extracted_tables, workbook_data=workbook_data)
series_mapping = SeriesMapper.map_series(series_dict)
series_iterator = SeriesIterator.iterate_series(series_dict)

series_list = list(series_iterator)

# NOTE(review): these comparisons only match when `formulas` is a list; a
# tuple (None, None) would compare unequal to [None, None] -- confirm the type.
series_list_with_formulas = [
    series for series in series_list if series.formulas != [None, None]
]
series_list_with_values = [
    series for series in series_list if series.formulas == [None, None]
]

# Maps series_id -> (tokens of the first formula, generic formula).
generic_formula_dictionary = {}

for series in series_list_with_formulas:
    formula_1, formula_2 = series.formulas

    # Both formulas are fed to the generator, so skip series missing either one.
    if formula_1 is None or formula_2 is None:
        continue

    formula_1_ast_elements, _ = FormulaParser.parse_formula(formula_1)
    formula_2_ast_elements, _ = FormulaParser.parse_formula(formula_2)

    generic_formula = FormulaGenerator.generate_generic_formula(formula_1_ast_elements, formula_2_ast_elements)

    generic_formula_dictionary[series.series_id] = (formula_1_ast_elements, generic_formula)

In [4]:
# Take the entry at position 5 (dict insertion order) as the worked example:
# its first-formula tokens and the matching generic formula.
formula_1_ast_elements, generic_formula = [*generic_formula_dictionary.values()][5]

In [5]:
# Display the token list of the example formula selected above.
formula_1_ast_elements

[CONCATENATE <Function>,
 ( <Parenthesis>,
 SUMMARY!A2 <Range>,
 , <Operator>,
  Parking Recharge <String>,
 ) <Parenthesis>]

In [6]:
from excel_utils import ExcelUtils
from formulas.tokens.operand import Range

In [7]:
def get_sheet_name_from_range(range: Range):
    """Split a range token's name into its sheet part and its cell-reference part.

    Args:
        range: Object exposing the reference text as ``.name``
            (e.g. ``SUMMARY!A2`` or ``A2``).

    Returns:
        A ``(sheet_name, range_string)`` tuple. ``sheet_name`` is ``None`` when
        the name contains no ``!`` separator.
    """
    # Split on the LAST "!" only: a sheet name may itself contain "!", and the
    # cell reference is always the part after the final separator.
    sheet_name, separator, range_string = range.name.rpartition("!")
    if not separator:
        return None, range.name
    return sheet_name, range_string

def apply_delta_to_range(range: Range, delta):
    """Shift the coordinates of a range token by per-coordinate offsets.

    Args:
        range: Range token; its name may carry a ``sheet!`` prefix.
        delta: Four offsets, in order: start column, start row, end column,
            end row.

    Returns:
        A new ``Range`` with the shifted coordinates (the sheet prefix, if any,
        is kept), or ``range`` itself when the coordinates are flagged as a
        column range.
    """

    print(f"range: {range}")

    sheet_name, range_string = get_sheet_name_from_range(range)

    d_start_col, d_start_row, d_end_col, d_end_row = delta

    start_col, start_row, end_col, end_row, is_column_range = ExcelUtils.get_coordinates_from_range(range_string)

    # Ranges flagged as column ranges are handed back untouched.
    if is_column_range:
        return range

    new_range_string = ExcelUtils.get_range_from_coordinates(
        start_col + d_start_col,
        start_row + d_start_row,
        end_col + d_end_col,
        end_row + d_end_row,
        is_column_range,
    )
    print(f"new_range_string: {new_range_string}")

    # Re-attach the sheet prefix only when the original reference had one.
    shifted_name = f"{sheet_name}!{new_range_string}" if sheet_name else new_range_string
    return Range(shifted_name)


In [8]:
# Display the generic formula of the example: a dict keyed by token index whose
# values are 4-tuples of offsets (start column, start row, end column, end row).
generic_formula

{2: (0, 1, 0, 1)}

In [9]:
def generate_formula_list(formula_1_ast_elements, generic_formula, list_length=2):
    """Build successive formula strings by repeatedly shifting the Range tokens.

    Args:
        formula_1_ast_elements: Token list of the starting formula. It is not
            modified; shifting happens on an internal copy.
        generic_formula: Mapping from token index to the 4-tuple of offsets
            applied to the Range token at that index on every step.
        list_length: Number of formula strings to generate (default 2).

    Returns:
        A list of ``list_length`` strings, each the concatenation of the token
        names after one more shift. The shift is applied before the first
        string is emitted, so the unshifted starting formula is not included.
    """
    # Work on a copy so calling this twice with the same tokens gives the same
    # result instead of continuing from the previously shifted state.
    elements = list(formula_1_ast_elements)

    formula_list = []

    for _ in range(list_length):
        for index, element in enumerate(elements):
            if isinstance(element, Range):
                delta = generic_formula[index]
                print(f"delta: {delta}")
                new_element = apply_delta_to_range(element, delta)
                print(f"new_element: {new_element}")
                elements[index] = new_element
                print(f"formula_1_ast_elements: {elements}")
        # Emit exactly one formula per step, after every Range has been shifted.
        formula_list.append("".join(token.name for token in elements))

    return formula_list

In [10]:
# Generate the shifted formula strings for the example; the helpers print
# their debug traces before the resulting list is displayed.
generate_formula_list(formula_1_ast_elements, generic_formula)

delta: (0, 1, 0, 1)
range: SUMMARY!A2 <Range>
new_range_string: A3:A3
new_element: SUMMARY!A3 <Range>
formula_1_ast_elements: [CONCATENATE <Function>, ( <Parenthesis>, SUMMARY!A3 <Range>, , <Operator>,  Parking Recharge <String>, ) <Parenthesis>]
delta: (0, 1, 0, 1)
range: SUMMARY!A3 <Range>
new_range_string: A4:A4
new_element: SUMMARY!A4 <Range>
formula_1_ast_elements: [CONCATENATE <Function>, ( <Parenthesis>, SUMMARY!A4 <Range>, , <Operator>,  Parking Recharge <String>, ) <Parenthesis>]


['CONCATENATE(SUMMARY!A3, Parking Recharge)',
 'CONCATENATE(SUMMARY!A4, Parking Recharge)']