In [1]:
import os

from series_extraction.excel_loader import ExcelLoader
from series_extraction.excel_cleaner import ExcelCleaner
from series_extraction.table_extractor import TableExtractor
from series_extraction.series_extractor import SeriesExtractor
from series_extraction.excel_compatibility_checker import ExcelCompatibilityChecker
from series_extraction.series_iterator import SeriesIterator
from series_extraction.series_mapper import SeriesMapper
from series_extraction.excel_validator import ExcelValidator

from ast_transformation.series_formula_generator_old import SeriesFormulaGenerator
from ast_transformation.formula_generator import FormulaGenerator
from ast_transformation.formula_list_generator import FormulaListGenerator
from ast_transformation.formula_evaluator import FormulaEvaluator

from ast_building.formula_parser import FormulaParser
from ast_building.series_implementer import SeriesImplementer

from pipeline_building.series_dependencies_builder import SeriesDependenciesBuilder
from pipeline_building.dag_sorter import DAGSorter

from excel_builder import ExcelBuilder

from excel_checker import ExcelChecker


In [2]:
# Root folder holding every stage of the Excel pipeline. Set the
# EXCEL2PYTHON_DATA_DIR environment variable to run the notebook from another
# checkout or machine; when it is unset or empty, the original local path is used.
data_directory = (
    os.environ.get("EXCEL2PYTHON_DATA_DIR")
    or "/Users/chrislittle/GitHub/speedsheet/excel-2-python/data"
)

project_name = 'vehicle_data'


def _stage_filepath(stage):
    """Return <data_directory>/excel_files_<stage>/<project_name>_<stage>.xlsx."""
    return os.path.join(data_directory, f"excel_files_{stage}", f"{project_name}_{stage}.xlsx")


# One workbook path per pipeline stage; directory and file name share the stage suffix.
excel_raw_file_path = _stage_filepath("raw")
excel_reduced_filepath = _stage_filepath("reduced")
excel_reduced_clean_filepath = _stage_filepath("reduced_clean")
excel_reduced_clean_series_filepath = _stage_filepath("reduced_clean_series")
excel_reduced_clean_series_python_filepath = _stage_filepath("reduced_clean_series_python")

In [3]:
def get_evaluated_results_from_formula_ast(formula_ast, series_values_dict, values_length):
    """Expand a generic formula AST into concrete formulas and evaluate each one.

    Args:
        formula_ast: AST handed to FormulaListGenerator.
        series_values_dict: Series values handed to FormulaListGenerator
            together with the AST.
        values_length: Length of the target series; the formula list is
            generated with start index 0 and end index ``values_length - 1``.

    Returns:
        A list with the result of FormulaEvaluator.evaluate_formula for every
        generated formula, in generation order.
    """
    last_index = values_length - 1
    generator = FormulaListGenerator(formula_ast, series_values_dict)
    formula_strings = list(map(str, generator.generate_formula_list(0, last_index)))

    print("formula_strings: ", formula_strings)

    evaluator = FormulaEvaluator()
    results = []
    for formula_string in formula_strings:
        # The evaluator receives each formula with a leading "=".
        results.append(evaluator.evaluate_formula(f"={formula_string}"))

    return results

In [4]:
# --- Load the raw and reduced workbooks; the reduced one must pass validation ---
excel_raw = ExcelLoader.load_file(excel_raw_file_path)
excel_reduced = ExcelLoader.load_file(excel_reduced_filepath)

is_valid = ExcelValidator.validate_excel(excel_reduced)
if not is_valid:
    raise Exception("Excel file is not valid")

# --- Clean the reduced workbook and write its workbook_with_formulas to disk ---
excel_reduced_clean = ExcelCleaner.clean_excel(excel_reduced)
ExcelBuilder.create_excel_from_workbook(
    excel_reduced_clean.workbook_with_formulas,
    excel_reduced_clean_filepath,
)

# --- Extract tables and check them against the raw and reduced workbooks ---
extracted_tables, workbook_data = TableExtractor.extract_tables(excel_reduced_clean)
is_compatible = ExcelCompatibilityChecker.check_file(excel_raw, excel_reduced, extracted_tables)
if not is_compatible:
    raise Exception("Excel file is not compatible")

# --- Extract the series and materialise them into a list ---
series_dict = SeriesExtractor.extract_series(
    extracted_tables=extracted_tables,
    workbook_data=workbook_data,
)
series_mapping = SeriesMapper.map_series(series_dict)
series_iterator = SeriesIterator.iterate_series(series_dict)
series_list = list(series_iterator)

# A series whose `formulas` equals [None, None] is treated as value-only.
series_list_with_formulas = [s for s in series_list if s.formulas != [None, None]]
series_list_with_values = [s for s in series_list if s.formulas == [None, None]]

# --- Build one generic formula AST per formula-bearing series ---
series_list_new = []
formula_1_ast_series_list = []
generic_formula_dictionary = {}

for series in series_list_with_formulas:
    formula_1, formula_2 = SeriesFormulaGenerator.adjust_formulas(series.formulas)

    # NOTE(review): a series skipped here gets no entry in
    # generic_formula_dictionary and is not in series_list_with_values either,
    # so it looks like it would be absent from any output built from those two
    # collections - confirm this is intended.
    if formula_1 is None or formula_2 is None:
        continue

    series_implementer = SeriesImplementer(
        series_mapping,
        sheet_name=series.worksheet.sheet_name,
    )

    # First formula: parse, rewrite through the series implementer, and record it.
    formula_1_ast = FormulaParser.parse_formula(formula_1)
    formula_1_ast_series = series_implementer.update_ast(formula_1_ast)
    formula_1_ast_series_list.append((series.series_id, formula_1_ast_series))

    # Second formula: same parse + rewrite steps.
    formula_2_ast = FormulaParser.parse_formula(formula_2)
    formula_2_ast_series = series_implementer.update_ast(formula_2_ast)

    # Combine both rewritten ASTs into the generic formula stored for this series id.
    generic_formula_ast = FormulaGenerator.traverse_and_replace(
        formula_1_ast_series,
        formula_2_ast_series,
    )
    generic_formula_dictionary[series.series_id] = generic_formula_ast



In [5]:
# Lookup from the string form of each series id to that series' values.
series_values_dict = {
    str(entry.series_id): entry.values
    for entry in series_list
}

In [6]:
# Build the dependency structure from generic_formula_dictionary; the exact
# shape of the result is defined by SeriesDependenciesBuilder.
series_dependencies = SeriesDependenciesBuilder.build_dependencies(generic_formula_dictionary)

In [7]:
# Order the series dependencies with DAGSorter.
# NOTE(review): presumably a topological order (dependencies first) - confirm against DAGSorter.
sorted_dag = DAGSorter.sort_dag(series_dependencies)

In [8]:
# Start from an empty list on every run so that re-executing this cell cannot
# append the same series twice.
series_list_new = []

# NOTE(review): series_values_dict is only read here and is never refreshed with
# the values computed below, so series evaluated later presumably still see the
# values that were loaded from the workbook - confirm this is intended.
for series_id in sorted_dag:
    formula_ast = generic_formula_dictionary.get(series_id)

    # Ids with no generic formula have nothing to evaluate. Compare against
    # None explicitly so the check does not depend on the AST's truthiness.
    if formula_ast is None:
        continue

    series = SeriesMapper.get_series_from_series_id(series_id, series_dict)
    values_length = len(series.values)
    print(f"Series ID: {series_id}")
    print("Formula Ast: ", formula_ast)
    print("Values Length: ", values_length)

    # Replace the series' values with the results of evaluating its formulas.
    values = get_evaluated_results_from_formula_ast(formula_ast, series_values_dict, values_length)
    series.values = values
    series_list_new.append(series)

Series ID: VehicleData|Transmission Mapped|1|21
Formula Ast:  IF((AVERAGE((('VehicleData|transmission|1|5',), (0, 0), (1, 1)))) = (2), "Automatic", "Manual")
Values Length:  2
Start new index 0
End new index 0
ARRAY VALUES:  [[2]]
Start new index 1
End new index 1
ARRAY VALUES:  [[1]]
formula_strings:  ['IF((AVERAGE(ARRAY(ARRAYROW(2)))) = (2), "Automatic", "Manual")', 'IF((AVERAGE(ARRAY(ARRAYROW(1)))) = (2), "Automatic", "Manual")']
Series ID: VehicleData|Value Rounded to Nearest 5000|1|22
Formula Ast:  (ROUNDDOWN((AVERAGE((('VehicleData|value|1|3',), (0, 0), (1, 1)))) / (5000), 0)) * (5000)
Values Length:  2
Start new index 0
End new index 0
ARRAY VALUES:  [[14945]]
Start new index 1
End new index 1
ARRAY VALUES:  [[4836]]
formula_strings:  ['(ROUNDDOWN((AVERAGE(ARRAY(ARRAYROW(14945)))) / (5000), 0)) * (5000)', '(ROUNDDOWN((AVERAGE(ARRAY(ARRAYROW(4836)))) / (5000), 0)) * (5000)']
Series ID: VehicleData|Mileage Rounded to Nearest 50,000|1|23
Formula Ast:  (ROUNDDOWN((AVERAGE((('Vehicle

In [9]:
# Re-evaluated formula series first, followed by the value-only series.
series_list_updated = [*series_list_new, *series_list_with_values]

In [10]:
# Write the combined series to excel_reduced_clean_series_python_filepath.
# NOTE(review): values_only=True presumably writes evaluated values rather than
# formulas - confirm against ExcelBuilder.
ExcelBuilder.create_excel_from_series(series_list_updated, excel_reduced_clean_series_python_filepath, values_only=True)

In [14]:
# Final round-trip check: compare the workbook rebuilt from the Python-evaluated
# series with the cleaned reduced workbook. Assert the result so a mismatch
# stops a Run All instead of only showing up as a displayed value.
excels_match = ExcelChecker.excels_are_equivalent(
    excel_reduced_clean_series_python_filepath,
    excel_reduced_clean_filepath,
)
assert excels_match, "Python-evaluated workbook is not equivalent to the cleaned reduced workbook"
excels_match

True