In [1]:
import os
from typing import Optional

import xlcalculator

from series_extraction.excel_loader import ExcelLoader
from series_extraction.excel_validator import ExcelValidator
from series_extraction.excel_cleaner import ExcelCleaner
from series_extraction.table_finder import TableFinder
from series_extraction.series_extractor import SeriesExtractor
from series_extraction.excel_compatibility_checker import ExcelCompatibilityChecker
from series_extraction.series_iterator import SeriesIterator
from series_extraction.series_mapper import SeriesMapper

from ast_building.formula_parser import FormulaParser


In [2]:
# Name of the workbook pair to process; both file names below are derived from it.
project_name = 'test_excel_8'

# Resolve <parent of the current working directory>/data.
current_directory = os.getcwd()
parent_directory = os.path.abspath(
    os.path.join(current_directory, os.pardir)
)
data_directory = os.path.join(parent_directory, 'data')

# The raw and reduced workbooks sit in sibling sub-folders of the data directory.
excel_raw_file_path = os.path.join(
    data_directory,
    "excel_files_raw",
    f"{project_name}_raw.xlsx",
)
excel_reduced_filepath = os.path.join(
    data_directory,
    "excel_files_reduced",
    f"{project_name}_reduced.xlsx",
)

In [3]:
# Load both workbook variants through the project loader. The raw file is
# used again by the compatibility check; the reduced file feeds validation,
# cleaning and the compatibility check.
excel_raw= ExcelLoader.load_file(excel_raw_file_path)
excel_reduced = ExcelLoader.load_file(excel_reduced_filepath)

In [4]:
# Validate the reduced workbook; the result is tested by the guard cell that
# follows and aborts the run when it is falsy.
is_valid = ExcelValidator.validate_excel(excel_reduced)

In [5]:
# Abort the run when validation failed so no later cell works on a workbook
# that the validator rejected.
if not is_valid:
    # ValueError describes "bad input" more precisely than a bare Exception
    # and, being an Exception subclass, is still caught by `except Exception`.
    raise ValueError("Excel file is not valid")

In [6]:
# Clean the reduced workbook; the result is bound to a new name, and the
# original `excel_reduced` is still used later by the compatibility check.
excel_reduced_clean = ExcelCleaner.clean_excel(excel_reduced)

In [7]:
# Locate tables in the cleaned workbook. find_tables returns a pair that is
# unpacked into the extracted tables and a companion `data` object; both are
# passed on to SeriesExtractor below.
extracted_tables, data = TableFinder.find_tables(excel_reduced_clean)

In [8]:
# Extract table details from the tables found above.
# NOTE(review): `series_data` is not referenced by any later cell visible in
# this notebook — confirm whether this call is still needed.
series_data = SeriesExtractor.extract_table_details(extracted_tables, data)

In [9]:
# Run the compatibility check over the raw workbook, the reduced workbook and
# the extracted tables; the result is tested by the guard cell that follows.
# NOTE(review): this passes `excel_reduced`, not `excel_reduced_clean` —
# confirm the uncleaned workbook is the intended input.
is_compatible = ExcelCompatibilityChecker.check_file(excel_raw, excel_reduced, extracted_tables)

In [10]:
# Abort the run when the compatibility check failed, before any series are
# extracted from the tables.
if not is_compatible:
    # ValueError describes "bad input" more precisely than a bare Exception
    # and, being an Exception subclass, is still caught by `except Exception`.
    raise ValueError("Excel file is not compatible")

In [11]:
# Extract the series from the detected tables. `series_dict` is the input for
# both SeriesIterator.iterate_series and SeriesMapper.map_series further down.
series_dict = SeriesExtractor.extract_series(extracted_tables=extracted_tables, data=data)

In [12]:
# Obtain an iterable over the extracted series; it is consumed by the `for`
# loop in the next cell.
# NOTE(review): if this is a one-shot iterator, re-running the next cell
# without re-running this one would see it partly consumed — confirm.
series_iterator = SeriesIterator.iterate_series(series_dict)

In [13]:
# Select the first series that holds at least one formula; `series` stays
# None when the iterator yields no such series.
series = None
for item in series_iterator:
    # Inspect every entry instead of comparing against [None, None]: that
    # comparison only recognised formula-free series of length two, so any
    # longer value-only series (e.g. [None, None, None]) was wrongly picked.
    # The leading truthiness test also skips a missing or empty formula list.
    if item.formulas and any(formula is not None for formula in item.formulas):
        series = item
        break


In [14]:
# Echo the series chosen by the loop in the previous cell (None if the loop
# found nothing).
series

Series(series_id=UUID('4f90bdbd-51ee-4d11-a5d6-a708c3cf180e'), worksheet=Worksheet(sheet_name='Sheet1', workbook_file_path=None, worksheet=None), series_header='Index_Match_Size', formulas=['=INDEX(B:B,MATCH(A2,A:A,0))', '=INDEX(B:B,MATCH(A3,A:A,0))'], values=['male', 'female'], header_location=<HeaderLocation.TOP: 'top'>, series_starting_cell=Cell(column=13, row=2, coordinate='M2', value=None, value_type=None), series_length=2, series_data_type=<SeriesDataType.STR: 'str'>)

In [15]:
# Build the series mapping from the extracted series.
# Judging by the output echoed further down, the result appears to be keyed
# by Worksheet, then by Cell, with (index, Series) tuples as values — TODO
# confirm against SeriesMapper.
series_mapping = SeriesMapper.map_series(series_dict)

In [35]:

import xlcalculator


class FormulaParser:
    """Thin wrapper around xlcalculator's formula parser."""

    @staticmethod
    def parse_formula(
        formula: str,
        named_ranges: Optional[dict] = None,
    ) -> xlcalculator.ast_nodes.ASTNode:
        """Parse an Excel formula string with xlcalculator.

        Args:
            formula: Formula text, e.g. ``"=SUM(A2:A4)+AVERAGE(A7:A8)"``.
            named_ranges: Optional mapping of named ranges handed to the
                underlying parser. When omitted, an empty mapping is passed,
                which is what this method always did before the parameter
                existed.

        Returns:
            The object returned by ``xlcalculator.parser.FormulaParser.parse``
            for ``formula``.
        """
        parser = xlcalculator.parser.FormulaParser()
        # Build the empty mapping per call rather than using a mutable
        # default argument that would be shared between calls.
        ranges = {} if named_ranges is None else named_ranges
        return parser.parse(formula=formula, named_ranges=ranges)
    
# Example formula used to exercise the parser; the resulting AST is what the
# visualiser cell below renders.
formula_1 = "=SUM(A2:A4)+AVERAGE(A7:A8)"
formula_1_ast = FormulaParser.parse_formula(formula_1)



In [39]:
from formula_visualiser import FormulaVisualiser

# Instantiate the visualiser and turn the AST parsed earlier (formula_1_ast)
# into a graph object.
visualizer = FormulaVisualiser()
ast_graph = visualizer.visualise(formula_1_ast)
# render() is the last expression of the cell, so its return value is echoed
# as the cell output (the saved output shows 'formula_1_ast.png').
ast_graph.render('formula_1_ast')


'formula_1_ast.png'

In [None]:
# Echo the mapping built by SeriesMapper.map_series for inspection.
# NOTE(review): this prints the whole structure and the saved output appears
# cut off mid-entry — consider displaying only a small sample instead.
series_mapping

{Worksheet(sheet_name='Sheet1', workbook_file_path=None, worksheet=None): {Cell(column=1, row=2, coordinate=None, value=None, value_type=None): (0,
   Series(series_id=UUID('192a7273-8b4d-437b-81ae-e3ac09c8b070'), worksheet=Worksheet(sheet_name='Sheet1', workbook_file_path=None, worksheet=None), series_header='age', formulas=[None, None], values=[56, 46], header_location=<HeaderLocation.TOP: 'top'>, series_starting_cell=Cell(column=1, row=2, coordinate='A2', value=None, value_type=None), series_length=2, series_data_type=<SeriesDataType.INT: 'int'>)),
  Cell(column=1, row=3, coordinate=None, value=None, value_type=None): (0,
   Series(series_id=UUID('192a7273-8b4d-437b-81ae-e3ac09c8b070'), worksheet=Worksheet(sheet_name='Sheet1', workbook_file_path=None, worksheet=None), series_header='age', formulas=[None, None], values=[56, 46], header_location=<HeaderLocation.TOP: 'top'>, series_starting_cell=Cell(column=1, row=2, coordinate='A2', value=None, value_type=None), series_length=2, serie

In [None]:
class SeriesImplementer:
    """Holder for the series-implementation step applied to a formula AST.

    NOTE(review): the step is currently a pass-through; no transformation is
    applied to the AST yet.
    """

    @staticmethod
    def implement_series(formula_ast: xlcalculator.ast_nodes.ASTNode) -> xlcalculator.ast_nodes.ASTNode:
        """Return ``formula_ast`` unchanged.

        Args:
            formula_ast: AST node to process.

        Returns:
            The very same object that was passed in; no copy is made.
        """
        return formula_ast
