In [35]:
from openpyxl import Workbook, load_workbook
from openpyxl.writer.excel import save_virtual_workbook
import os
import pandas as pd
from array import array
from datetime import datetime, timedelta
from pycel import ExcelCompiler
from dateutil.relativedelta import relativedelta

In [121]:
class SheetAnalyzer:

    BASE_ELEMENTS_ROW = 17 # Location of base's elements
    UNIT_ROW = 18 # Location of units
    CURVE = 19 # Location of Curve's interpolations
    METADATA_COL = "A"
    POINT_N_COL = "B"
    DATE_COL = "D"

    def __init__(self, input, sheet_number) -> None:
        self.wb = load_workbook(os.getcwd()+ input)
        self.evaluator = ExcelCompiler(filename=os.getcwd()+ input)
        self.sheet_number = sheet_number
        self.sheet_name = self.wb.sheetnames[sheet_number]
        self.ws = self.wb[self.sheet_name]

        self.dates = []
        self.json = {}

    # Create function detect and evaluate excel function when it's present on cell

    def _fullFillPointsWithDates(self, points:array):
        ref_date = None
        for index, point in enumerate(points):
            if point["date"] is not None:
                ref_date = {"index": index, "date": point["date"]}
                break
        
        if ref_date is not None:
            for index, point in enumerate(points):
                if point["date"] is None:
                    if index < ref_date["index"]:
                        point["date"] = ref_date["date"] + relativedelta(years=-(ref_date["index"]-index))
                    else:
                        point["date"] = ref_date["date"] + relativedelta(years=(index-ref_date["index"]))
        
        return points
            
    def _getPointsWithDates(self):
        """
            Return dict (row_number, point_no, date) of all points and associate's date
        """
        points=[]
        for point in self.ws[self.POINT_N_COL]:
            if point.value is not None and type(point.value) is int:
                points.append({
                    "row": point.row, 
                    "point_n": point.value, 
                    "date": self.ws[self.DATE_COL+str(point.row)].value
                })
            elif point.value is not None and point.value.startswith("="):
                points.append({
                    "row": point.row, 
                    "point_n": self.evaluator.evaluate(self.sheet_name+"!"+point.coordinate), 
                    "date": self.ws[self.DATE_COL+str(point.row)].value
                })
            else:
                continue
        
        return self._fullFillPointsWithDates(points)

    def _getSpecifications(self):
        """
        Return dict (column_id, specification, unit, interpolation) at BASE_ELEMENTS_ROW for a given sheetname
        Ignore the firsts 2 elements because they always not belong to specifications
        """
        return [{
            "column": be.column, 
            "specification_name": be.value, 
            "unit": self.ws.cell(row=self.UNIT_ROW, column=be.column).value, 
            "interpolation": self.ws.cell(row=self.CURVE, column=be.column).value
        } for be in self.ws[self.BASE_ELEMENTS_ROW] if be.value is not None][2:]

    def _getValuesBySpecificiation(self, specification):
        return [{ 
            "row": date["row"], 
            "value": float(self.ws.cell(row=date["row"], column=specification["column"]).value)
            } for date in self.dates if self.ws.cell(row=date["row"], column=specification["column"]).value is not None]

    def _mapValuesAccordingSpecification(self, specification, values):
        if specification["interpolation"] == "CONST":
            return [next(item["value"] for item in values if item["value"] is not None)] * len(self.dates)
        
        if specification["interpolation"] == "LINEAR":
            pass

    def _getSpectificationsWithInterpolations(self):
        """
            return a dict with specifications and interpolations
        """
        result = []
        if self.dates == []:
            self.dates = self._getPointsWithDates()

        for specification in self._getSpecifications():
            values = self._getValuesBySpecificiation(specification)
            specification["value"] = self._mapValuesAccordingSpecification(specification, values) 
            return specification
            
    def _getBaseElementMetaData(self):
        """
        Return array of tuples (row_id, metadata_name, metadata_value) for a given sheetname
        """
        return [(cell.row, cell.value, self.ws['B'][cell.row].value) for cell in self.ws[self.METADATA_COL] if (cell.value is not None and cell.row < self.BASE_ELEMENTS_ROW)]

    def _getRawDataStorage(self):
        """
            Return JSON storage of the input
        """
        return self._getSpectificationsWithInterpolations()

In [122]:
sa = SheetAnalyzer("/input/model.xlsx", 2)
sa._getSpectificationsWithInterpolations()

{'column': 5,
 'specification_name': 'Nominal Voltage',
 'unit': 'V',
 'interpolation': 'CONST',
 'value': [3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2,
  3.2]}