# Evaluator
> Hidden module containing old work on an evaluator

In [None]:
from excelbutbetter.parser import parse, parse_value, Ref, parse_formula, InfixOp, PrefixOp, PostfixOp, Function

# Evaluation
Evaluation is slightly difficult: we need to evaluate refs, but they might be circular. One way to go about this is to fix an evaluation order on the spreadsheet (let's say row 1 first, then row 2, etc.), and defer any cells that refer to not-yet-evaluated cells, making additional passes over those.
For now, let's skip over formulas with references in them, raising EvaluationError instead.

In [None]:
import operator as op
import re

def parse_criterium(criterium):
    '''
    Turn an Excel-style criterium into a predicate `val -> bool`.

    A non-string criterium matches by equality. A string criterium is tried,
    in this order, as:

    1. a comparison such as `"<=4"` or `'<>"foo"'`, whose right-hand side is
       parsed with `parse_value`;
    2. a wildcard pattern, where `*` matches any run of characters, `?`
       matches exactly one character and `~` makes the next character literal;
    3. a plain value, parsed with `parse_value` and matched by equality.

    Wildcard predicates return False for values that are not strings.
    '''
    if not isinstance(criterium, str):
        return lambda val: val == criterium

    # Comparison op. Two-character symbols are listed before their one-character
    # prefixes so that '<=' and '<>' are not mistaken for '<'.
    comparisons = {'<=': op.le, '<>': op.ne, '<': op.lt,
                   '>=': op.ge, '>': op.gt, '=': op.eq}
    for symbol, impl in comparisons.items():
        if criterium.startswith(symbol):
            # NOTE(review): ParseError is not among the names imported from
            # excelbutbetter.parser in the visible part of this file -- confirm
            # it is in scope, otherwise this handler raises NameError.
            try: rest = parse_value(criterium[len(symbol):])
            except ParseError: continue
            return lambda val: impl(val, rest)

    # String match: translate the Excel wildcards into a regular expression.
    regex = []
    is_pattern = False  # Becomes True once a wildcard or a '~' escape is seen
    escaping = False
    for c in criterium:
        if escaping:
            # Whatever follows '~' is literal, and the escape ends here.
            regex.append(re.escape(c))
            escaping = False
        elif c == '~':
            escaping = True
            is_pattern = True
        elif c == '*':
            regex.append('.*')
            is_pattern = True
        elif c == '?':
            regex.append('.')
            is_pattern = True
        else:
            # Escape literals so that e.g. '.' or '(' are not regex syntax.
            regex.append(re.escape(c))

    if is_pattern:
        pattern = re.compile(''.join(regex))
        return lambda val: isinstance(val, str) and pattern.fullmatch(val) is not None

    # Need to parse the value. This is done once, outside the lambda, so the
    # predicate does not re-parse the criterium on every call.
    to_compare = parse_value(criterium)
    return lambda val: val == to_compare
    
# Smoke tests for parse_criterium: plain values, comparison prefixes,
# the `*` / `?` wildcards and `~`-escaped wildcards.
assert parse_criterium('3')(3) == True
assert parse_criterium('3')(4) == False
assert parse_criterium('<=4')(4) == True
assert parse_criterium('<=4')(5) == False
assert parse_criterium('<>"foo"')('foo') == False
assert parse_criterium('*')('foobar') == True
assert parse_criterium('foo*bar')('foobar') == True
assert parse_criterium('foo*bar')('fooooobar') == True
assert parse_criterium('foo*bar')('fooar') == False
assert parse_criterium('foo?bar')('fooobar') == True
assert parse_criterium('foo?bar')('fooar') == False
assert parse_criterium('foo~*bar')('foo*bar') == True
assert parse_criterium('foo~*bar')('foooobar') == False
assert parse_criterium('foo~?bar')('foo?bar') == True
assert parse_criterium('foo~?bar')('foo~?bar') == False

In [None]:
def evaluate(tree, context=None):  # Todo(Rik): str might evaluate to ref.
    '''
    Recursively evaluate a parsed formula `tree`.

    `context` is the sheet, indexed as `context[row][column]`; it is only
    touched when the tree contains a `Ref` or a `:` range. Looking up cells
    outside the sheet raises IndexError, which the caller has to handle.

    Literals evaluate to themselves, `:` builds a `Range`, and operators and
    functions are dispatched through the `*_eval_map` tables. Any node that
    is none of these (for instance `None`) evaluates to `None`.
    '''
    # Literals
    if isinstance(tree, (int, float, bool, str)):
        return tree

    if isinstance(tree, InfixOp):
        if tree.op == ':':
            start, end = tree.left, tree.right
            assert isinstance(start, Ref) and isinstance(end, Ref)
            # Responsibility of the caller to handle IndexError
            cells = []
            for i in range(start.row, end.row+1):
                cells.append([context[i][j] for j in range(start.column, end.column+1)])
            return Range(values=cells, start=start, end=end)
        # Every other infix operator goes through the table
        handler = infix_eval_map[tree.op]
        return handler(evaluate(tree.left, context), evaluate(tree.right, context))

    if isinstance(tree, PrefixOp):
        handler = prefix_eval_map[tree.op]
        return handler(evaluate(tree.arg, context))

    if isinstance(tree, PostfixOp):
        handler = postfix_eval_map[tree.op]
        return handler(evaluate(tree.arg, context))

    if isinstance(tree, Function):
        evaluated = evaluate(tree.args, context)
        # Functions always receive a tuple, so a lone argument gets wrapped
        packed = evaluated if isinstance(evaluated, tuple) else (evaluated,)
        return function_eval_map[tree.name](packed)

    if isinstance(tree, Ref):  # Responsibility of the caller to handle IndexError
        return context[tree.row][tree.column]

    # Unknown node types fall through and evaluate to None
    return None

In [None]:
# Maps each operator symbol to the syntax-tree node class it belongs to.
# Several symbols are written backslash-escaped (e.g. r'\^'); presumably they
# double as regex fragments in the parser -- TODO confirm.
# NOTE(review): '-' appears twice as a key. A dict literal keeps only the last
# value, so the PrefixOp entry is silently replaced by InfixOp.
operators = {
    ':': InfixOp, ' ': InfixOp,
    '-': PrefixOp, '%': PostfixOp, r'\^': InfixOp,
    r'\*': InfixOp, '/': InfixOp, r'\+': InfixOp,
    '-': InfixOp, r'\&': InfixOp, '=': InfixOp,
    '<': InfixOp, '>': InfixOp, '<=': InfixOp,
    '>=': InfixOp, '<>': InfixOp, ',': InfixOp,
}

class Range:  # We don't use a named tuple here because isinstance(namedtuple, tuple) == True.
    '''
    A rectangular block of cell values together with its two corner refs.

    `values` is a list of rows (each a list of cell values); `start` and `end`
    are objects with `row` and `column` attributes. Raises ValueError when
    `start` lies past `end` in either direction.
    '''
    def __init__(self, values=None, start=None, end=None):
        if start.column > end.column or start.row > end.row:
            raise ValueError('Range start must not exceed its end in row or column')
        # A fresh list per instance: a `[]` default would be shared by every
        # Range created without explicit values.
        self.values = [] if values is None else values
        self.start = start
        self.end = end

    def __repr__(self):
        return f'Range(values={self.values}, start={self.start}, end={self.end})'


# Implementation of every infix operator, keyed by the operator symbol as it
# appears on the syntax-tree node. `None` marks operators without an entry here.
infix_eval_map = {
    ':': None,  # Ranges are built directly in `evaluate`
    ',': lambda first, second: (first, second),  # Tuple constructor
    ' ': None,  # Range intersection
    r'\^': op.pow,
    r'\*': op.mul,
    '/': op.truediv,
    r'\+': op.add,
    '-': op.sub,
    r'\&': None,  # Todo(Rik) look this up
    '=': op.eq,
    '<': op.lt,
    '>': op.gt,
    '<=': op.le,
    '>=': op.ge,
    '<>': op.ne,
}

# Implementation of every prefix operator, keyed by operator symbol.
prefix_eval_map = {
    '-': op.neg,  # Unary minus
}

# Implementation of every postfix operator, keyed by operator symbol.
postfix_eval_map = {
    '%': lambda percentage: op.truediv(percentage, 100),  # 50% -> 0.5
}

def flatten_ranges(args):
    '''
    Takes a tuple args and expands all the ranges to fit inline.

    Yields the cell values of every `Range` row by row, recurses into nested
    tuples, and yields every other argument unchanged.
    '''
    for arg in args:
        if isinstance(arg, Range):
            # Walk the rows, then the cells within each row
            yield from (v for row in arg.values for v in row)
        elif isinstance(arg, tuple):  # Todo(Rik): do I want to flatten ranges in this case?
            yield from flatten_ranges(arg)
        else:
            yield arg
            
def flatten_nested_tuples(args, flatten_ranges=False):
    '''
    Lazily yield the leaves of arbitrarily nested tuples, in order.
    With `flatten_ranges` set, every Range is replaced by its cell values
    (row by row); otherwise ranges are yielded as-is.
    >>> tuple(flatten_nested_tuples((3, 4, (5, (6, 7)))))
    (3, 4, 5, 6, 7)
    '''
    for item in args:
        if isinstance(item, tuple):
            # Descend into the nested tuple, keeping the same range policy
            yield from flatten_nested_tuples(item, flatten_ranges)
            continue
        if isinstance(item, Range) and flatten_ranges:
            for row in item.values:
                yield from row
        else:
            yield item

def splat(func):
    '''
    Adapt `func` so it is called with one tuple that is unpacked into
    positional arguments, i.e. `splat(f)(args) == f(*args)`.
    Functions in the evaluator receive their arguments as a tuple; going
    through `splat` keeps the arity check of `f` itself, so passing too many
    or too few arguments still raises TypeError.
    It also reads better than spelling the unpacking out by hand: compare
    `g = splat(lambda x, y: math.atan(y/x))` with
    `g = lambda args: (lambda x, y: math.atan(y/x))(*args)`.
    '''
    return lambda args: func(*args)
    
def flat_ranges(func):
    '''
    Wrap `func` so that it receives its argument tuple with every range
    expanded inline (see `flatten_ranges`).
    '''
    def wrapped(args):
        expanded = tuple(flatten_ranges(args))
        return func(expanded)
    return wrapped

def flat_tuples(func, flatten_ranges=False):
    '''
    Wrap `func` so that it receives its argument tuple with all nested tuples
    flattened. Ranges are expanded as well when `flatten_ranges` is true.
    '''
    def wrapped(args):
        flattened = tuple(flatten_nested_tuples(args, flatten_ranges))
        return func(flattened)
    return wrapped

def constant(value):
    '''
    Build a function that accepts only `None` as its argument and returns
    `value`; any other argument raises TypeError.
    '''
    def wrapped(args):
        if args is None:
            return value
        raise TypeError
    return wrapped
            
import math
from numbers import Number
from functools import reduce
            
def excel_averageif(index_range, criterium, average_range=None):
    '''
    Average the cells selected by `criterium`.

    The criterium is tested against the cells of `index_range`. Without an
    `average_range` the matching cells themselves are averaged; with one, the
    cells at the same positions in `average_range` are averaged instead.
    While excel doesn't require index_range and average_range to have the same shape, we do!
    Todo(Rik): maybe there needs to be some AST-rewriting pass?
    '''
    matches = parse_criterium(criterium)
    if average_range is None:
        selected = [cell for row in index_range.values for cell in row if matches(cell)]
    else:
        selected = []
        # Walk both ranges in lockstep: the index cell decides, the average cell is kept
        for index_row, average_row in zip(index_range.values, average_range.values):
            for index_cell, average_cell in zip(index_row, average_row):
                if matches(index_cell):
                    selected.append(average_cell)
    return sum(selected) / len(selected)

def excel_choose(index, *values):
    '''
    Return the `index`-th of `values`, counting from 1.
    Raises ValueError when `index` is outside `1..len(values)`.
    '''
    if not (1 <= index <= len(values)):
        raise ValueError
    return values[index - 1]

def count_if(should_count):
    '''
    Build a counter over an argument tuple: every plain argument contributes
    `should_count(arg)`, and every Range contributes the sum of `should_count`
    over its cells.
    '''
    def tally(item):
        if isinstance(item, Range):
            return sum(should_count(cell) for row in item.values for cell in row)
        return should_count(item)

    def counter(args):
        return sum(tally(item) for item in args)
    return counter
        
def excel_countif(r, criterium):
    '''Count the cells of `r` that satisfy the Excel-style `criterium`.'''
    matches = parse_criterium(criterium)
    counter = count_if(matches)
    return counter([r])

def excel_find(s, t, start_pos=0):
    '''
    Return the 1-based position of the first occurrence of `s` in `t`.

    `start_pos` is the 1-based character position at which the search starts,
    as in Excel's FIND. The default of 0 (and any value below 1) also searches
    from the first character. Raises ValueError when `s` is not found.
    '''
    # str.index wants a 0-based offset; clamp so that 0 and 1 both mean "the start"
    offset = max(start_pos - 1, 0)
    return t.index(s, offset) + 1

def excel_fv(interest, n_periods, payment=None, present_value=None, timing=0):
    '''
    Future value of an investment with constant payments and interest rate.

    `interest` is the rate per period, `n_periods` the number of periods,
    `payment` the amount paid each period and `present_value` the lump sum at
    the start. At least one of `payment` and `present_value` must be given;
    the missing one counts as 0. `timing` is 0 when payments fall at the end
    of a period and 1 when they fall at the beginning.
    The result has the opposite sign of the cash that was paid in.
    '''
    assert timing in (0, 1)
    if payment is None:
        assert present_value is not None, 'Payment was not provided, so present_value must be!'
        payment = 0
    elif present_value is None:
        present_value = 0

    growth = (1+interest)**n_periods
    appreciation = present_value * growth
    if interest == 0:
        # Without interest the annuity formula divides by zero; the payments
        # simply add up.
        payments = payment * n_periods
    else:
        payments = payment * (1+interest*timing)*(growth-1)/interest
    return -(appreciation + payments)
    
def excel_hlookup(lookup_value, table_range, i, is_approximate=True):
    '''
    Look `lookup_value` up in the first row of `table_range` and return the
    cell from row `i` (1-based) of the matching column.

    An exact match always wins. Without one, and with `is_approximate` set,
    the last column whose header is less than or equal to `lookup_value` is
    used; like Excel this assumes the first row is sorted ascending. Returns
    None when nothing matches. Raises ValueError when `i` is below 1.
    '''
    if i < 1: raise ValueError('row index < 1 passed to hlookup')
    header = table_range.values[0]
    try: j = header.index(lookup_value)
    except ValueError:
        if not is_approximate: return None
        j = None
        for column, candidate in enumerate(header):
            try:
                if candidate <= lookup_value:
                    j = column
            except TypeError:
                # Headers that cannot be ordered against the lookup value
                # (e.g. text vs. number) can never be an approximate match.
                continue
        if j is None: return None
    return table_range.values[i-1][j]  # Excel is 1-indexed

def excel_if(cond, yes, no=False):
    '''Return `yes` when `cond` is truthy and `no` (False if omitted) otherwise.'''
    if cond:
        return yes
    return no

# We do our own argument parsing because the first argument might be a tuple
def excel_index(args):
    '''
    INDEX(range_or_ranges, row, [column], [area]), all indices 1-based.

    `args[0]` is a single range or a tuple of ranges; everything after it
    holds the (possibly nested) row number, column number and the number of
    the range to use, which defaults to the first.

    With only a row number the whole row is returned as a list; with both a
    row and a column number the single cell is returned. Raises TypeError for
    a missing or too long index list and for indices below 1.
    '''
    refs_or_array = args[0] if isinstance(args[0], tuple) else (args[0],)
    # Slice instead of indexing: this also covers a lone row number (which is
    # not wrapped in a tuple) and a call without any index at all.
    remaining = flat_tuples(lambda x: x)(args[1:])
    if len(remaining) > 3: raise TypeError('Too many arguments to INDEX')
    # Convert the 1-based Excel indices to 0-based ones
    i = None if len(remaining) <= 0 else remaining[0]-1
    j = None if len(remaining) <= 1 else remaining[1]-1
    a = 0 if len(remaining) <= 2 else remaining[2]-1
    if i is None and j is None:
        raise TypeError('One of row or column number must be present.')
    elif i is not None and j is None:
        if i < 0: raise TypeError('row number < 1 in INDEX not allowed')
        return refs_or_array[a].values[i]
    elif i is None and j is not None:
        if j < 0: raise TypeError('column number < 1 in INDEX not allowed')
        return [row[j] for row in refs_or_array[a].values]
    else:
        if i < 0 or j < 0: raise TypeError('column or row number < 1 in INDEX not allowed')
        return refs_or_array[a].values[i][j]

from scipy.optimize import newton
    
def excel_irr(args):
    '''
    Internal rate of return of a series of cash flows.

    `args` is `(values,)` or `(values, guess)`. `values` may be a list or
    tuple of cash flows or a Range, whose cells are read row by row. `guess`
    is the starting point for the root search and defaults to 0.1.
    The rate is the root of `sum(v / (1+r)**i)` over the cash flows, found
    with scipy's `newton`. Raises TypeError for a wrong number of arguments.
    '''
    if len(args) == 0 or len(args) > 2: raise TypeError('too little or too many arguments to IRR')
    values = args[0]
    guess = args[1] if len(args) > 1 else 0.1
    if isinstance(values, (list, tuple)):
        cashflows = list(values)
    elif isinstance(values, Range):
        # Cell ranges arrive as a Range, which cannot be iterated directly
        cashflows = [v for row in values.values for v in row]
    else:
        # Materialise once: the solver evaluates the sum many times
        cashflows = list(values)
    return newton(lambda r: sum(v / (1+r)**i for i, v in enumerate(cashflows)), guess)
    
def p(f):
    '''Debugging aid: wrap `f` so each call first prints its args and kwargs.'''
    def wrapped(*positional, **keywords):
        print(positional, keywords)
        result = f(*positional, **keywords)
        return result
    return wrapped
        
from datetime import date, time
# Functions get their arguments passed to them as a tuple.
# The responsibility for turning this into proper arguments lies with the implementor.
# The usual adapters are `splat` (unpack the tuple into positional arguments)
# and `flat_tuples` (flatten nested tuples first, optionally expanding ranges).
function_eval_map = {
    'SUM': flat_tuples(sum, flatten_ranges=True),
    'ABS': splat(abs),
    'ACOS': splat(math.acos),
    # Flattened first, so that more than two (nested) arguments work as well
    'AND': flat_tuples(lambda args: reduce(op.and_, args, True)),
    'ASIN': splat(math.asin), # lambda args: math.asin(*args),
    'ATAN': splat(math.atan), # lambda args: math.atan(*args),
    # atan2 picks the right quadrant and copes with x == 0, unlike atan(y/x)
    'ATAN2': splat(lambda x, y: math.atan2(y, x)),
    'AVERAGE': flat_tuples(lambda x: sum(x)/len(x), flatten_ranges=True),
    'AVERAGEIF': flat_tuples(splat(excel_averageif)),
    'CHOOSE': flat_tuples(splat(excel_choose)),
    'COLUMNS': splat(lambda r: r.end.column - r.start.column + 1),
    'COS': splat(math.cos),
    'COUNT': flat_tuples(count_if(lambda v: isinstance(v, Number) and not isinstance(v, bool))),
    'COUNTA': flat_tuples(count_if(lambda v: v is not None and not isinstance(v, str))),
    'COUNTBLANK': flat_tuples(count_if(lambda v: v is None)),
    'COUNTIF': flat_tuples(splat(excel_countif)),
    'DATE': flat_tuples(splat(lambda year, month, day: date(year, month, day))),
    'DAY': splat(lambda d: d.day),
    'EVEN': splat(lambda n: 2*math.floor(n/2) if n < 0 else 2*math.ceil(n/2)),
    'EXACT': flat_tuples(splat(lambda s, t: s == t)),
    'EXP': splat(math.exp),
    'FACT': splat(lambda x: math.factorial(int(x))),
    'FALSE': splat(constant(False)),
    'FIND': flat_tuples(splat(excel_find)),
    'FV': flat_tuples(splat(excel_fv)),
    'HLOOKUP': flat_tuples(splat(excel_hlookup)),
    #'HOUR': ?????
    'IF': flat_tuples(splat(excel_if)),
    'INDEX': excel_index,
    'INT': flat_tuples(splat(lambda x: math.floor(x))),
    # excel_irr unpacks the argument tuple itself, so it must not be splatted
    'IRR': excel_irr,
}

In [None]:
def C(s, context=None):
    '''Shorthand for the tests: parse the cell text `s` and evaluate it against `context`.'''
    tree = parse(s)
    return evaluate(tree, context=context)

In [None]:
# INT is implemented with math.floor, so -8.9 becomes -9 rather than -8.
assert C('=INT(8.9)') == 8
assert C('=INT(-8.9)') == -9

In [None]:
# Fixture for INDEX: a header row followed by six data rows.
rows = [
    ['Fruit', 'Price', 'Count'],
    ['Apples', 0.69, 40],
    ['Bananas', 0.34, 38],
    ['Lemons', 0.55, 15],
    ['Oranges', 0.25, 25],
    ['Pears', 0.59, 40],
    ['Almonds', 2.8, 10],
]

# INDEX on a single range, then on a tuple of ranges where the optional
# fourth argument selects which range to use (1-based).
assert C('=INDEX(A2:C7, 2, 3)', rows) == 38
assert C('=INDEX((A2:C4,A6:C7),2,2,2)', rows) == 2.8
assert C('=INDEX((A2:C4,A6:C7),2,2,1)', rows) == 0.34

In [None]:
# IF: when the third argument is omitted, a false condition yields False.
assert C('=IF(10>5,"Yes","No")') == 'Yes'
assert  C('=IF(10>5,"Yes")') == 'Yes'
# Empty args not yet supported
# assert C('=IF(10>5,"Yes",)') == 'Yes'
assert C('=IF(10<5,"Yes")') == False
# assert C('=IF(10<5,"Yes",)') == 0
# assert C('=IF(10>5,,"No")') == 0
# assert C('=IF(10>5,,)') == 0 
assert C('=IF(10>5,"Yes",20)') == 'Yes'
assert C('=IF(10<5,"Yes",20)') == 20

In [None]:
# Fixture for HLOOKUP: the first row holds the lookup keys.
rows = [
    ['Axles', 'Bearings', 'Bolts'],
    [4, 6, 9],
    [5, 7, 10],
    [6, 8, 11],
]

# The third argument is the 1-based row of the table to return; the optional
# fourth argument switches approximate matching on or off.
assert C('=HLOOKUP("Axles",A1:C4,2,TRUE)', context=rows) == 4
assert C('=HLOOKUP("Bearings",A1:C4,3,FALSE)', context=rows) == 7
assert C('=HLOOKUP("B",A1:C4,3,TRUE)', context=rows) == 5
assert C('=HLOOKUP("Bolts",A1:C4,4)', context=rows) == 11
# Array expressions not yet supported
# assert C('=HLOOKUP(3,{1,2,3;"a","b","c";"d","e","f"},2,TRUE)', context=rows) == 'c'

In [None]:
import pytest

# FV results are compared with an absolute tolerance of 0.01.
assert C('=FV(0.06/12,10,-200,-500,1)') == pytest.approx(2581.40, abs=1e-2)
assert C('=FV(0.12/12,12,-1000)') == pytest.approx(12682.50, abs=1e-2)
# This test doesn't work due to empty entry in comma-separated list
# assert C('=FV(0.11/12,35,-2000,,1)') == pytest.approx(82846.25, abs=1e-2)
assert C('=FV(0.06/12,12,-100,-1000,1)') == pytest.approx(2301.40, abs=1e-2)

In [None]:
# FIND returns a 1-based position: "de" starts at the 4th character of "abcdef".
assert C('=FIND("de", "abcdef")') == 4

In [None]:
# FALSE() is called without arguments and evaluates to False.
assert C('=FALSE()') == False

In [None]:
# FACT truncates its argument with int() first, so FACT(3.5) is 3! = 6.
assert C('=FACT(5)') == 120
assert C('=FACT(3.5)') == 6
assert C('=FACT(0)') == 1

In [None]:
# EXP maps directly onto math.exp.
assert C('=EXP(0)') == 1
assert C('=EXP(-1)') == pytest.approx(0.367879441)
assert C('=EXP(1)') == pytest.approx(2.718281828)
assert C('=EXP(2)') == pytest.approx(7.389056099)

In [None]:
# EXACT is a case-sensitive equality check on exactly two arguments;
# a third argument raises TypeError.
assert C('=EXACT("ABC", "ABC")') == True
assert C('=EXACT("ABC", "ABCD")') == False
assert C('=EXACT("Abc", "aBC")') == False
assert C('=EXACT("", "")') == True
with pytest.raises(TypeError):
    C('=EXACT("", "", "")')

In [None]:
# EVEN rounds away from zero to the next even integer.
assert C('=EVEN(1.5)') == 2
assert C('=EVEN(3)') == 4
assert C('=EVEN(2)') == 2
assert C('=EVEN(-1)') == -2

In [None]:
# DATE builds a datetime.date; DAY reads its `day` attribute.
assert C('=DATE(2021, 7, 31)') == date(2021, 7, 31)
assert C('=DAY(DATE(2021, 7, 31))') == 31

In [None]:
# 10x10 grid in which the cell at row i, column j holds i + j.
rows = [[i+j for j in range(10)] for i in range(10)]

assert C('=COUNTIF(A1:J10, "=3")', rows) == 4
assert C('=COUNTIF(A1:J10, "<0")', rows) == 0

In [None]:
# COUNTBLANK counts the cells whose value is None.
assert C('=COUNTBLANK(A1:D1)', context=[[1, None, 3, None]]) == 2

In [None]:
# COUNTA, as registered here, counts values that are neither None nor a str.
assert C('=COUNTA("foo", 3, 4, "bar")') == 2
assert C('=COUNTA(True, 3, 4, b)') == 3

In [None]:
# 10x10 grid in which the cell at row i, column j holds i + j.
rows = [[i+j for j in range(10)] for i in range(10)]

# COUNT only counts numbers; strings and booleans are skipped.
assert C('=COUNT("foo", 3, 4, "bar")') == 2
assert C('=COUNT(True, 3, 4, b)') == 2
assert C('=COUNT(A1:A4)', rows) == 4
assert C('=COUNT()') == 0

In [None]:
# COS maps onto math.cos and accepts exactly one argument.
assert C('=COS(37)') == math.cos(37)
with pytest.raises(TypeError):
    C('=COS(37, 10)')

In [None]:
# 10x10 grid in which the cell at row i, column j holds i + j.
rows = [[i+j for j in range(10)] for i in range(10)]

# COLUMNS is the width of the range; a range whose start lies past its end
# is rejected with ValueError when the Range is constructed.
assert C('=COLUMNS(A1:B4)', rows) == 2
assert C('=COLUMNS(A1:E7)', rows) == 5
assert C('=COLUMNS(A1:A2)', rows) == 1
with pytest.raises(ValueError):
    C('=COLUMNS(B7:A1)')

In [None]:
# CHOOSE picks the value at the given 1-based index; an index past the last
# value raises ValueError.
assert C('=CHOOSE(1, 3, 4, 5)') == 3
assert C('=CHOOSE(3, 3, 4, 5)') == 5
with pytest.raises(ValueError):
    C('=CHOOSE(4, 3, 4, 5)')

In [None]:
# 10x10 grid in which the cell at row i, column j holds i + j.
rows = [[i+j for j in range(10)] for i in range(10)]

# AVERAGEIF without and with a separate range to average over.
assert C('=AVERAGEIF(A1:A4, 3)', rows) == 3
assert C('=AVERAGEIF(A1:A4, ">=2")', rows) == 2.5
assert C('=AVERAGEIF(A1:A4, ">=2", B1:B4)', rows) == 3.5

In [None]:
# 10x10 grid in which the cell at row i, column j holds i + j.
rows = [[i+j for j in range(10)] for i in range(10)]

# SUM over a range, over scalars, over a mix of both and over the whole grid.
assert C('=SUM(A1:A2)', [[1], [2]]) == 3
assert C('=SUM(1, 2, 3, 4, 5)') == 15
assert C('=SUM(A1:B1, 3)', [[1, 2]]) == 6
assert C('=SUM(A1:J10)', rows) == sum(sum(row) for row in rows)

In [None]:
# End-to-end checks: arithmetic, function calls, evaluating a Ref directly
# and a formula that reads cells from the context.
assert C('=3*4') == 12
assert C('=(2+3)*4') == 20
assert C('=1e7 / 2') == 5_000_000
assert C('=SUM(3, 4)') == 7
assert C('=IF(3 < 4, "three is less than four", "huh?!")') == 'three is less than four'
assert evaluate(Ref(row=0, column=1), context=[[0, 1]]) == 1
assert C('=A1 + A$2', context=[[1, 2], [3, 4]]) == 4