Calculator
=

Goal: to parse expressions such as "4\*235^(1+12.4)" similar to how python would


In [27]:
verbrose = True

Setup
=

Parsing individual operations

In [52]:
######
# FP Arithmetic

op_eval_lookup = {
    "^" : float.__pow__,
    "/" : float.__floordiv__,
    "*" : float.__mul__,
    "-" : float.__sub__,
    "+" : float.__add__,
}

def evaluate(a, b, op):
    
    f = op_eval_lookup[op]
    result = f(float(a), float(b))
    
    if verbrose: print(a, op, b, "=", result)
    
    return result

In [53]:
######
# Int Arithmetic

op_eval_lookup = {
    "^" : int.__pow__,
    "/" : int.__floordiv__,
    "*" : int.__mul__,
    "-" : int.__sub__,
    "+" : int.__add__,
}

def evaluate(a, b, op):
    
    f = op_eval_lookup[op]
    result = f(int(a), int(b))
    
    if verbrose: print(a, op, b, "=", result)
    
    return result

In [55]:
evaluate(2 ,10, "^")
evaluate(1, 3, "+")

2 ^ 10 = 1024
1 + 3 = 4


4

Information about operator precedence

In [56]:
class Level:
    def __init__(self, order, members):
        self.order = order
        self.members = members
        
orderRL = True
orderLR = False

levels = [
    Level(orderRL, ["^"]),
    Level(orderLR, ["*", "/"]),
    Level(orderLR, ["+", "-"])
]

# Note: in python, operators of the same precedence are grouped
# left to right, EXCEPT EXPONENTIATION, which is grouped right to left.

Logic
=

The "reduce" method is the main item - it takes a list of numbers separated by operators, and a category of operators to the tokens. It then performs the calculations that are part of the level, reducing the size of the list.

"evalulate_flat" uses reduce for each level to turn a list of tokens into a single number. It does not support brackets, however

In [57]:
# Note that the list manipulations are very roundabout and not pythonic at all
# (With even a preallocation at the start)
# This is because this is a prototype code for another language with a far more 
# limited list definition

def reduce(tokens, level):

    n = 0
    for i in range(1, len(tokens)-1, 2):
        if tokens[i] not in level.members:
            n += 1
    output_length = 1 + 2*n
    output_tokens = [None] * output_length
    
    if level.order == orderLR:
        
        i = 0
        j = 0
        
        while i+2 < len(tokens):
            
            if tokens[i+1] in level.members:
                
                a = tokens[i]
                op = tokens[i+1]
                b = tokens[i+2]
                    
                result = evaluate(a, b, op)
                
                tokens[i] = None
                tokens[i+1] = None
                tokens[i+2] = result
                
            else:
                output_tokens[j] = tokens[i]
                output_tokens[j+1] = tokens[i+1]
                j += 2
            
            i += 2
            
        output_tokens[j] = tokens[i]
            
    elif level.order == orderRL:
        
        i = len(tokens) - 1
        j = output_length - 1
        
        while i-2 >= 0:
            
            if tokens[i-1] in level.members:
                
                a = tokens[i-2]
                op = tokens[i-1]
                b = tokens[i]
                
                result = evaluate(a, b, op)
                
                tokens[i-2] = result
                tokens[i-1] = None
                tokens[i] = None
                
            else:
                output_tokens[j] = tokens[i]
                output_tokens[j-1] = tokens[i-1]
                j -= 2
            
            i -= 2
            
        output_tokens[j] = tokens[i]

    return output_tokens
            
def evaluate_flat(tokens):
    
    for level in levels:
        tokens = reduce(tokens, level)
        
    return tokens[0]

In [58]:
tokens = [2, "*", 2, "^", 2, "^", 3, "+", 1]
evaluate_flat(tokens)

2 ^ 3 = 8
2 ^ 8 = 256
2 * 256 = 512
512 + 1 = 513


513

The "evaluate_tokens" accepts brackets as well as numbers and operators, acting recursively to evaluate inner groups before outer groups

In [34]:
def evaluate_tokens(tokens):
    
    outer = []
    inner = []

    depth = 0
    
    for token in tokens:
        
        if token == "(":
           
            depth += 1
        
        elif token == ")":
            
            depth -= 1
            
            if depth == 0:
                outer.append(evaluate_tokens(inner))
                inner = []

        else:
            if depth == 0:
                outer.append(token)
            else:
                inner.append(token)

    value = evaluate_flat(outer)
    return value

In [35]:
# (2 + 2) * (1 + (1 + 1))
tokens = ["(", 2, "+", 2, ")", "*", "(", 1, "+", "(", 1, "+", 1, ")", ")"]
evaluate_tokens(tokens)

2 + 2 = 4
1 + 1 = 2
2 + 1 = 3
4 * 3 = 12


12

Parsing
=

All that remains is to convert text into tokens

In [36]:
def check_bracket(text, index, tokens):
    
    if index >= len(text):
        return index
    
    if text[index] in "()":
        tokens.append(text[index])
        index += 1
    
    return index

def collect_number(text, index, tokens):
    
    if index >= len(text):
        return index
    
    number = ""
    
    while index < len(text) and (text[index] in "0123456789"):
        number += text[index]
        index += 1
    
    tokens.append(int(number))
    return index

def collect_operator(text, index, tokens):
    
    if index >= len(text):
        return index
    
    tokens.append(text[index])
    
    index += 1
    return index

In [37]:
text = "12+(34*34)"

index = 0
tokens = []
while index < len(text):
    index = check_bracket(text, index, tokens)
    index = collect_number(text, index, tokens)
    index = check_bracket(text, index, tokens)
    index = collect_operator(text, index, tokens)
    
print(tokens)

[12, '+', '(', 34, '*', 34, ')']


In [38]:

def evaluate_expression(text):
    
    if verbrose: print("Input:", text)
    
    index = 0
    tokens = []
    
    while index < len(text):
        index = check_bracket(text, index, tokens)
        index = collect_number(text, index, tokens)
        index = check_bracket(text, index, tokens)
        index = collect_operator(text, index, tokens)
    
    return evaluate_tokens(tokens)   

Testing
==

In [49]:
from random import randrange, choice

def reset():
    global count, passed, skipped
    count = 0
    passed = 0
    skipped = 0

def display():
    print("Count:", count)
    print("Passed:", passed)
    print("Skipped:", skipped)
    
def generate():
    
    text = str(randrange(100))
    
    for i in range(10):
        op = choice(["+", "-", "*", "/", "^"])
        text += op
        
        if op == "^":
            N = 3
        else:
            N = 100
        
        text += str(randrange(N))
            
    return text
    
def test():
    
    global count, passed, skipped
    count += 1
    
    text = generate()
    
    try:
        expected = int(eval(text.replace("^", "**").replace("/", "//")))
        skip = False
    except (ZeroDivisionError, OverflowError):
        skip = True
        
    if skip:
        skipped += 1
        return True
    else:
        result = evaluate_expression(text)
        if expected == result:
            passed += 1
            return True
        else:
            print("FAIL:", text)
            print("Expected:", expected)
            print("Result", result)
            return False

verbrose=False
reset()

# Test on lots of random expressions
for i in range(10000):
    if not test():
        break
        
display()


#######
# TWO MAIN REASONS FOR FAILURE:

#    - Floating point error - sometimes differences are very very small, which makes
#       me think that python does the order of operations differently sometimes where the 
#       changing the order would not make a difference in an analytical world
#
#    - Overflow - I wonder does python reorder things to avoid overflow, actually.
#
# These apply to floating point arithimetic. Integer mode is much more reliable.

Count: 10000
Passed: 9817
Skipped: 183


In [50]:
verbrose=True
evaluate_expression("62/4+53-25+75/44^1+54-50^1*57")

Input: 62/4+53-25+75/44^1+54-50^1*57
50 ^ 1 = 50
44 ^ 1 = 44
62 / 4 = 15
75 / 44 = 1
50 * 57 = 2850
15 + 53 = 68
68 - 25 = 43
43 + 1 = 44
44 + 54 = 98
98 - 2850 = -2752


-2752