# Parser 4.10 - Arithmetic Multiplication

The goal for this version of the parser is to be able to perform arithmetic multiplication.

Changes to the parser are small, basically adding a new check in the *wantoperator* state. Changes to the evaluator are more extensive because we have new out of range checks to make.

## Libraries

In [None]:
import glob       # for searching directories
import os         # for building portable paths
import re         # for regular expressions

## User output

In [None]:
visSep = '-' * 13                    # visual separator: a run of thirteen dashes

def UIwriteln(this):
    '''write the given value to output, followed by one extra blank line'''
    # the explicit '\n' plus print()'s own newline is what yields the blank line
    text = '{}\n'.format(this)
    print( text )
    
def UIwriteSep():
    '''write the visual separator line (visSep) to output via UIwriteln'''
    UIwriteln( visSep )

def UIshow(tag, value):
    '''write a tagged value to output, formatted as "tag: value"'''
    UIwriteln( f'{tag}: {value}' )

def UIerror(this):
    '''write an error message to output, tagged with "Error"'''
    UIshow( 'Error', this )

# Tracing

In [None]:
# flags: show trace of processing
# - showInteract / showBatch are the values handed to startUp()
# - showTrace is the live switch tested by the trace output routines

showInteract = True          # default for interactive use (passed to startUp by parse)
showBatch = False            # default for batch use (passed to startUp by good, bad, regress)

showTrace = None             # control flag; assigned by startUp(), falsy until then

# Trace Output

def TOshow(mesg, text):
    '''emit one tagged trace line; does nothing unless tracing is enabled'''
    if not showTrace:
        return
    label = f'{mesg:15s}'            # pad the tag to 15 columns so values line up
    UIshow( label, text )
        
def TOstring(tag, this):
    '''trace a sequence as a single line of space-separated items'''
    if not showTrace:
        return                       # skip the join entirely when tracing is off
    joined = ' '.join(map(str, this))
    TOshow( tag, joined )

# -----------------------
# Parse Tracing
# -----------------------

def PTshowexpr(this):
    '''trace the start of a parse step: a separator line, then the given expression text'''

    TOshow( 'Parse', visSep )
    TOshow( 'Current Expr', this )

def PTshowparse(ok, res, stk):
    '''trace the RPN built so far and the operator stack, but only after a successful step'''
    if not ok:
        return
    for label, items in ( ('Current RPN', res), ('Operator Stack', stk) ):
        TOstring( label, items )

def PTshowtoken(this):
    '''trace a matched token, unless it begins with a blank (i.e. skipped whitespace)'''
    starts_blank = this[0] == ' '
    if starts_blank:
        return
    TOshow( 'Found Token', this )

# -----------------------
# Evaluation Tracing
# -----------------------

def ETshowtoken(this):
    '''trace the start of an evaluation step: a separator line, then the RPN token being processed'''
    
    TOshow( 'Eval', visSep )
    TOshow( 'Current token', this )

def ETshoweval(ok, stk):
    '''trace the operand stack, but only after a successful evaluation step'''
    if not ok:
        return
    TOstring( 'Operand Stack', stk )


# Common

In [None]:
intMax = 2**32 - 1                  # 4294967295, upper bound for range checking
intMin = -(2**32)                   # -4294967296, lower bound for range checking

# Parser

In [None]:
versionNumber = '4.10'       # shown by startUp(); currNum() derives the test file number from it

# operands accepted:
# - decimal integer literals
# - hexadecimal integer literals

# operators accepted:
# - unary negation, plus
# - binary addition, subtraction, multiplication

# errors detected:
# - unrecognized input
# - out of range numeric input
# - malformed expression

# result tuple:
# - (True, [parse])
# - (False, None)

def PEdoparse(this):
    '''Parse an infix arithmetic expression into an RPN (postfix) token list.

    Operands are unsigned decimal or hexadecimal (0x...) integer literals.
    Operators are unary '-' and '+' (emitted as 'U-' / 'U+'), binary '+' and
    '-' (emitted as 'B+' / 'B-') and binary '*' (emitted undecorated).
    Blanks between tokens, and after the last token, are ignored.

    this: the expression text

    Returns (True, rpn) on success, where rpn is a list of ints and operator
    strings in postfix order, or (False, None) after the error has been
    reported through UIerror / UIwriteln.
    '''

    # initialize

    expr = this                # unparsed remainder; 'this' stays intact for error reports
    start = 15                 # rjust() width for the error marker: len('>>> ') + len('^^near here'),
                               # increased by the length of everything consumed so far
    token = None               # text of the most recent successful match
    ok = wantoperand = True    # flags: no error so far / next token must be an operand
    result = []                # rpn expression
    stk = [ ('EOE', 1) ]       # operator stack of (symbol, precedence); the sentinel is never popped

    def parseErr(mesg):
        '''report parse error and mark where it was detected; always returns False'''
        UIerror(mesg)
        UIwriteln(f'>>> {this}')
        UIwriteln(f'{"^^near here".rjust(start)}')
        return False

    def popGEop(prec):
        '''pop operators of equal or greater precedence'''
        while prec <= stk[-1][1]:
            result.append(stk.pop()[0])

    def pushLeft(op, prec):
        '''push left associative operator on stack'''
        popGEop(prec)
        stk.append( (op, prec) )

    def popGop(prec):
        '''pop operators of greater precedence'''
        while prec < stk[-1][1]:
            result.append(stk.pop()[0])

    def pushRight(op, prec):
        '''push right associative operator on stack'''
        popGop(prec)
        stk.append( (op, prec) )

    # convert unsigned literal to internal form

    def convertUint(ulit, base):
        '''append the value of literal 'ulit' to the rpn; False (after reporting) if above intMax'''

        uint = 0

        # isolate the significant portion of 'ulit'
        # (skips leading zeros and, for hex, the '0X' prefix; no match means the value is zero)

        p = re.search(r'[1-9A-F][0-9A-F]*', ulit.upper())

        if p is not None:
            for digit in p.group():
                digval = '0123456789ABCDEF'.find(digit)
                # before-the-fact check that uint * base + digval <= intMax,
                # done in exact integer arithmetic
                if uint <= (intMax - digval) // base:
                    uint = uint * base + digval
                else:
                    return parseErr(f'\'{ulit}\' is out of range')

        result.append(uint)
        return True

    # test if expression starts with given regular expression

    def startsWith(regex):
        '''if 'expr' begins with 'regex', consume the match into 'token' and return True'''

        nonlocal expr, start, token

        p = re.match(regex, expr)
        if p is None:
            return False

        token = p.group()              # what we matched
        start += len(token)            # update to next match position in original string
        expr = expr[len(token):]       # "chop off" what we matched
        PTshowtoken(token)             # trace
        return True

    # top level main loop

    while ok and len(expr):

        _ = startsWith(r'[ ]+')                            # skip leading whitespace

        if not expr:                                       # only trailing blanks were left:
            break                                          # let the end-of-expression check decide

        PTshowexpr(expr)                                   # trace

        # look for operand

        if wantoperand:

            if startsWith(r'[-+]'):
                # unary negation or plus
                pushRight( 'U' + token, 80 )                # decorate

            else:

                wantoperand = False                         # flip

                if startsWith(r'0[xX][0-9a-fA-F]+'):
                    # unsigned hexadecimal literal
                    ok = convertUint(token, 16)

                elif startsWith(r'[0-9]+'):
                    # unsigned decimal literal
                    ok = convertUint(token, 10)

                else:
                    # malformed
                    ok = parseErr('Expecting operand')

        # look for operator

        else:

            wantoperand = True                                # flip

            if startsWith(r'[*]'):
                # binary multiplication
                pushLeft( token, 70 )

            elif startsWith(r'[-+]'):
                # binary addition or subtraction
                pushLeft( 'B' + token, 60 )                   # decorate

            else:
                # malformed
                ok = parseErr('Expecting operator')

        PTshowparse(ok, result, stk )                         # trace

    if ok and wantoperand:
        ok = parseErr('Unexpected end of expression')         # must be in 'wantoperator' state

    if ok:
        popGEop( 3 )                                          # clear operator stack (sentinel stays)

    return (ok, result if ok else None)                       # done

### How it works

In the *wantoperator* state, we add a check for the **'\*'** character which indicates multiplication. This is a separate check because the operator has a higher precedence than addition or subtraction. However multiplication has a lower precedence than negation, so we give it a precedence value between those two.

>Another way to organize this is to group all binary operators together in one regular expression so we only make one check. If we find one, then use the actual token found as an index into a table of precedence values.

>In Python we can use a [dictionary](https://docs.python.org/3/tutorial/datastructures.html#dictionaries) to accomplish this. The process might look something like this:

```Python
precedenceOf = dict([('-', 60), ('+', 60), ('*', 70)])    # initialization

...

if startsWith( '[-+*]' ):
    pushLeft( 'B' + token, precedenceOf[token] )          # 'wantoperator' state
```  
               
>This arrangement becomes more and more attractive as the number of operators we want to recognize increases, because the code in the main loop is less cluttered and easier to read. A drawback is that the precedence table has to be initialized, and as the number of operators increases that process can be made just as cluttered as the mess we're trying to get away from in the main loop.

>Nevertheless we will eventually adopt something similar to this strategy.

# Evaluator

In [None]:
# operators handled:
# - unary negation, plus
# - binary addition, subtraction, multiplication

# errors detected:
# - out of range

# return tuple:
# - (True, result)
# - (False, None)

def EEdoeval(rpn):
    '''Evaluate an RPN token list.

    rpn: sequence of operands and the operator strings 'U-', 'U+', 'B+',
         'B-' and '*'

    Every operator result is range checked against intMin..intMax before
    the operation is carried out.

    Returns (True, value) on success, or (False, None) once an out of range
    result has been reported through UIerror.
    '''

    operands = []                        # operand stack

    def inRange(ok, val):
        '''push 'val' when the range test 'ok' passed, else report the error; returns 'ok' '''
        if not ok:
            UIerror( 'Evaluation result out of range' )
            return ok
        operands.append( val )
        return ok

    def popPair():
        '''pop the right, then the left operand; returned as (lft, rgt)'''
        rgt = operands.pop()
        lft = operands.pop()
        return lft, rgt

    def unNeg():
        '''unary negation'''
        arg = operands.pop()
        return inRange( arg != intMin, -arg )      # only -intMin falls outside the range

    def binAdd():
        '''binary addition'''
        lft, rgt = popPair()
        # non-negative lft can only exceed intMax, negative lft can only go below intMin
        fits = rgt <= intMax - lft if lft >= 0 else rgt >= intMin - lft
        return inRange( fits, lft + rgt )

    def binSub():
        '''binary subtraction'''
        lft, rgt = popPair()
        # non-negative lft can only exceed intMax, negative lft can only go below intMin
        fits = lft - intMax <= rgt if lft >= 0 else lft - intMin >= rgt
        return inRange( fits, lft - rgt )

    def binMul():
        '''binary multiplication'''
        lft, rgt = popPair()

        # zero cannot overflow, and must not be used as a divisor below

        if lft == 0 or rgt == 0:
            return inRange( True, 0 )

        if lft > 0 and rgt > 0:
            fits = rgt <= intMax / lft             # req: lft * rgt <= intMax
        elif lft > 0:
            fits = rgt >= intMin / lft             # req: lft * rgt >= intMin
        elif rgt > 0:
            fits = rgt <= intMin / lft             # req: lft * rgt >= intMin, inequality reversed
        else:
            fits = rgt >= intMax / lft             # req: lft * rgt <= intMax, inequality reversed

        return inRange( fits, lft * rgt )

    # operator token -> handler; 'U+' is a no-op and anything else is taken to be an operand

    handlers = { 'U-': unNeg, 'B+': binAdd, 'B-': binSub, '*': binMul }

    # main loop

    for v in rpn:

        ETshowtoken(v)

        handler = handlers.get(v)

        if handler is not None:
            if not handler():
                return (False, None)

        elif v != 'U+':
            operands.append( v )

        ETshoweval( True, operands )

    return ( True, operands.pop() )


# How it works

In general, multiplication of two **M** bit numbers can yield a result up to **2 \* M** bits long. If we consider anything over **M** bits to be overflow, then there are more pairs of integers that can cause overflow than pairs which do not.

>Basically, overflow can occur for any two numbers whose most significant one bit positions add up to more than **M**. For example, 65,536 (2\*\*16) and 131,072 (2\*\*17) are both much less than 4,294,967,296 (2\*\*32). But multiply them together and the result is 8,589,934,592 (2\*\*33), overflowing a 32-bit value.

Those who worry about multiplication overflow are typically offered two pieces of advice:

- do the multiplication in a larger type
- do the multiplication in assembly language.

The after-the-fact approach of the first suggestion doesn’t work if there is no larger type to convert into. Though if we were not committed to a before-the-fact approach we could easily implement it here. Python 3's arbitrary-precision integers allow any size result, after which we could compare to our artificial limit at leisure.

In other languages we'd still be mostly safe because the underlying [floating point arithmetic](https://en.wikipedia.org/wiki/Floating-point_arithmetic) of most high level languages actually is [IEEE 754](https://en.wikipedia.org/wiki/IEEE_754) double precision, if only because most current processors have built-in hardware support for it. Casting a float (24 bits precision) to a double (53 bits precision) before multiplying would guarantee we could always check that the result does not exceed 48 bits. We would not be perfectly safe though, since the 33-bit precision of our numbers could potentially create 66-bit results. So we won’t be doing that.

The second recommendation takes advantage of the fact that the arithmetic-logical unit of almost all hardware processors can signal that a result has overflowed, i.e., has more bits than a hardware register can hold. This information is easy to get at in the processor's assembly language, but almost no high level language has an ability to access it. As Python is one of those languages, we won’t be doing that either.

What we will do is try to detect the possibility of multiplication overflow before it happens. As before, we’ll begin with the four operand sign combinations to get an idea of what we’re up against:

1. positive * positive: greater than *intMax* possible
2. positive * negative: less than *intMin* possible
3. negative * positive: less than *intMin* possible
4. negative * negative: greater than *intMax* possible

The straightforward way to test for pending overflow is to divide whatever the limit is by one of the operands. We choose the left operand. While by appearances it might seem we could get away with only two tests, unfortunately that is not so.  Because of the algebraic rule that dividing or multiplying an inequality by a negative number requires that the sense of the inequality be reversed, we need four different tests.

>Actually five tests, since we also have to check that *lft* is not zero. As long as we must do that, it seems cleaner to check *rgt* at the same time.

Perhaps there are faster ways to check for pending overflow. There is still no denying that making any checks at all increases the overall execution time. Which may explain why overflow checks are so rarely encountered in production code. Checking takes time, and there are those who consider a fast result better than an accurate one.

It’s often possible to get away with this. Sometimes overflow happens but leads to nothing more serious than a [game abruptly ending](https://www.mentalfloss.com/uk/games/31376/why-does-donkey-kong-break-on-level-22). Other times it might be the root cause of an expensive [rocket launch failure](https://en.wikipedia.org/wiki/Cluster_%28spacecraft%29).

## Running the parser

In [None]:
passCnt = failCnt = 0                       # running totals: reset by startUp(), updated by parseOne(), reported by shutDown()

def startUp(flag):
    '''begin execution: show the parser version, zero the pass/fail counters and set tracing to 'flag' '''
    global passCnt, failCnt, showTrace
    UIshow( 'Parser', versionNumber )
    passCnt = failCnt = 0
    showTrace = flag             # truthy enables the trace output routines
    
def shutDown():
    '''terminate execution: write a separator followed by the pass and fail totals'''
    UIwriteSep()
    UIshow( 'Pass', passCnt )
    UIshow( 'Fail', failCnt )
    
# run parser

def parseOne(this):
    '''parse and evaluate a single expression, showing each stage and counting it as a pass or a fail'''
    global passCnt, failCnt

    UIwriteSep()
    UIshow( 'Input', this )

    succeeded, rpn = PEdoparse( this )

    if succeeded:
        UIshow( 'Final Parse', rpn )
        succeeded, value = EEdoeval( rpn )       # only a parsed expression is evaluated
        if succeeded:
            UIshow( 'FinalEval', value )

    # a pass requires both stages to succeed

    if not succeeded:
        failCnt += 1
        return

    passCnt += 1

## Interactive use

In [None]:
def parse():
    '''Interactive loop: prompt for expressions and parse/evaluate each one.

    Input whose first character is 'q' or 'Q' ends the session. Empty or
    all-blank input is ignored and the prompt is shown again. Pass and fail
    totals are written when the session ends.
    '''

    startUp(showInteract)
    while True:
        inp = input( 'Expression: ' )
        UIwriteln( '' )                      # looks better with a blank line here
        # slice rather than index: an empty line has no first character
        if inp[:1].upper() == 'Q':
            break
        if inp.strip():
            parseOne( inp )
    shutDown()

## Batch processing

In [None]:
# directory holding test input files (empty string if same as notebook directory)
# - built with os.path.join so the separators suit the host platform
# - the trailing '' keeps the final separator that makePath() relies on
testDir = os.path.join('..', 'ParserTest', '')

# convert current version number to match test file numbers
# - done this way so we can update only the version number and everything still works

def currNum():
    '''turn versionNumber into a test file number, e.g. '4.10' becomes '0410' '''
    cut = len(versionNumber) - 3                 # drops the separator and the two trailing digits
    major, minor = versionNumber[:cut], versionNumber[-2:]
    return major.rjust(2, '0') + minor           # zero-pad the leading part to two characters

# make full path name to test file

def makePath(typ, num):
    '''build the path of a test file: testDir, then 'typ', then 'num', then ".txt" '''
    return '{}{}{}.txt'.format(testDir, typ, num)

# run one test

def runTest(this):
    '''run every expression in test file 'this' through parseOne'''

    # this[-12:-4] is the eight characters just before the 4-character extension
    UIwriteln(f'Parser {versionNumber} vs {this[-12:-4]}')

    with open(this) as f:
        lines = list(f)                      # read everything before parsing starts

    for raw in lines:
        expr = raw.strip()
        if not expr or expr.startswith('#'): # skip blank and comment lines
            continue
        parseOne(expr)
    
# run a test of current or specified version which should succeed
    
def good(num='curr'):
    '''run the 'pass' test file for the current version, or for test number 'num' if given'''
  
    startUp(showBatch)
    runTest(makePath('pass', currNum() if num == 'curr' else num))
    shutDown()
    
# run a test of current or specified version which should fail

def bad(num='curr'):
    '''run the 'fail' test file for the current version, or for test number 'num' if given'''
    
    startUp(showBatch)
    runTest(makePath('fail', currNum() if num == 'curr' else num))
    shutDown()
    
# run regression test against current and all previous test files

def regress():
    '''Run the current parser against the current and all earlier test files.

    First every 'pass' file is run and those in which any expression failed
    are collected, then every 'fail' file is run and those in which any
    expression passed are collected. A summary of unexpected outcomes is
    written at the end.
    '''

    def sweep(kind, unexpected):
        '''run all 'kind' test files up to the current version

        kind:       'pass' or 'fail', the test file name prefix
        unexpected: callable returning the counter that should not move

        Returns the files during which that counter increased.
        '''
        currFn = makePath(kind, currNum())

        startUp(showBatch)
        flagged = []
        # sorted: glob order is unspecified, this runs the files in ascending order
        for fn in sorted(glob.glob(f'{testDir}{kind}????.txt')):
            if fn <= currFn:                 # ignore files for later versions
                atstart = unexpected()
                runTest(fn)
                if atstart < unexpected():
                    flagged.append(fn)
        shutDown()
        return flagged

    # the counters are read through lambdas because startUp() rebinds the globals

    UIwriteln('PASS tests')
    failed = sweep('pass', lambda: failCnt)

    UIwriteln('FAIL tests')
    passed = sweep('fail', lambda: passCnt)

    if not failed:
        UIwriteln('All pass tests succeeded')
    else:
        UIwriteln('Pass tests which failed')
        for fn in failed:
            UIwriteln(f'  {fn}')

    if not passed:
        UIwriteln('All fail tests succeeded')
    else:
        UIwriteln('Fail tests which passed')
        for fn in passed:
            UIwriteln(f'   {fn}')
              

# Testing the parser

In [None]:
parse()       # interactive, one expression at a time; input starting with 'q' or 'Q' ends the session

In [None]:
good()        # run current parser against its own pass test file. Use good('1234') to run against a specific pass test file.

In [None]:
bad()         # run current parser against its own fail test file. Use bad('5678') to run against a specific fail test file.

In [None]:
regress()     # run parser against the current and all previous pass and fail test files