# Parser 4.00 - Arithmetic Subtraction

The goal for this version of the parser is to be able to perform arithmetic subtraction.

Because addition and subtraction have the same precedence, changes to the parser are as minimal as they were for adding unary plus. Changes to the evaluator are a little more extensive.

## Libraries

In [None]:
import glob       # for searching directories

import re         # for regular expressions

## User output

In [None]:
visSep = '-------------'             # visual separator: written by UIwriteSep() and used as the header text of trace sections

def UIwriteln(this):
    '''send one value to output as a line, followed by an empty line'''
    print( f'{this}', end='\n\n' )
    
def UIwriteSep():
    '''send the visual separator to output as a line of its own'''
    separator = visSep
    UIwriteln( separator )

def UIshow(tag, value):
    '''send a value to output, labelled with its tag'''
    line = f'{tag}: {value}'
    UIwriteln( line )

def UIerror(this):
    '''send an error message to output under the 'Error' tag'''
    tag = 'Error'
    UIshow( tag, this )

# Tracing

In [None]:
# flags: show trace of processing
# - startUp() copies the flag it is given into showTrace
# - parse() passes showInteract; good(), bad() and regress() pass showBatch

showInteract = True          # default for interactive use
showBatch = False            # default for batch use

showTrace = None             # control flag: trace output is written only while this is truthy

# Trace Output

def TOshow(mesg, text):
    '''emit one trace line, but only while tracing is switched on'''
    if not showTrace:
        return
    label = f'{mesg:15s}'            # pad the label so the traced values line up
    UIshow( label, text )
        
def TOstring(tag, this):
    '''trace a sequence as a single line of space-separated items'''
    if not showTrace:
        return
    TOshow( tag, ' '.join(map(str, this)) )

# -----------------------
# Parse Tracing
# -----------------------

def PTshowexpr(this):
    '''trace a parse section header, then the expression passed in'''
    for label, text in ( ('Parse', visSep), ('Current Expr', this) ):
        TOshow( label, text )

def PTshowparse(ok, res, stk):
    '''trace the RPN built so far and the operator stack; silent when ok is false'''
    if not ok:
        return
    TOstring( 'Current RPN', res )
    TOstring( 'Operator Stack', stk )

def PTshowtoken(this):
    '''trace a matched token, unless it begins with a space'''
    if this[0] == ' ':
        return
    TOshow( "Found Token", this )

# -----------------------
# Evaluation Tracing
# -----------------------

def ETshowtoken(this):
    '''trace an evaluation section header, then the token passed in'''
    for label, text in ( ('Eval', visSep), ('Current token', this) ):
        TOshow( label, text )

def ETshoweval(ok, stk):
    '''trace the operand stack; silent when ok is false'''
    if not ok:
        return
    TOstring( 'Operand Stack', stk )


# Common

In [None]:
# inclusive bounds of the accepted integer range
# - the parser checks literals against intMax; the evaluator checks results against both
intMax =  4294967295                # 2**32-1, for range checking
intMin = -4294967296                # -(2**32)

# Parser

In [None]:
# parser version: shown by startUp() and used by currNum() to select the matching test files
versionNumber = '4.00'

# operands accepted:
# - decimal integer literals
# - hexadecimal integer literals

# operators accepted:
# - unary negation, plus
# - binary addition, subtraction

# errors detected:
# - unrecognized input
# - out of range numeric input
# - malformed expression

# result tuple:
# - (True, [parse])
# - (False, None)

def PEdoparse(this):
    '''Parse an infix expression string into Reverse Polish form.

    Operands are unsigned decimal or hexadecimal (0x...) integer literals.
    Unary - and + are emitted as 'U-' / 'U+'; binary + and - are emitted as
    'B+' / 'B-'. Spaces before, between and after tokens are ignored.

    Returns (True, rpn), where rpn is a list of int operands and operator
    strings, or (False, None) after a parse error has been reported through
    UIerror/UIwriteln.
    '''

    # initialize

    echo = '>>> '              # prefix used when echoing the expression in an error report
    marker = '^^near here'     # pointer text written beneath the echoed expression

    expr = this                # consumed from the left as we go; 'this' stays intact for error reports
    start = len(echo) + len(marker)   # rjust() width that puts the marker under the next unread character
    token = None               # anything successfully matched
    ok = wantoperand = True    # flags
    result = []                # rpn expression
    stk = [ ('EOE', 1) ]       # operator stack; the low-precedence sentinel is never popped

    def parseErr(mesg):
        '''report parse error with the expression and a position marker; always returns False'''
        UIerror(mesg)
        UIwriteln(f'{echo}{this}')
        UIwriteln(marker.rjust(start))
        return False

    def popGEop(prec):
        '''pop operators of equal or greater precedence'''
        while prec <= stk[-1][1]:
            result.append(stk.pop()[0])

    def pushLeft(op, prec):
        '''push left associative operator on stack'''
        popGEop(prec)
        stk.append( (op, prec) )

    def popGop(prec):
        '''pop operators of greater precedence'''
        while prec < stk[-1][1]:
            result.append(stk.pop()[0])

    def pushRight(op, prec):
        '''push right associative operator on stack'''
        popGop(prec)
        stk.append( (op, prec) )

    def convertUint(ulit, base):
        '''convert unsigned literal to internal form and append it to the rpn

        Returns True on success, or False (after reporting) when the value
        would exceed intMax.
        '''
        uint = 0

        # isolate the significant portion of 'ulit' (drops any '0x' prefix and leading zeros)

        p = re.search('[1-9A-F][0-9A-F]*', ulit.upper())

        if p is not None:
            for digit in p.group():
                digval = '0123456789ABCDEF'.find(digit)
                # test before accumulating; floor division keeps the check in exact integers
                if uint <= (intMax - digval) // base:
                    uint = uint * base + digval
                else:
                    return parseErr(f'\'{ulit}\' is out of range')

        result.append(uint)
        return True

    def startsWith(regex):
        '''test if expression starts with given regular expression; consume the match if so'''
        nonlocal expr, start, token

        p = re.match(regex, expr)
        if p is None:
            return False

        token = p.group()              # what we matched
        start += len(token)            # update to next match position in original string
        expr = expr[len(token):]       # "chop off" what we matched
        PTshowtoken(token)             # trace
        return True

    # top level main loop

    while ok and expr:

        startsWith('[ ]+')                                  # skip leading whitespace

        if not expr:                                        # only trailing spaces were left:
            break                                           # let the end-of-expression check decide

        PTshowexpr(expr)                                    # trace

        # look for operand

        if wantoperand:

            if startsWith('[-+]'):
                # unary negation or plus
                pushRight( 'U' + token, 80 )                # decorate

            else:

                wantoperand = False                         # flip

                if startsWith('0[xX][0-9a-fA-F]+'):
                    # unsigned hexadecimal literal
                    ok = convertUint(token, 16)

                elif startsWith('[0-9]+'):
                    # unsigned decimal literal
                    ok = convertUint(token, 10)

                else:
                    # malformed
                    ok = parseErr('Expecting operand')

        # look for operator

        else:

            wantoperand = True                              # flip

            if startsWith('[-+]'):
                # binary addition or subtraction
                pushLeft( 'B' + token, 60 )                 # decorate

            else:
                # malformed
                ok = parseErr('Expecting operator')

        PTshowparse(ok, result, stk)                        # trace

    if ok and wantoperand:
        ok = parseErr('Unexpected end of expression')       # must be in 'wantoperator' state

    if ok:
        popGEop( 3 )                                        # clear operator stack (stops at the sentinel)

    return (ok, result if ok else None)                     # done

### How it works

In the *wantoperator* state, we simply add a **'-'** character to the character class already consisting of just one **'+'** character. As with unary plus, we don't worry about exactly which operator we matched.

We do not need to do anything else. We can easily distinguish between **-** and *U-*, just as we already do for **+** and *U+*. We'll decorate them anyway with a *B* (for *B*inary) prefix and push them on the operator stack.

>If nothing else this makes them easier to read in the Reverse Polish.

# Evaluator

In [None]:
# operators handled:
# - unary negation, plus
# - binary addition, subtraction

# errors detected:
# - out of range

# return tuple:
# - (True, result)
# - (False, None)

def EEdoeval(rpn):
    '''Evaluate a Reverse Polish token list with range-checked arithmetic.

    Returns (True, value) on success, or (False, None) as soon as an
    operation would leave the intMin..intMax range (reported via UIerror).
    '''

    operands = []

    def inRange(fits, val):
        '''push val when the range test passed, else report the error; return the test result'''
        if not fits:
            UIerror( 'Evaluation result out of range' )
            return fits
        operands.append( val )
        return fits

    def unNeg():
        '''unary negation'''
        value = operands.pop()
        return inRange( value != intMin, -value )       # only intMin has no in-range negation

    def binAdd():
        '''binary addition'''
        rgt = operands.pop()
        lft = operands.pop()

        if lft >= 0:
            fits = rgt <= intMax - lft                   # only the upper bound can be crossed
        else:
            fits = rgt >= intMin - lft                   # only the lower bound can be crossed
        return inRange( fits, lft+rgt )

    def binSub():
        '''binary subtraction'''
        rgt = operands.pop()
        lft = operands.pop()

        if lft >= 0:
            fits = lft - intMax <= rgt                   # lft - rgt <= intMax, re-arranged
        else:
            fits = lft - intMin >= rgt                   # lft - rgt >= intMin, re-arranged
        return inRange( fits, lft-rgt )

    # operators that do work; 'U+' is deliberately absent because it changes nothing

    handlers = { 'U-': unNeg, 'B+': binAdd, 'B-': binSub }

    # main loop

    for item in rpn:

        ETshowtoken(item)

        handler = handlers.get(item) if isinstance(item, str) else None

        if handler is not None:
            if not handler():
                return (False, None)

        elif item != 'U+':                               # it's probably an operand
            operands.append( item )

        ETshoweval( True, operands )

    return ( True, operands.pop() )


### How it works

As with addition, there are four possible sign combinations of operands we need to consider:

1. positive - positive: out of range not possible
2. positive - negative: greater than *intMax* possible
3. negative - positive: less than *intMin* possible
4. negative - negative: out of range not possible

The tests we need to perform to verify our result will be within range are easy enough to write, but I had to think about them a while to understand why they will always work.

In the case of a positive left operand, we need to make sure:

```Python
lft - rgt <= intMax
```

If the right operand is also positive, the result cannot be greater than the left operand, so no overflow problem can happen. Neither can an underflow, as the minimum value occurs with:

```Python
0 - intMax == intMin + 1
```

But if the right operand is negative, then what happens is equivalent to adding its absolute value to the left operand and we need:

```Python
lft + abs(rgt) <= intMax
```

This looks very much like the condition for addition, but not quite. To avoid overflow during the test, we use:

```Python
lft - intMax <= rgt
```

Because *lft* is positive, the left hand side must be less than or equal to zero. If *rgt* is also positive, the condition is always true. If *rgt* is negative, this test checks that its absolute value is less than or equal to the absolute value of the left hand side.

Maybe so, but it still isn't immediately clear to me that it works. I have to try it to see.

Let's assume *lft* is *intMax*. Then the left hand side is zero. Every positive value of *rgt* works, and every negative one fails. Which is what we want, since no result value greater than *intMax* is allowed.

So far so good. What if *lft* is zero? Then the left hand side value is *intMin + 1* and every value of *rgt* except *intMin* works. Which is also what we want. Okay, I'm satisfied.

A similar analysis can be performed for the case of *lft* being negative. We'll leave that as an exercise for the interested reader.

## Running the parser

In [None]:
# running totals of expressions that succeeded (pass) or failed (fail); reset by startUp(), updated by parseOne()
passCnt = failCnt = 0                       # most useful for test input files, but never any harm

def startUp(flag):
    '''announce the parser version, then reset the counters and set the trace flag'''
    global passCnt, failCnt, showTrace
    UIshow( 'Parser', versionNumber )
    showTrace = flag
    passCnt, failCnt = 0, 0
    
def shutDown():
    '''finish a run: write a separator, then the pass and fail totals'''
    UIwriteSep()
    for tag, count in ( ('Pass', passCnt), ('Fail', failCnt) ):
        UIshow( tag, count )
    
# run parser

def parseOne(this):
    '''parse one expression, evaluate it if the parse succeeded, and count the outcome'''
    global passCnt, failCnt

    UIwriteSep()
    UIshow( 'Input', this )

    ok, res = PEdoparse( this )
    if ok:
        UIshow( 'Final Parse', res )
        ok, res = EEdoeval( res )

    # ok is now true only when both the parse and the evaluation succeeded
    if ok:
        UIshow( 'Final Eval', res )
        passCnt += 1
    else:
        failCnt += 1

## Interactive use

In [None]:
def parse():
    '''Read expressions from the keyboard and parse/evaluate each one.

    Loops until the input begins with 'Q' or 'q'. Empty or all-blank input is
    ignored and the prompt is shown again. Pass/fail totals are written when
    the loop ends.
    '''

    startUp(showInteract)
    while True:
        inp = input( 'Expression: ' )
        UIwriteln( '' )                      # looks better with a blank line here
        if inp[:1].upper() == 'Q':           # slice, not index: pressing Enter alone must not raise IndexError
            break
        elif inp.strip():
            parseOne( inp )
    shutDown()

## Batch processing

In [None]:
# NOTE(review): the backslash separators make this a Windows-style relative path
testDir = '..\\ParserTest\\'            # directory holding test input files (empty string if same as notebook directory)

# convert current version number to match test file numbers
# - done this way so we can update only the version number and everything still works

def currNum():
    '''build the test-file number from versionNumber, e.g. '4.00' gives '0400' '''
    cut = len(versionNumber) - 3
    major = versionNumber[:cut]              # everything before the last three characters
    minor = versionNumber[-2:]               # the last two characters
    return major.rjust(2, '0') + minor       # zero-pad the leading part to two characters

# make full path name to test file

def makePath(typ, num):
    '''return the path of the test file of the given type and number inside testDir'''
    stem = f'{typ}{num}'
    return f'{testDir}{stem}.txt'

# run one test

def runTest(this):
    '''run every expression in one test file through parseOne()'''

    UIwriteln(f'Parser {versionNumber} vs {this[-12:-4]}')

    with open(this) as f:
        lines = list(f)                      # read everything, then close the file

    for raw in lines:
        test = raw.strip()
        if not test:                         # skip blank lines
            continue
        if test[0] == '#':                   # skip comment lines
            continue
        parseOne(test)
    
# run a test of current or specified version which should succeed
    
def good(num='curr'):
    '''run the pass test file of the current version, or of the version number given'''

    startUp(showBatch)
    wanted = currNum() if num == 'curr' else num
    runTest(makePath('pass', wanted))
    shutDown()
    
# run a test of current or specified version which should fail

def bad(num='curr'):
    '''run the fail test file of the current version, or of the version number given'''

    startUp(showBatch)
    wanted = currNum() if num == 'curr' else num
    runTest(makePath('fail', wanted))
    shutDown()
    
# run regression test against current and all previous test files

def regress():
    '''Run the pass and fail test files of the current and all earlier versions.

    Pass files are run first, then fail files. Afterwards the pass files that
    produced at least one failure, and the fail files that produced at least
    one success, are listed.
    '''

    UIwriteln('PASS tests')

    currFn = makePath('pass', currNum())

    startUp(showBatch)
    failed = []
    # glob returns names in arbitrary order; sort so every run visits the files in the same order
    for fn in sorted(glob.glob(f'{testDir}pass????.txt')):
        if fn <= currFn:                     # skip files numbered above the current version
            atstart = failCnt
            runTest(fn)
            if atstart < failCnt:            # this file added at least one failure
                failed.append(fn)
    shutDown()

    UIwriteln('FAIL tests')

    currFn = makePath('fail', currNum())

    startUp(showBatch)
    passed = []
    for fn in sorted(glob.glob(f'{testDir}fail????.txt')):
        if fn <= currFn:                     # skip files numbered above the current version
            atstart = passCnt
            runTest(fn)
            if atstart < passCnt:            # this file added at least one unexpected success
                passed.append(fn)
    shutDown()

    if not failed:
        UIwriteln('All pass tests succeded')
    else:
        UIwriteln('Pass tests which failed')
        for fn in failed:
            UIwriteln(f'  {fn}')

    if not passed:
        UIwriteln('All fail tests succeded')
    else:
        UIwriteln('Fail tests which passed')
        for fn in passed:
            UIwriteln(f'   {fn}')
              

# Testing the parser

In [None]:
parse()       # interactive, one expression at a time; input starting with Q or q ends the session

In [None]:
good()        # run current parser against its own pass test file; use good('1234') to run against a specific pass test file

In [None]:
bad()         # run current parser against its own fail test file; use bad('5678') to run against a specific fail test file

In [None]:
regress()     # run parser against all previous and current test files: pass files first, then fail files