# Parser 7.00 - Logical Negation, Equality and Inequality

The goal of this version is to find out how easy it is to add additional right-associative unary and left-associative binary operators to the parser as it stands.

The logical operators chosen, while handy in their own right, are in some ways just a means to that end.

## Libraries

In [None]:
import glob       # for searching directories

import re         # for regular expressions

## User output

In [None]:
visSep = '-------------'             # visual separator: written by UIwriteSep() and in the trace headers

def UIwriteln(this):
    '''Print `this` on its own line, followed by one blank line.'''
    print( f'{this}', end='\n\n' )
    
def UIwriteSep():
    '''Emit the visual separator as an output line of its own.'''
    separator = visSep
    UIwriteln( separator )

def UIshow(tag, value):
    '''Write `value` to output labelled with `tag`, as "tag: value".'''
    labelled = f'{tag}: {value}'
    UIwriteln( labelled )

def UIerror(this):
    '''Report `this` to output under the "Error" tag.'''
    UIshow( tag='Error', value=this )

# Tracing

In [None]:
# flags: show trace of processing
# - showInteract / showBatch are the per-mode defaults passed to startUp()
# - showTrace is the live switch tested by the trace output functions

showInteract = True          # default for interactive use
showBatch = False            # default for batch use

showTrace = None             # control flag (falsy, so tracing is off, until startUp() assigns it)

# Trace Output

def TOshow(mesg, text):
    '''Emit a tagged trace line, but only while tracing is switched on.

    The tag `mesg` is padded to at least 15 characters so values line up.
    '''
    if not showTrace:
        return
    label = f'{mesg:15s}'
    UIshow( label, text )
        
def TOstring(tag, this):
    '''Trace the items of `this` as one space-separated string (when tracing).'''
    if not showTrace:
        return
    joined = ' '.join(map(str, this))
    TOshow( tag, joined )

# -----------------------
# Parse Tracing
# -----------------------

def PTshowexpr(this):
    '''Trace the start of a parse step: a separator, then the text `this`.'''
    for label, text in ( ('Parse', visSep), ('Current Expr', this) ):
        TOshow( label, text )

def PTshowparse(ok, res, stk):
    '''Trace the RPN built so far and the operator stack; silent when `ok` is false.'''
    if not ok:
        return
    TOstring( 'Current RPN', res )
    TOstring( 'Operator Stack', stk )

def PTshowtoken(this):
    '''Trace a matched token, unless it starts with a blank.'''
    if this[0] == ' ':
        return
    TOshow( "Found Token", this )

# -----------------------
# Evaluation Tracing
# -----------------------

def ETshowtoken(this):
    '''Trace the start of an evaluation step: a separator, then the token `this`.'''
    for label, text in ( ('Eval', visSep), ('Current token', this) ):
        TOshow( label, text )

def ETshoweval(stk):
    '''Trace the current contents of the operand stack `stk`.'''
    label = 'Operand Stack'
    TOstring( label, stk )


# Parser

In [None]:
# operands accepted:
# - decimal integer literals
# - hexadecimal integer literals

# operators accepted:
# - unary negation, plus
# - binary addition, subtraction, multiplication, division
# - grouping parentheses
# - logical not, equality, inequality

# errors detected:
# - unrecognized input
# - out of range numeric input
# - malformed expression

# result tuple:
# - (True, [parse])
# - (False, None)

class Parser(object):
    '''Convert an infix expression string into a reverse Polish (RPN) list.

    Operands are unsigned decimal or hexadecimal integer literals. Operators
    are unary ``- + !``, binary ``* / + - == !=`` and grouping parentheses.
    In the RPN, operands are integers and operators are strings prefixed
    with ``U`` (unary) or ``B`` (binary), e.g. ``'U-'`` or ``'B=='``.
    '''

    VERSIONNUMBER = '7.00'

    _INTMAX =  4294967295                # 2**32-1
    _INTMIN = -4294967296                # -(2**32)

    _unPrefxOp = '[-+!]'                                   # unary operators

    _unPrec = { '-': 80, '+': 80, '!': 80 }                # unary operator precedence

    _binInfxOp = '[-+*/]|==|!='                            # binary operators

    _binPrec = {
        '*': 70, '/': 70,
        '-': 60, '+': 60,
        '==': 50, '!=': 50
    }

    def __init__(self):
        pass

    def doparse(self, this):
        '''Parse the expression string `this`.

        Returns (True, rpn) on success, or (False, None) after an error has
        been reported through UIerror() / UIwriteln().
        '''

        # initialize

        errPrefix = '>>> '                 # printed in front of the echoed expression
        errMarker = '^^near here'          # right justified so '^^' lands under the error position

        expr = this                # unparsed remainder; 'this' is kept intact for error reports
        start = len(errPrefix) + len(errMarker)    # rjust() width that puts '^^' under the next unparsed character
        token = None               # anything successfully matched
        ok = wantoperand = True    # flags
        result = []                # rpn expression
        stk = [ ('EOE', 1) ]       # operator stack; the 'EOE' sentinel has the lowest precedence

        def parseErr(mesg):
            '''report parse error; always returns False'''
            UIerror(mesg)
            UIwriteln(f'{errPrefix}{this}')
            UIwriteln(errMarker.rjust(start))
            return False

        def popGEop(prec):
            '''pop operators of equal or greater precedence'''
            while prec <= stk[-1][1]:
                result.append(stk.pop()[0])

        def pushLeft(op, prec):
            '''push left associative operator on stack'''
            popGEop(prec)
            stk.append( (op, prec) )

        def popGop(prec):
            '''pop operators of greater precedence'''
            while prec < stk[-1][1]:
                result.append(stk.pop()[0])

        def pushRight(op, prec):
            '''push right associative operator on stack'''
            popGop(prec)
            stk.append( (op, prec) )

        def popUntil(op, prec):
            '''clear operator stack down to `op`; report an error if `op` is not what we find'''
            popGEop(prec)
            if op == stk.pop()[0]:      # top remaining operator is the one we want to see ?
                return True
            if op == '(':
                return parseErr('Unmatched right parenthesis')
            return parseErr('Unmatched left parenthesis')      # op is 'EOE'

        def convertUint(ulit, base):
            '''convert unsigned literal to internal form and append it to the rpn'''

            uint = 0

            # isolate the significant portion of 'ulit'
            # (skips leading zeros and the '0X' prefix; no match means the value is zero)

            p = re.search('[1-9A-F][0-9A-F]*', ulit.upper())

            if p is not None:
                for digit in p.group():
                    digval = '0123456789ABCDEF'.find(digit)
                    # range test is made before the multiply-add, in integer arithmetic
                    if uint <= (self._INTMAX - digval) // base:
                        uint = uint * base + digval
                    else:
                        return parseErr(f'\'{ulit}\' is out of range')

            result.append(uint)
            return True

        def startsWith(regex):
            '''test if expression starts with given regular expression; consume the match if so'''
            nonlocal expr, start, token

            p = re.match(regex, expr)
            if p is None:
                return False

            token = p.group()              # what we matched
            start += len(token)            # update to next match position in original string
            expr = expr[len(token):]       # "chop off" what we matched
            PTshowtoken(token)             # trace
            return True

        # top level main loop

        while ok and expr:

            startsWith('[ ]+')                                 # skip leading whitespace

            if not expr:
                break                                          # only trailing blanks remained: input is exhausted

            PTshowexpr(expr)                                   # trace

            # look for operand

            if wantoperand:

                if startsWith('[(]'):
                    # left parenthesis: precedence 2 is below every operator, so pops stop at it
                    stk.append( ('(', 2) )

                elif startsWith(self._unPrefxOp):
                    # unary prefix operator
                    pushRight( 'U' + token, self._unPrec[token] )

                else:

                    wantoperand = False                         # flip

                    if startsWith('0[xX][0-9a-fA-F]+'):
                        # unsigned hexadecimal literal
                        ok = convertUint(token, 16)

                    elif startsWith('[0-9]+'):
                        # unsigned decimal literal
                        ok = convertUint(token, 10)

                    else:
                        # malformed
                        ok = parseErr('Expecting operand')

            # look for operator

            else:

                if startsWith('[)]'):
                    ok = popUntil( '(', 4 )

                else:

                    wantoperand = True                                # flip

                    if startsWith(self._binInfxOp):
                        # binary infix operator
                        pushLeft( 'B' + token, self._binPrec[token] )

                    else:
                        # malformed
                        ok = parseErr('Expecting operator')

            PTshowparse(ok, result, stk )                         # trace

        if ok:
            if wantoperand:
                ok = parseErr('Unexpected end of expression')     # must be in 'wantoperator' state
            else:
                ok = popUntil( 'EOE', 3 )                         # clear operator stack

        return (ok, result if ok else None)                       # done

### How it works

Changes to the parser are minimal and confined entirely to four class variables.

We add three new operators (taken from the [C programming language](https://en.wikipedia.org/wiki/C_(programming_language)#Operators), as most of our operators have been and will be), one to *_unPrefxOp* and two to *_binInfxOp*. We also add them and their precedences to the *_unPrec{}* and *_binPrec{}* dictionaries.

The only slight complication is that **'=='** and **'!='** are two characters each rather than one. That only means we can't include them in a regular expression class and instead must specify them as alternative matches in *_binInfxOp*.

>If we wanted the logical negation operator to be Python's **'not'** instead of **'!'**, we could specify it the same way as an alternative match in *_unPrefxOp*.

That's it. The parser can now recognize these logical operators in expressions.

>If there's a price to be paid for this simplicity, it's that alternative possible parses are ignored. This version will parse
```Python
A logop B logop C
```
>as
```Python
(A logop B) logop C
```
>where the evaluation of any *logop* is either numeric one (*True*) or zero (*False*) and uses that value as the left operand of the second *logop*.

>Python itself parses
```Python
A logop B logop C
```
>as
```Python
(A logop B) and (B logop C)
```
>Evaluation of any *logop* is either *True* or *False*, a Boolean result rather than a plain integer (although Python's *bool* is a subclass of *int*, so *True == 1* and *False == 0*).


# Evaluator

In [None]:
# operators handled:
# - unary negation, plus
# - binary addition, subtraction, multiplication, division
# - logical negation, equality and inequality

# errors detected:
# - out of range
# - division by zero

# return tuple:
# - (True, result)
# - (False, None)

class Evaluator(Parser):
    '''Evaluate an RPN list of integer operands and operator strings.

    Derives from Parser so the range limits _INTMAX and _INTMIN are shared.
    '''

    def __init__(self):
        pass

    def doeval(self, rpn):
        '''Evaluate the RPN list `rpn`.

        Returns (True, value) on success, or (False, None) after an error
        (result out of range, division by zero) has been reported through
        UIerror().
        '''

        def inRange(ok, val):
            '''push `val` if the range test `ok` passed, else report the error'''
            if ok:
                stk.append( val )
            else:
                UIerror( 'Evaluation result out of range' )
            return ok

        def pushOperand(val):
            '''push operand on stack (no range check needed)'''
            stk.append( val )
            return True

        # unary plus needs no function of its own: 'U+' dispatches straight to pushOperand()

        def unNeg(arg):
            '''unary negation'''
            return inRange( arg != self._INTMIN, -arg )

        def unNot(arg):
            '''logical not'''
            return pushOperand( not arg )

        def binAdd(rgt, lft):
            '''binary addition'''
            if lft >= 0:
                return inRange( rgt <= self._INTMAX - lft, lft+rgt )
            else:
                return inRange( rgt >= self._INTMIN - lft, lft+rgt )

        def binSub(rgt, lft):
            '''binary subtraction'''
            if lft >= 0:
                return inRange( lft - self._INTMAX <= rgt, lft-rgt )
            else:
                return inRange( lft - self._INTMIN >= rgt, lft-rgt )

        def binMul(rgt, lft):
            '''binary multiplication'''
            if lft == 0 or rgt == 0:
                return pushOperand( 0 )

            if lft > 0:
                if rgt > 0:
                    return inRange( rgt <= self._INTMAX / lft, lft*rgt )
                else:
                    return inRange( rgt >= self._INTMIN / lft, lft*rgt )

            else:
                if rgt > 0:
                    return inRange( rgt <= self._INTMIN / lft, lft * rgt )
                else:
                    return inRange( rgt >= self._INTMAX / lft, lft * rgt )

        def binDiv(rgt, lft):
            '''binary division'''
            if rgt == 0:
                UIerror( 'Division by zero' )
                return False

            quot = lft // rgt                   # floored division so result is an integer
            if quot > self._INTMAX:             # e.g. _INTMIN // -1 is one more than _INTMAX
                return inRange( False, quot )
            return pushOperand( quot )

        def binEqu(rgt, lft):
            '''logical equality'''
            return pushOperand( lft == rgt )

        def binNeq(rgt, lft):
            '''logical inequality'''
            return pushOperand( lft != rgt )

        # initialize

        unDispatch = {
            'U-': unNeg,
            'U+': pushOperand,
            'U!': unNot
        }

        binDispatch = {
            'B+': binAdd,
            'B-': binSub,
            'B*': binMul,
            'B/': binDiv,
            'B==': binEqu,
            'B!=': binNeq
        }

        stk = []
        ok = True

        # main loop

        for v in rpn:

            ETshowtoken(v)

            if v in binDispatch:
                # first pop is the right operand, second pop the left
                ok = binDispatch[v](stk.pop(), stk.pop())

            elif v in unDispatch:
                ok = unDispatch[v](stk.pop())

            else:
                stk.append( v )                 # anything else is an operand

            if not ok:
                return ( False, None )

            ETshoweval( stk )

        return ( True, stk.pop() )


### How it works

The three functions that perform our three new operations are all much simpler than any of the arithmetic functions. Since their results can only ever be zero (*False*) or one (*True*), they cannot fail and there is no need to do any error checking.

Once these functions are added to *doeval()*, we also enter them into our dispatch dictionaries. We introduce a new function *pushOperand()* to handle the case of pushing a value on the operand stack without requiring a check. We update *binMul()* and *binDiv()* to use it as well, and implement *U+* by pointing it directly to *pushOperand()*.

>While we certainly could continue to use *inRange()* to do this task, having six potential users of a more direct approach with less overhead makes the change seem worth making. If we wished, we could update *inRange()* to use it as well. That's a matter of taste.

And that's it. Writing three small functions and adding their names to the appropriate dictionaries is all that is required (the fourth is optional).

While we could go on adding every right associative unary and left associative binary operator we could think of, doing so would not teach us anything new. So in the next version we’ll move on to something else.

## Running the parser

In [None]:
passCnt = failCnt = 0                       # counts of expressions that succeeded / failed; reset by startUp(), updated by parseOne()

myParser = myEvaluator = None               # where we keep instances of our classes (created by startUp() when missing)

def startUp(flag):
    '''Prepare a run.

    Creates the shared parser and evaluator if they do not exist yet, shows
    the parser version, zeroes the pass/fail counters and sets tracing to
    `flag`.
    '''
    global passCnt, failCnt, showTrace
    global myParser, myEvaluator
    myParser = myParser or Parser()
    myEvaluator = myEvaluator or Evaluator()
    UIshow( 'Parser', myParser.VERSIONNUMBER )
    passCnt, failCnt = 0, 0
    showTrace = flag
    
def shutDown():
    '''Finish a run by writing a separator and the pass/fail totals.'''
    UIwriteSep()
    for label, count in ( ('Pass', passCnt), ('Fail', failCnt) ):
        UIshow( label, count )
    
# run parser

def parseOne(this):
    '''Parse then evaluate the expression `this`, showing each result.

    Counts the expression as a pass only when both stages succeed;
    otherwise it is counted as a fail.
    '''
    global passCnt, failCnt
    UIwriteSep()
    UIshow( 'Input', this )

    parsed, rpn = myParser.doparse( this )
    if not parsed:
        failCnt += 1
        return

    UIshow( 'Final Parse', rpn )
    evaluated, value = myEvaluator.doeval( rpn )
    if not evaluated:
        failCnt += 1
        return

    UIshow( 'Final Eval', value )
    passCnt += 1

## Interactive use

In [None]:
def parse():
    '''Interactive loop: read expressions until the user enters "quit".

    Input that is empty or only whitespace is ignored.
    '''
    startUp(showInteract)
    done = False
    while not done:
        inp = input( 'Expression: ' )
        UIwriteln( '' )                      # looks better with a blank line here
        done = inp.lower() == 'quit'
        if not done and inp.strip():
            parseOne( inp )
    shutDown()

## Batch processing

In [None]:
# NOTE(review): the backslash separators make this path Windows-specific
testDir = '..\\ParserTest\\'            # directory holding test input files (empty string if same as notebook directory)

# convert current version number to match test file numbers
# - done this way so we can update only the version number and everything still works

def currNum():
    '''Return the parser version in test-file form, e.g. '7.00' gives '0700'.'''
    version = myParser.VERSIONNUMBER
    head = version[:len(version)-3]         # everything before the separator and the last two digits
    tail = version[-2:]                     # last two digits
    return head.rjust(2, '0') + tail

# make full path name to test file

def makePath(typ, num):
    '''Return the path of the test file named `typ` + `num` + '.txt' inside testDir.'''
    filename = f'{typ}{num}.txt'
    return f'{testDir}{filename}'

# run one test

def runTest(this):
    '''Run every expression in the test file `this` through parseOne().

    Blank lines and lines starting with '#' are skipped.
    '''
    UIwriteln(f'Parser {myParser.VERSIONNUMBER} vs {this[-12:-4]}')

    # read the whole file first so it is closed before any parsing starts
    with open(this) as f:
        tests = [ line.strip() for line in f ]

    for test in tests:
        if not test or test[0] == '#':
            continue
        parseOne(test)
    
# run a test of current or specified version which should succeed
    
def good(num='curr'):
    '''Run the pass test file for the current version, or for version `num`.'''
    startUp(showBatch)
    if num == 'curr':
        num = currNum()
    runTest(makePath('pass', num))
    shutDown()
    
# run a test of current or specified version which should fail

def bad(num='curr'):
    '''Run the fail test file for the current version, or for version `num`.'''
    startUp(showBatch)
    if num == 'curr':
        num = currNum()
    runTest(makePath('fail', num))
    shutDown()
    
# run regression test against current and all previous test files

def regress():
    '''Run the regression suite against the current and all earlier test files.

    Every 'pass' file is expected to add no failures and every 'fail' file
    to add no passes. Files that break that expectation are listed at the
    end of the run.
    '''

    def runSuite(typ, unexpected):
        '''run all `typ` test files up to the current version

        `unexpected` is called to read the counter that should not grow;
        returns the list of files during which it did grow
        '''
        startUp(showBatch)                       # must create objects before we can access variables inside them
        currFn = makePath(typ, currNum())
        offenders = []
        # glob returns names in arbitrary order; sort so every run uses the same order
        for fn in sorted(glob.glob(f'{testDir}{typ}????.txt')):
            if fn <= currFn:                     # skip files newer than this version
                atstart = unexpected()
                runTest(fn)
                if atstart < unexpected():
                    offenders.append(fn)
        shutDown()
        return offenders

    def report(offenders, allGood, heading):
        '''write `allGood` when nothing went wrong, else `heading` and the offending files'''
        if not offenders:
            UIwriteln(allGood)
            return
        UIwriteln(heading)
        for fn in offenders:
            UIwriteln(f'  {fn}')

    UIwriteln('PASS tests')
    failed = runSuite('pass', lambda: failCnt)

    UIwriteln('FAIL tests')
    passed = runSuite('fail', lambda: passCnt)

    report(failed, 'All pass tests succeeded', 'Pass tests which failed')
    report(passed, 'All fail tests succeeded', 'Fail tests which passed')
              

# Testing the parser

In [None]:
parse()       # interactive, one expression at a time; enter 'quit' to stop

In [None]:
good()        # run current parser against its own pass test; use good('1234') to run against a specific pass test

In [None]:
bad()         # run current parser against its own fail test; use bad('5678') to run against a specific fail test

In [None]:
regress()     # run parser against the current and all previous pass and fail tests