# Parser 6.10 - Reorganization

This version does not add any new capabilities but instead begins the process of reorganizing the parser and evaluator in ways that should make adding new capabilities easier. These changes are not specifically related to classes *per se* - they could just as well have been applied to the functional versions of the parser and evaluator - but their form is definitely affected by them.

## Libraries

In [None]:
import glob       # for searching directories

import re         # for regular exprssions

## User output

In [None]:
visSep = '-------------'             # visual separator

def UIwriteln(this):
    '''print one line of output, followed by a blank line'''
    print( f'{this}', end='\n\n' )

def UIwriteSep():
    '''print the visual separator on a line of its own'''
    UIwriteln( visSep )

def UIshow(tag, value):
    '''print a value labelled as "tag: value"'''
    UIwriteln( '{}: {}'.format(tag, value) )

def UIerror(this):
    '''print an error message, labelled with the tag "Error"'''
    UIshow( 'Error', this )

# Tracing

In [None]:
# flags: show trace of processing

showInteract = True          # default for interactive use
showBatch = False            # default for batch use

showTrace = None             # control flag

# Trace Output

def TOshow(mesg, text):
    '''emit one trace line (label padded to 15 columns) when tracing is on'''
    if not showTrace:
        return
    UIshow( '{:15s}'.format(mesg), text )

def TOstring(tag, this):
    '''emit the items of a sequence, space separated, when tracing is on'''
    if not showTrace:
        return                                   # skip building the string at all
    TOshow( tag, ' '.join(map(str, this)) )

# -----------------------
# Parse Tracing
# -----------------------

def PTshowexpr(this):
    '''trace the part of the expression that is still to be parsed'''
    for label, text in (('Parse', visSep), ('Current Expr', this)):
        TOshow( label, text )

def PTshowparse(ok, res, stk):
    '''trace the RPN built so far and the operator stack (only while parsing is ok)'''
    if not ok:
        return
    TOstring( 'Current RPN', res )
    TOstring( 'Operator Stack', stk )

def PTshowtoken(this):
    '''trace a matched token, unless it is a run of blanks'''
    if this[0] != ' ':
        TOshow( "Found Token", this )

# -----------------------
# Evaluation Tracing
# -----------------------

def ETshowtoken(this):
    '''trace the RPN token about to be evaluated'''
    for label, text in (('Eval', visSep), ('Current token', this)):
        TOshow( label, text )

def ETshoweval(stk):
    '''trace the operand stack after a token has been evaluated'''
    TOstring( 'Operand Stack', stk )


# Parser

In [None]:
# operands accepted:
# - decimal integer literals
# - hexadecimal integer literals

# operators accepted:
# - unary negation, plus
# - binary addition, subtraction, multiplication, division
# - grouping parentheses

# errors detected:
# - unrecognized input
# - out of range numeric input
# - malformed expression

# result tuple:
# - (True, [parse])
# - (False, None)

class Parser(object):
    '''Infix-to-RPN parser for integer expressions.

    Operands:  unsigned decimal and hexadecimal (0x...) integer literals.
    Operators: unary - and +, binary + - * /, grouping parentheses.

    Operators are recognized with the regular expressions below and their
    precedence is looked up in the matching dictionary, so adding an operator
    means extending one expression and one table.
    '''

    VERSIONNUMBER = '6.10'

    _INTMAX =  4294967295                # 2**32-1
    _INTMIN = -4294967296                # -(2**32)

    _unPrefxOp = '[-+]'                                    # unary operators

    _unPrec = { '-': 80, '+': 80 }                         # unary operator precedence

    _binInfxOp = '[-+*/]'                                  # binary operators

    _binPrec = { '-': 60, '+': 60, '*': 70, '/': 70 }      # binary operator precedence

    def __init__(self):
        pass                       # no instance state yet; kept as a placeholder

    def doparse(self, this):
        '''Parse the expression string `this` into an RPN list.

        Returns (True, rpn) on success, where rpn holds integer operands and
        operator tags ('U-', 'U+', 'B+', 'B-', 'B*', 'B/').
        Returns (False, None) after reporting the problem via UIerror/UIwriteln.
        '''

        # initialize

        echo = '>>> '                       # prefix used when echoing the expression in an error
        marker = '^^near here'              # pointer text printed under the echoed expression

        expr = this                         # save to new variable but retain original for error reports
        start = len(echo) + len(marker)     # right edge of the marker so that '^^' sits under the
                                            # next unparsed character; advanced as tokens are consumed
        token = None                        # anything successfully matched
        ok = wantoperand = True             # flags
        result = []                         # rpn expression
        stk = [ ('EOE', 1) ]                # operator stack, bottom marker has the lowest precedence

        def parseErr(mesg):
            '''report parse error; always returns False so callers can assign it to ok'''
            UIerror(mesg)
            UIwriteln(f'{echo}{this}')
            UIwriteln(marker.rjust(start))
            return False

        def popGEop(prec):
            '''pop operators of equal or greater precedence'''
            while prec <= stk[-1][1]:
                result.append(stk.pop()[0])

        def pushLeft(op, prec):
            '''push left associative operator on stack'''
            popGEop(prec)
            stk.append( (op, prec) )

        def popGop(prec):
            '''pop operators of greater precedence'''
            while prec < stk[-1][1]:
                result.append(stk.pop()[0])

        def pushRight(op, prec):
            '''push right associative operator on stack'''
            popGop(prec)
            stk.append( (op, prec) )

        def popUntil(op, prec):
            '''clear operator stack down to a marker and check it is the one expected'''
            popGEop(prec)
            if stk.pop()[0] == op:          # top remaining operator is the one we want to see ?
                return True
            if op == '(':
                return parseErr('Unmatched right parenthesis')
            return parseErr('Unmatched left parenthesis')

        def convertUint(ulit, base):
            '''convert unsigned literal to internal form and append it to the rpn'''

            uint = 0

            # isolate the significant portion of 'ulit' (drops a '0x' prefix and leading zeros)

            p = re.search('[1-9A-F][0-9A-F]*', ulit.upper())

            if p is not None:
                for digit in p.group():
                    digval = '0123456789ABCDEF'.find(digit)
                    nextval = uint * base + digval
                    if nextval > self._INTMAX:      # exact integer check, no float rounding
                        return parseErr(f'\'{ulit}\' is out of range')
                    uint = nextval

            result.append(uint)
            return True

        def startsWith(regex):
            '''test if expression starts with given regular expression'''
            nonlocal expr, start, token

            p = re.match(regex, expr)
            if p is None:
                return False

            token = p.group()              # what we matched
            start += len(token)            # update to next match position in original string
            expr = expr[len(token):]       # "chop off" what we matched
            PTshowtoken(token)             # trace
            return True

        # top level main loop

        while ok and expr:

            startsWith('[ ]+')                                 # skip leading whitespace

            if not expr:
                # Only blanks were left. Stop here so that trailing blanks are
                # not mistaken for a missing operator or operand; the check
                # after the loop decides whether the expression is complete.
                break

            PTshowexpr(expr)                                   # trace

            # look for operand

            if wantoperand:

                if startsWith('[(]'):
                    # left parenthesis
                    stk.append( ('(', 2) )

                elif startsWith(self._unPrefxOp):
                    # unary prefix
                    pushRight( 'U' + token, self._unPrec[token] )

                else:

                    wantoperand = False                         # flip

                    if startsWith('0[xX][0-9a-fA-F]+'):
                        # unsigned hexadecimal literal
                        ok = convertUint(token, 16)

                    elif startsWith('[0-9]+'):
                        # unsigned decimal literal
                        ok = convertUint(token, 10)

                    else:
                        # malformed
                        ok = parseErr('Expecting operand')

            # look for operator

            else:

                if startsWith('[)]'):
                    ok = popUntil( '(', 4 )

                else:

                    wantoperand = True                          # flip

                    if startsWith(self._binInfxOp):
                        # binary infix
                        pushLeft( 'B' + token, self._binPrec[token] )

                    else:
                        # malformed
                        ok = parseErr('Expecting operator')

            PTshowparse(ok, result, stk)                        # trace

        if ok:
            if wantoperand:
                ok = parseErr('Unexpected end of expression')   # must be in 'wantoperator' state
            else:
                ok = popUntil( 'EOE', 3 )                       # clear operator stack

        return (ok, result if ok else None)                     # done

### How it works

The first thing we do is change what were instance variables in the previous version into class variables in this version. Class variables are apparently more Pythonic for holding values we do not expect to change much (if at all) from instance to instance or over the lifetime of any specific instance. Instance variables can certainly serve the same purpose, but are usually reserved for values that we know will differ between instances or that may change during the lifetime of an instance.

>Since in this version we have no instance variables, we don't really need an *\_\_init\_\_()* function to initialize them. We leave a null version in place anyway, more or less as a reminder that we can if we ever need any instance variables.

>In an *\_\_init\_\_()* function only the code explicitly provided executes.  A null *\_\_init\_\_()* in a child class prevents any implicit call to the parent class *\_\_init\_\_()*. In this case that is not a problem. *Parser*'s parent class *object* provides only an empty *\_\_init\_\_()* which does nothing.

We add several new class variables. First two regular expressions, *_unPrefxOp* and *_binInfxOp*, to hold our recognized unary prefix and binary infix operators, respectively. Then two dictionaries, *_unPrec{}* and *_binPrec{}*, which relate each operator to its precedence.

This allows us to replace separate checks for operators of the same type but different precedences with one check for all operators of a given type. If we find one, we look up its precedence in its associated dictionary and use that when pushing it on the operator stack.

The goal here is to make it easy to add new operators and precedences to the parser just by adding to these new regular expressions and tables. Grouping them near each other should serve as a reminder to update everything necessary when adding operators.

Inside the *Parser* class we have to qualify any reference to class or instance variables with *self* so that any instance of a *Parser* knows that its own copies of those variables are what we are referring to.

In practice, so far no real change results from altering how unary prefix operators are handled. The main reason for making the change to them anyway is to reduce cognitive overload by handling all operators in a consistent manner.

Checking for binary operators in the *wantoperator* state now requires only one test instead of two. This makes the main parse loop smaller and slightly faster.

# Evaluator

In [None]:
# operators handled:
# - unary negation, plus
# - binary addition, subtraction, multiplication, division

# errors detected:
# - out of range
# - division by zero

# return tuple:
# - (True, result)
# - (False, None)

class Evaluator(Parser):
    '''Evaluate the RPN produced by Parser.doparse().

    Inherits the integer limits _INTMIN/_INTMAX from Parser and reports any
    result outside them, as well as division by zero, through UIerror.
    '''

    def __init__(self):
        pass                       # nothing to initialize; Parser has no instance state either

    def doeval(self, rpn):
        '''Evaluate the RPN list `rpn`.

        Returns (True, value) on success and (False, None) once an error
        has been reported.
        '''

        def inRange(ok, val):
            '''push val if the range test ok passed, else report; returns ok'''
            if ok:
                stk.append( val )
            else:
                UIerror( 'Evaluation result out of range' )
            return ok

        def unPlu(arg):
            '''unary plus'''
            return inRange(True, arg)

        def unNeg(arg):
            '''unary negation'''
            return inRange( arg != self._INTMIN, -arg )     # -_INTMIN would exceed _INTMAX

        def binAdd(rgt, lft):
            '''binary addition'''
            if lft >= 0:
                return inRange( rgt <= self._INTMAX - lft, lft+rgt )
            else:
                return inRange( rgt >= self._INTMIN - lft, lft+rgt )

        def binSub(rgt, lft):
            '''binary subtraction'''
            if lft >= 0:
                return inRange( lft - self._INTMAX <= rgt, lft-rgt )
            else:
                return inRange( lft - self._INTMIN >= rgt, lft-rgt )

        def binMul(rgt, lft):
            '''binary multiplication'''
            if lft == 0 or rgt == 0:
                return inRange( True, 0 )

            if lft > 0:
                if rgt > 0:
                    return inRange( rgt <= self._INTMAX / lft, lft*rgt )
                else:
                    return inRange( rgt >= self._INTMIN / lft, lft*rgt )

            else:
                if rgt > 0:
                    return inRange( rgt <= self._INTMIN / lft, lft * rgt )
                else:
                    return inRange( rgt >= self._INTMAX / lft, lft * rgt )

        def binDiv(rgt, lft):
            '''binary division'''
            if rgt == 0:
                UIerror( 'Division by zero' )
                return False

            # floored division so result is an integer; the quotient can only
            # leave the range when _INTMIN is divided by -1 (gives _INTMAX + 1)
            return inRange( not (lft == self._INTMIN and rgt == -1), lft//rgt )

        # initialize

        unDispatch = {
            'U-': unNeg,
            'U+': unPlu
        }

        binDispatch = {
            'B+': binAdd,
            'B-': binSub,
            'B*': binMul,
            'B/': binDiv
        }

        stk = []                   # operand stack
        ok = True

        # main loop

        for v in rpn:

            ETshowtoken(v)

            if v in binDispatch:
                # arguments are evaluated left to right: first pop is the right operand
                ok = binDispatch[v](stk.pop(), stk.pop())

            elif v in unDispatch:
                ok = unDispatch[v](stk.pop())

            else:
                stk.append( v )    # operand

            if not ok:
                return ( False, None )

            ETshoweval( stk )

        return ( True, stk.pop() )


### How it works

We change *\_\_init\_\_()* to a null function, since we know that *Parser* currently does not have any instance variables we need to initialize. But we still leave it in place. If in the future we change what *Parser* does during instantiation but forget to change this, we'll get an explicit error wherever we rely on what *Parser* does, which should be a clue.

That's not the main idea we want to introduce here. What we really want is to introduce indirect function calls in order to make operator dispatch simpler. This makes the main loop cleaner and more compact. As a bonus, we can also pass the required number of arguments to the operator functions directly, rather than making each one pop them for itself. This too makes the overall code cleaner and more compact.

>We are relying here on Python's guarantee to evaluate function arguments in left-to-right order. For binary operators we know which argument will be *lft* and which will be *rgt*, which we must distinguish for non-commutative operators.

We must initialize the dictionaries containing the indirect function references within *doeval()*. We cannot create them as class or instance variables because Python cannot "see" inside *doeval()* to where they are nested.

>Actually we can, if we are willing to un-nest the functions within *doeval()* and bring them out to the same level as it.

In the main loop we check to see if the current token is a binary or unary operator. If so, we call its corresponding function in the associated dictionary to perform it. If the current token is not an operator, we push it on the operand stack directly.

We have also introduced a null function for the unary plus operator *U+* that simply passes its argument to *inRange()*. We could have left it out of *unDispatch{}* and let the final branch check for it as in previous versions, but that's not really in the spirit of what we're trying to accomplish here.

## Running the parser

In [None]:
passCnt = failCnt = 0                       # most useful for test input files, but never any harm

myParser = myEvaluator = None               # where we keep instances of our classes

def startUp(flag):
    '''prepare a run: make sure the shared parser and evaluator exist,
    announce the parser version, zero the counters and set the trace flag'''
    global passCnt, failCnt, showTrace
    global myParser, myEvaluator
    myParser = myParser or Parser()                 # create on first use, reuse afterwards
    myEvaluator = myEvaluator or Evaluator()
    UIshow( 'Parser', myParser.VERSIONNUMBER )
    passCnt, failCnt = 0, 0
    showTrace = flag
    
def shutDown():
    '''finish a run: print a separator, then the pass and fail totals'''
    UIwriteSep()
    for label, count in (('Pass', passCnt), ('Fail', failCnt)):
        UIshow( label, count )
    
# run parser

def parseOne(this):
    '''parse one expression and, if that works, evaluate it;
    shows each stage and counts the expression as a pass or a fail'''
    global passCnt, failCnt

    UIwriteSep()
    UIshow( 'Input', this )

    succeeded, rpn = myParser.doparse( this )
    if succeeded:
        UIshow( 'Parse', rpn )
        succeeded, value = myEvaluator.doeval( rpn )
        if succeeded:
            UIshow( 'Eval', value )

    if not succeeded:
        failCnt += 1
    else:
        passCnt += 1

## Interactive use

In [None]:
def parse():
    '''Interactive loop: prompt for expressions and parse/evaluate each one.

    Input whose first character is Q or q ends the session.  Empty or
    blank input is ignored and the prompt is shown again.
    '''

    startUp(showInteract)
    while True:
        inp = input( 'Expression: ' )
        UIwriteln( '' )                      # looks better with a blank line here
        if inp[:1].upper() == 'Q':           # slice, not index: empty input must not raise IndexError
            break
        if inp.strip():
            parseOne( inp )
    shutDown()

## Batch processing

In [None]:
testDir = '..\\ParserTest\\'            # directory holding test input files (empty string if same as notebook directory)

def currNum():
    '''convert current version number to match test file numbers

    done this way so we can update only the version number and everything
    still works: the major part is zero-padded to two characters and the
    last two characters are appended (the separator before them is dropped)
    '''
    version = myParser.VERSIONNUMBER
    major = version[:len(version) - 3]
    minor = version[-2:]
    return major.rjust(2, '0') + minor

def makePath(typ, num):
    '''make full path name to test file "<typ><num>.txt" in the test directory'''
    return '{}{}{}.txt'.format(testDir, typ, num)

def runTest(this):
    '''run one test file: announce it, then parse every line of the file
    that is neither blank nor a comment (first non-blank character "#")'''

    label = this[-12:-4]                     # file name without directory and extension
    UIwriteln(f'Parser {myParser.VERSIONNUMBER} vs {label}')

    with open(this) as f:
        lines = list(f)                      # read everything before parsing starts

    for test in map(str.strip, lines):
        if not test or test[0] == '#':
            continue
        parseOne(test)
    
def good(num='curr'):
    '''run the "pass" test file (expressions which should succeed) of the
    current version, or of the version whose file number is given'''

    startUp(showBatch)
    if num == 'curr':
        num = currNum()                      # needs the objects startUp() has just created
    runTest(makePath('pass', num))
    shutDown()
    
def bad(num='curr'):
    '''run the "fail" test file (expressions which should fail) of the
    current version, or of the version whose file number is given'''

    startUp(showBatch)
    if num == 'curr':
        num = currNum()                      # needs the objects startUp() has just created
    runTest(makePath('fail', num))
    shutDown()
    
def regress():
    '''Run regression test against current and all previous test files.

    First every "pass" file up to the current version is run and the files
    in which some expression failed are collected; then every "fail" file
    is run and the files in which some expression passed are collected.
    A summary of both lists is printed at the end.
    '''

    def runSuite(kind, unexpected):
        '''run all <kind>????.txt files up to the current version

        unexpected() returns the running count of outcomes that are wrong
        for this kind of file; returns the files in which that count grew
        '''
        startUp(showBatch)                   # must create objects before we can access variables inside them
        currFn = makePath(kind, currNum())
        offenders = []
        # glob returns names in arbitrary order; sort them so that every run
        # processes and reports the files in the same order
        for fn in sorted(glob.glob(f'{testDir}{kind}????.txt')):
            if fn <= currFn:
                atstart = unexpected()
                runTest(fn)
                if atstart < unexpected():
                    offenders.append(fn)
        shutDown()
        return offenders

    UIwriteln('PASS tests')
    failed = runSuite('pass', lambda: failCnt)

    UIwriteln('FAIL tests')
    passed = runSuite('fail', lambda: passCnt)

    if not failed:
        UIwriteln('All pass tests succeded')
    else:
        UIwriteln('Pass tests which failed')
        for fn in failed:
            UIwriteln(f'  {fn}')

    if not passed:
        UIwriteln('All fail tests succeded')
    else:
        UIwriteln('Fail tests which passed')
        for fn in passed:
            UIwriteln(f'   {fn}')
              

# Testing the parser

In [None]:
parse()       # interactive, one expression per prompt; input starting with Q or q ends the session

In [None]:
good('0500')  # run pass-test file number 0500: use last version with functional changes

In [None]:
bad('0500')   # run fail-test file number 0500: use last version with functional changes

In [None]:
regress()     # run parser against every pass and fail test file up to the current version

Because this version has no new capabilities, there are no new tests for it. We can still run it against any previous version's tests (and should, to make sure we didn't mess anything up by accident).