# Parser 18.00 - String Logicals and Conditionals

In this version of the parser we reach the goal we set at the start of this series: parsing the ternary conditional operator with either string or numeric operands.

We also implement parsing string expressions for the closely related short-circuiting logical OR and logical AND conditional operators.

Finally, we fix a type checking bug that has been lurking for some time.

## Libraries

In [1]:
import glob       # for searching directories
import math
import random     # for 'random()'

import re         # for regular expressions

## User output

In [2]:
visSep = '-------------'             # visual separator: written by UIwriteSep() and used as the text of the 'Parse' / 'Eval' trace headers

def UIwriteln(this):
    '''emit one item of output, followed by a blank line'''
    line = f'{this}\n'               # print() appends a second newline
    print( line )
    
def UIwriteSep():
    '''emit the visual separator as an output line of its own'''
    separator = visSep
    UIwriteln( separator )

def UIshow(tag, value):
    '''emit "tag: value" as one output line'''
    labelled = '{}: {}'.format(tag, value)
    UIwriteln( labelled )

def UIerror(this):
    '''emit a message tagged as an error'''
    label = 'Error'
    UIshow( label, this )

# Tracing

In [3]:
# flags: show trace of processing
#
# showTrace is the flag the trace output functions below actually test;
# while it is None (or any other false value) no trace is written.
# showInteract and showBatch are not read in this part of the file -
# presumably one of them is copied into showTrace by the driver code
# (TODO confirm against the caller).

showInteract = True          # default for interactive use
showBatch = False            # default for batch use

showTrace = None             # control flag

# Trace Output

def TOshow(mesg, text):
    '''emit a trace line when tracing is enabled

    the message label is padded to 15 columns so the traced values line up
    '''
    if not showTrace:
        return
    label = f'{mesg:15s}'
    UIshow( label, text )
        
def TOstring(tag, this):
    '''trace the items of a sequence as one space separated line'''
    if not showTrace:
        return
    joined = ' '.join(map(str, this))
    TOshow( tag, joined )

# -----------------------
# Parse Tracing
# -----------------------

def PTshowexpr(this):
    '''trace a parse header followed by the expression text still to parse'''
    for label, text in (('Parse', visSep), ('Current Expr', this)):
        TOshow( label, text )

def PTshowparse(ok, res, opStk, typStk):
    '''trace the RPN, operator stack and type stack after a successful step'''
    if not ok:
        return
    for label, items in (('Current RPN', res),
                         ('Operator Stack', opStk),
                         ('Type Stack', typStk)):
        TOstring( label, items )

def PTshowtoken(this):
    '''trace a matched token, unless it starts with a blank'''
    startsBlank = this[0] == ' '
    if startsBlank:
        return
    TOshow( "Found Token", this )

# -----------------------
# Evaluation Tracing
# -----------------------

def ETshowtoken(this):
    '''trace an evaluation header followed by the token being evaluated'''
    for label, text in (('Eval', visSep), ('Current token', this)):
        TOshow( label, text )

def ETshoweval(stk):
    '''trace the contents of the operand stack'''
    label = 'Operand Stack'
    TOstring( label, stk )


# Functions

In [4]:
def functionErr(nam, msg, val):
    '''report a failed function call; returns the failure tuple (False, None)'''
    report = f'{nam}(): {msg} value: {val}'
    UIerror( report )
    return ( False, None )

def fncAbs(val):
    '''magnitude of val, as (True, result)'''
    magnitude = abs(val)
    return (True, magnitude)

def fncChr(val):
    '''convert an integer code point to a one character string

    val must be an int (floats and bools are rejected) in the range
    0 .. 0x10FFFF, which is every code point chr() accepts. This also lets
    CHR$ reverse any value ORD can produce.

    returns (True, character) on success; otherwise the error is reported
    through functionErr() and its (False, None) result is returned.
    '''
    if type(val) is int and 0 <= val <= 0x10FFFF:
        return (True, chr(val))
    else:
        return functionErr('CHR$', 'negative or non-integer', val)

def fncFnd(args):
    '''locate needle within haystack

    args is [needle, haystack], optionally followed by a begin offset and
    an end offset. returns (True, offset from 0); the offset is -1 when the
    needle is not found.
    '''
    needle, haystack = args[0], args[1]
    supplied = len(args)
    first = 0 if supplied <= 2 else args[2]
    last = len(haystack) + 1 if supplied <= 3 else args[3]
    return (True, haystack.find(needle, first, last))

def fncLen(string):
    '''number of characters in string, as (True, result)'''
    size = len(string)
    return (True, size)

def fncMax(args):
    '''largest of the values in args, as (True, result)'''
    largest = max(args)
    return (True, largest)

def fncMin(args):
    '''smallest of the values in args, as (True, result)'''
    smallest = min(args)
    return (True, smallest)

def fncOrd(args):
    '''code point of one character of a string

    args is either a string (position 0 is used) or a [string, position]
    list. returns (True, code point), or (True, -1) when there is no
    character at that position.
    '''
    text, pos = args if type(args) is list else (args, 0)
    char = text[pos:pos+1] if len(text) else text
    if len(char):
        return (True, ord(char))
    return (True, -1)

def fncRand():
    '''next value from random.random(), as (True, result)'''
    sample = random.random()
    return (True, sample)

def fncRnd(val):
    '''round a value

    val is either a bare number (rounded with round(val)) or a
    [number, places] list, in which case places must be an int.
    returns (True, result), or the functionErr() failure result when
    places is not an int.
    '''
    if type(val) is list:
        places = val[1]
        if type(places) is int:
            return (True, round(val[0], places))
        return functionErr('ROUND', 'non-integer', places)
    return (True, round(val))

def fncSgn(val):
    '''sign of val: (True, 1), (True, -1) or (True, 0)'''
    if val > 0:
        return (True, 1)
    if val < 0:
        return (True, -1)
    return (True, 0)

def fncSqt(val):
    '''square root of val

    returns (True, root) for val >= 0. A negative val is reported through
    functionErr() and its (False, None) result is returned. The report
    uses 'SQRT', the name the function is registered under in fncDispatch,
    so the message names what the user actually typed.
    '''
    if val >= 0:
        return (True, math.sqrt(val) )
    else:
        return functionErr('SQRT', 'negative', val)
    
def fncSub(args):
    '''substring of a string

    args is either a bare string (returned unchanged) or a list
    [string, begin] optionally followed by an end offset; the result is
    the slice string[begin:end], with end defaulting to past the end of
    the string. returns (True, result).
    '''
    if type(args) is list:
        first = args[1]
        text = args[0]
        last = len(text) + 1 if len(args) <= 2 else args[2]
        return (True, text[first:last])
    return (True, args)
    
# known functions
#
# Maps the (upper case) function name to a tuple:
#   [0] the implementing function
#   [1] minimum argument count
#   [2] maximum argument count, or None when there is no maximum
#   [3] type check list: result type first, then one type per argument in
#       left-to-right order
# The parser's type checker reads [1], [2] and [3]: it trims [3] when
# optional arguments are omitted and, when there is no maximum, extends it
# by repeating the first argument type.

fncDispatch = {
     'ABS': (fncAbs,  1, 1,    ['number', 'number']),
    'CHR$': (fncChr,  1, 1,    ['string', 'number']),
    'FIND': (fncFnd,  2, 4,    ['number', 'string', 'string', 'number', 'number']),
     'LEN': (fncLen,  1, 1,    ['number', 'string']),
     'MAX': (fncMax,  2, None, ['number', 'number', 'number']),
     'MIN': (fncMin,  2, None, ['number', 'number', 'number']),
     'ORD': (fncOrd,  1, 2,    ['number', 'string', 'number']),
  'RANDOM': (fncRand, 0, 0,    ['number'] ),
   'ROUND': (fncRnd,  1, 2,    ['number', 'number', 'number']),
    'SIGN': (fncSgn,  1, 1,    ['number', 'number']),
    'SQRT': (fncSqt,  1, 1,    ['number', 'number']),
    'SUB$': (fncSub,  1, 3,    ['string', 'string', 'number', 'number'])
}


# Parser

In [5]:
# operands accepted:
# - decimal and hexadecimal floating point literals
# - scalar and array numeric variables
# - numeric functions with zero or more arguments
# - string literals w/ escape sequences
# - scalar and array string variables

# operators accepted:
# - unary negation, plus
# - binary addition, subtraction, multiplication, division
# - grouping parentheses
# - logical not, equality, inequality
# - assignment and shortcut assignment
# - prefix and postfix increment and decrement
# - logical short circuit
# - ternary conditional
# - string concatenation, multiplication
# - string logical not, equality, inequality
# - string variables, assignment, shortcut assignment

# errors detected:
# - unrecognized input
# - out of range numeric input
# - malformed expression

# result tuple:
# - (True, [parse])
# - (False, None)

class Parser(object):
    '''Parse an infix expression string into a type checked Reverse Polish list.

    doparse() is the only entry point. Operators wait on an operator stack
    according to precedence and associativity; each time one is moved to
    the Reverse Polish its operands are type checked against a check list
    from _typeChkLst, which is also where polymorphic operators are
    resolved to their numeric or string form.
    '''

    VERSIONNUMBER = '18.00'

    _FLTMAX =  4294967295                                  # 2**32-1
    _FLTMIN = -4294967296                                  # -(2**32)

    _expMax = {
        'P' : math.log2(_FLTMAX),                          # max base 2 exponent
        'E' : math.log10(_FLTMAX)                          # max base 10 exponent
    }

    # Type check lists, keyed by check name.
    #
    # Element 0 is the result type pushed on the type stack once the check
    # succeeds (None: push nothing). The remaining elements are consumed
    # from the right; each is either the type wanted for the operand on top
    # of the type stack, or one of the special actions handled by
    # typeCheck(): 'select', 'strop', 'cmpnul', 'argsep', 'fncsym',
    # 'C?-op' and 'tererr'.

    _typeChkLst = {                                        # index is left-to-right, check right-to-left
        # special case
        '?err': [  None,   'tererr'],
         'a2x': [  None,   'argsep'],
         'f2x': [  None,   'fncsym'],
        # polymorphic disambiguation
       '$bb2b': [('nn2n', '$ss2s'), 'select'],
       '?bb2b': [('nn2n', 'ss2s'), 'select', 'C?-op'],
         'b2_': [('n2_', 's2_'), 'select'],
         'b2b': [('n2n', 's2s'), 'select'],
         'b2n': [('n2n', 's2n'), 'select'],
        'bb2n': [('nn2n', 'ss2n'), 'select'],
        'bn2b': [('n2n', '$s2s'), 'select', 'number'],
        'vb2b': [('vn2n', 'v$s2s'), 'select'],
        'vn2b': [('v2n', 'v$2s'), 'select', 'number'],
        'vn2v': [('v2v', 'v$2v$'), 'select', 'number'],
        # monomorphic
        '$s2s': ['string', 'string', 'strop'],
         'n2_': [  None,   'number'],
         'n2n': ['number', 'number'],
        'nn2n': ['number', 'number', 'number'],
         's2_': [  None,   'string', 'cmpnul'],
         's2n': ['number', 'string', 'cmpnul'],
         's2s': ['string', 'string'],
        'ss2n': ['number', 'string', 'string', 'strop'],
       '$ss2s': ['string', 'string', 'string', 'strop'],
        'ss2s': ['string', 'string', 'string'],
         'v2n': ['number', 'numsym'],
         'v2v': ['numsym', 'numsym'],
        'vn2n': ['number', 'numsym', 'number'],
        'v$2s': ['string', 'strsym', 'strop'],
       'v$2v$': ['strsym', 'strsym'],
       'v$s2s': ['string', 'strsym', 'string', 'strop']

    }

    def __init__(self):
        pass

    def doparse(self, this):
        '''parse the expression text in 'this'

        returns the result tuple (True, [rpn]) on success. On failure the
        error is reported through UIerror()/UIwriteln() and (False, None)
        is returned.
        '''

        def parseErr(mesg, pos):
            '''report parse error; always returns False'''
            UIerror(mesg)
            UIwriteln(f'>>> {this}')
            if pos > 0:
                UIwriteln(f'{"^^near here".rjust(pos)}')
            return False

        # initialize

        expr = this                # save to new variable but retain original for error reports
        start = 0                  # offset into 'this' of the text not yet matched; used to report where an error occurred
        token = None               # anything successfully matched
        ok = wantoperand = True    # flags
        result = []                # rpn expression
        opStk = [ ('EOE', 1) ]     # operator stack
        typStk = []                # type stack
        argStk = []                # function argument count stack


        def typeCheck(op, chk):
            '''type check operands of operator 'op' using check list 'chk'

            returns True when every check passes, else the False result of
            parseErr(). May rewrite the RPN: insert de-reference operators,
            replace a numeric operator by its string form, and so on.
            '''
            nonlocal argStk

            def peekTop(ndx):
                '''peek at top item on type stack'''
                return typStk[-1][ndx]

            check = list(self._typeChkLst[chk])                    # list() to avoid aliasing

            while len(check) > 1:
                want = check.pop()

                # do we need to disambiguate a polymorphic operator ?

                if want == 'select':
                    have = peekTop(0)                               # type on top of stack
                    want = check.pop()                              # possible new checks ('check' now empty)
                    chk = want[ 0 if have.find('num') >= 0 else 1 ] # numeric or string ?
                    check = list(self._typeChkLst[chk])             # replace with disambiguated check
                    continue

                # are we making a numeric operator into a string operator ?

                elif want == 'strop':
                    # replace numeric op with string op
                    result.pop()
                    result.append( '$' + op )
                    continue

                # insert 'compare string to null string' operator ?

                elif want == 'cmpnul':
                    result.insert( -1, '$U!=')
                    continue

                # are we checking a function argument separator ?

                elif want == 'argsep':
                    if len(opStk) < 2 or opStk[-2][0] != 'B(':     # within a function call ?
                        return parseErr('Unexpected comma', peekTop(2))
                    else:
                        argStk[-1] += 1                            # one more argument
                        result.pop()                               # remove 'F,' from RPN
                        continue

                # are we going to check function argument types ?

                elif want == 'fncsym':

                    argcnt = argStk.pop()
                    have, rpnPos, errPos = typStk.pop(-argcnt - 1)

                    if have.find('sym') < 0:
                        return parseErr('Function name expected', errPos)

                    fnc = result[rpnPos - 1]
                    if fnc not in fncDispatch:
                        return parseErr(f'Unknown function name: {fnc}', errPos)

                    mina = fncDispatch[fnc][1]
                    maxa = fncDispatch[fnc][2]
                    if (argcnt < mina) or (maxa is not None and argcnt > maxa):
                        return parseErr(f'Bad argument count: {fnc}', errPos)
                    elif argcnt > 1:
                        result.pop()                         # remove 'B('
                        result.append(argcnt)                # argument count
                        result.append('F()')                 # multiple argument function call operator

                    check = list(fncDispatch[fnc][3])        # replace with argument(s) checklist
                    if maxa is None:                         # no maximum ?
                        while argcnt >= len(check):
                            check.append( check[1] )
                    elif argcnt < maxa:                      # optional arguments not supplied ?
                        check = check[:-(maxa-argcnt)]       # remove them from checklist

                    continue

                # did we want a right hand '?' ?

                elif want == 'C?-op':
                    if opStk.pop()[0] == 'C?-':
                        continue
                    return parseErr('":" without "?"', peekTop(2))

                # '?' without ':' ?

                elif want == 'tererr':
                    return parseErr('"?" without ":"', peekTop(2))

                # regular type checks

                have, rpnPos, errPos = typStk.pop()

                # no problem ?

                if want == have:
                    continue

                # did we want a number ?

                if want == 'number':

                    if have == 'numsym':
                        # convert numeric variables to values
                        result.insert( rpnPos, 'U*' )

                    elif have.find('num') < 0:
                        return parseErr('Numeric value expected', errPos)

                # did we want a string ?

                elif want == 'string':

                    if have == 'strlit':
                        # convert string literals to values
                        result.insert( rpnPos, 'U$')

                    elif have == 'strsym':
                        result.insert( rpnPos, '$U*')

                    elif have.find('str') < 0:
                        return parseErr('String value expected', errPos)

                # did we want a variable ?

                elif want == 'numsym':
                    return parseErr('Numeric variable expected', errPos)

                elif want == 'strsym':
                    return parseErr('String variable expected', errPos)

                # generic everything else

                else:
                    return parseErr('Type mismatch', errPos )

            # push result type, RPN operator position, original operator position

            restyp = check.pop()
            if restyp is not None:
                typStk.append( (restyp, len(result), errPos) )
            return True

        def addOperand(op, typ):
            '''add operand to RPN'''
            result.append( op )
            typStk.append( (typ, len(result), start) )

        def addOperator():
            '''move top of operator stack to RPN and type check its operands'''
            op, _, chk = opStk.pop()
            result.append( op )
            return typeCheck( op, chk )

        def popGEop(prec):
            '''pop operators of equal or greater precedence'''
            ok = True
            while ok and prec <= opStk[-1][1]:
                ok = addOperator()
            return ok

        def pushLeft(op, prec, chk):
            '''push left associative operator on stack'''
            if not popGEop(prec):
                return False
            opStk.append( (op, prec, chk) )
            return True

        def popGop(prec):
            '''pop operators of greater precedence'''
            ok = True
            while ok and prec < opStk[-1][1]:
                ok = addOperator()
            return ok

        def pushRight(op, prec, chk):
            '''push right associative operator on stack'''
            if not popGop(prec):
                return False
            opStk.append( (op, prec, chk) )
            return True

        def popUntil(op, prec):
            '''clear operator stack down to 'op' and check it is what we find there'''
            if not popGEop(prec):                       # type check failure ?
                return False

            topop = opStk.pop()[0]                      # found match ?
            if op == topop:
                return True

            elif op == '(':
                err = 'right parenthesis'
            elif op == '[':
                err = 'right bracket'
            elif topop == '(':
                err = 'left parenthesis'
            elif topop == '[':
                err = 'left bracket'
            else:
                err = 'EOE'

            return parseErr( f'Unmatched {err}', start )

        # operator dictionaries initialization
        #
        # each entry is (precedence, push function, type check list name);
        # regular expressions are raw strings so that '\(' and friends are
        # not treated as (invalid) string escapes

        _postOps = r'[+]{2}|[-]{2}|\(\)'

        _postOp = {
            '++': (90, pushLeft, 'v2n'),
            '--': (90, pushLeft, 'v2n'),
            '()': (90, pushLeft, 'f2x')
        }

        _unOps = '!|[+]{1,2}|[-]{1,2}'

        _unOp =  {
            '-':  (80, pushRight, 'n2n'),
            '+':  (80, pushRight, 'n2n'),
            '!':  (80, pushRight, 'b2n'),
            '++': (80, pushRight, 'v2n'),
            '--': (80, pushRight, 'v2n'),
        }

        _binOps = r'[-+*/]=?|[=!]?=|\[|\('

        _binOp = {
            '[':  (90, pushLeft,  'vn2v'),
            '(':  (90, pushLeft,  'f2x'),
            '*':  (70, pushLeft,  'bn2b'),
            '/':  (70, pushLeft,  'nn2n'),
            '+':  (60, pushLeft,  '$bb2b'),
            '-':  (60, pushLeft,  'nn2n'),
            '==': (50, pushLeft,  'bb2n'),
            '!=': (50, pushLeft,  'bb2n'),
            '=':  (10, pushRight, 'vb2b'),
            '*=': (10, pushRight, 'vn2b'),
            '/=': (10, pushRight, 'vn2n'),
            '+=': (10, pushRight, 'vb2b'),
            '-=': (10, pushRight, 'vn2n')
        }

        _condOps = '&&|[|]{2}|[?:]'

        # each conditional operator is pushed twice: entry [0] as op + '+',
        # then entry [1] as op + '-'

        _condOp = {
            '&&': [(40, pushLeft, 'b2_'),  (40, pushLeft, 'b2n')],
            '||': [(30, pushLeft, 'b2_'),  (30, pushLeft, 'b2n')],
            '?':  [(28, pushRight, 'b2_'), (24, pushRight, '?err')],
            ':':  [(26, pushRight, 'b2b'), (24, pushRight, '?bb2b')]
        }

        def convertFloat(fplit, base, capgrp):
            '''convert floating point literal to internal form

            fplit is the matched literal, base is 10 or 16, capgrp is the
            regular expression whose groups split the literal into integer,
            fraction and exponent parts. Adds a 'numlit' operand and returns
            True, or reports a range error and returns False.
            '''

            def rangeErr():
                return parseErr(f'\'{fplit}\' is out of range', start)

            # collect the features of interest

            p = re.search(capgrp, fplit.upper())

            lint, lfrc, expbas, expsgn, lexp = p.group(1,2,4,5,6)

            # convert integer portion (if any)

            uint = 0
            if lint:
                p = re.search('[1-9A-F][0-9A-F]*', lint )
                if p is not None:
                    for ch in p.group():
                        digval = '0123456789ABCDEF'.find(ch)
                        if uint <= (self._FLTMAX - digval)/base:
                            uint =  uint * base + digval
                        else:
                            return rangeErr()

            # convert fractional portion (if any)

            ufrc = 0
            if lfrc:
                fbase = 1
                for ch in lfrc:
                    digval = '0123456789ABCDEF'.find(ch)
                    fbase *= base
                    ufrc += digval/fbase

            if uint == self._FLTMAX and ufrc != 0:
                return rangeErr()

            # value so far

            uflt = uint + ufrc

            # convert exponent portion (if any)

            uexp = 0
            if lexp:
                for ch in lexp:
                    digval = '0123456789'.find(ch)
                    if uexp <= (self._expMax[expbas] - digval)/10:
                        uexp =  uexp * 10 + digval
                    else:
                        return rangeErr()

            # adjust value by exponent (if any)

            if uexp:
                power = (2 if expbas == 'P' else 10) ** uexp
                if expsgn == '-':
                    uflt /= power
                elif uflt <= self._FLTMAX/power:
                    uflt *= power
                else:
                    return rangeErr()

            addOperand( uflt, 'numlit' )
            return True

        def startsWith(regex):
            '''test if expression starts with given regular expression

            on a match the matched text is saved in 'token', chopped off the
            front of 'expr', and 'start' is advanced past it
            '''
            nonlocal expr, start, token

            p = re.match(regex, expr)
            if p is None:
                return False
            else:
                token = p.group()              # what we matched
                start += len(token)            # update to next match position in original string
                expr = expr[len(token):]       # "chop off" what we matched
                PTshowtoken(token)             # trace
                return True

        # top level main loop

        while ok and len(expr):

            _ = startsWith('[ ]+')                             # skip leading whitespace

            if not len(expr):                                  # only trailing whitespace was left:
                break                                          # let the end of expression checks decide

            PTshowexpr(expr)                                   # trace

            # look for operand

            if wantoperand:

                if startsWith('[(]'):
                    # left parenthesis ?
                    opStk.append( ('(', 2) )

                elif startsWith(_unOps):
                    # right unary ?
                    prec, assoc, check = _unOp[token]
                    ok = assoc( 'U' + token, prec, check )

                else:

                    wantoperand = False                         # flip

                    if startsWith('0[xX]([0-9a-fA-F]+([.][0-9a-fA-F]*)?|[.][0-9a-fA-F]+)([pP][-+]?[0-9]+)?'):
                        # unsigned hexadecimal literal ?
                        ok = convertFloat(token, 16, '0X([0-9A-F]*)[.]?([0-9A-F]*)(([P])([-+])?([0-9]+))?')

                    elif startsWith('([0-9]+([.][0-9]*)?|[.][0-9]+)([eE][-+]?[0-9]+)?'):
                        # unsigned decimal literal ?
                        ok = convertFloat(token, 10, '([0-9]*)[.]?([0-9]*)(([E])([-+])?([0-9]+))?')

                    elif startsWith(r'[a-zA-Z][_a-zA-Z0-9]*(\$)?'):
                        # variable or function name ? (a trailing '$' marks a string symbol)
                        addOperand( token.upper(), 'strsym' if token[-1] == '$' else 'numsym' )

                    elif startsWith(r'"([^"\\]|\\.)*"'):
                        # string literal ?
                        addOperand( token, 'strlit')

                    else:
                        # malformed
                        ok = parseErr('Expecting operand', start)

            # look for operator

            else:

                if startsWith(r'[)\]]'):
                    # expression terminator ?
                    ok = popUntil( '(' if token == ')' else '[', 4 )


                elif startsWith(_postOps):
                    # postfix operator ?
                    prec, assoc, check = _postOp[token]
                    ok = assoc( 'P' + token, prec, check )
                    if ok and token == '()':
                        argStk.append( 0 )

                else:

                    wantoperand = True                          # flip

                    if startsWith(_binOps):
                        # binary operator ?
                        prec, assoc, check = _binOp[token]
                        ok = assoc( 'B' + token, prec, check )
                        if ok and (token == '[' or token == '('):
                            opStk.append( (token, 2) )
                            if token == '(':
                                argStk.append( 1 )

                    elif startsWith(_condOps):
                        # conditional operator ?
                        op = 'C' + token
                        precl, assocl, checkl = _condOp[token][0]
                        precr, assocr, checkr = _condOp[token][1]
                        ok = assocl( op + '+', precl, checkl) and assocr( op + '-', precr, checkr)

                    elif startsWith(','):
                        # function argument separator ?
                        ok = pushLeft( 'F,', 6, 'a2x' )

                    else:
                        # malformed
                        ok = parseErr('Expecting operator', start)

            PTshowparse(ok, result, opStk, typStk )               # trace

        if ok and wantoperand:
            ok = parseErr('Unexpected end of expression', start ) # must be in 'wantoperator' state

        if ok:
            ok = popUntil( 'EOE', 3 )                             # clear operator stack

        if ok:
            ok = typeCheck('Final', 'b2b')                        # final type check

        return (ok, result if ok else None)                       # done
       

### How it works

In a conditional context, we define a string expression to be *True* if its result string is non-null and *False* if it is null. This is quite similar to numeric expressions, which are *True* if non-zero and *False* if zero.

It's not quite as simple to determine the [truth value](https://en.wikipedia.org/wiki/Truth_value) of a string expression as it is for numeric ones. A string expression has a string result (obviously). We have to somehow convert that to a truth value.

>In practice we'll settle for a numeric value instead of a true Boolean value.

The straightforward way to convert a string value to a numeric value is to compare it against a null string:

```Python
str$ != ''
```

is *True* if *str$* is non-null, else *False*.

We could require that all string expressions used in a conditional context be explicitly compared to a null string. Or we could make that entirely optional by implicitly making the comparison if it isn't already explicit. We can easily do that by adding a new operator for that purpose to the Reverse Polish.

The implicit comparison seems more like numeric expressions in conditional contexts, which do not have to be explicitly compared against zero. So we'll do that.

>It was discovered while testing this new operator that the two expressions:
```Python
"" == "" || 0
```
>and
```Python
"" != "" || 0
```
>produced the same result, namely *True*, which they should not have: that was the bug. It was traced to flawed type checks of **==** and **!=**, which gave string results to string expressions rather than the numeric ones they should have (as did **!**, pernicious little thing...).

The *typeCheck()* function has been extensively revised in an effort to kill this bug. One of the main tasks accomplished during type checking is to convert polymorphic operators to monomorphic. Previously this was done in a somewhat ad-hoc manner, which turned out to overlook some possibilities. Patching that system to fix this bug appeared unappealing (i.e., difficult). Trying a different approach seemed a better option.

Borrowing from the idea used for type checking function arguments, resolving polymorphic operators is now done using a separate list of type checks for each possible outcome. If an operator is to be treated as numeric, it has one list of type checks. If as a string, another.

A main advantage of this approach is that any list can be easily customized to the operator it is meant for.

>Another is that it should be easy to extend to additional types.

It is also very easy to add special action 'checks' to a list to do anything we want. We introduce:

- *'select'* - resolves a polymorphic operator. The next item in the check list is a tuple of indices into monomorphic check lists. *'select'* chooses one index from that tuple based on whatever type is at the top of the type stack, then replaces the current check list with the check list at that index
- *'strop'* - explicitly converts an originally numeric operator to a string operator
- *'cmpnul'* - inserts an implicit *compare string to null string* operator into the Reverse Polish

>Since in this design *select* must always be followed (or preceded, depending on point of view) by a tuple, if we wanted we could do without it. Instead we would test whether *want* is a tuple or not. A matter of taste.

We use *cmpnul* in conditional contexts to convert string results to numeric ones. We can freely mix string and numeric expressions on either side of the short-circuiting logical AND and logical OR operators.

A string expression can also be used in the conditional branch of the ternary conditional. The true and false branches may yield either string or numeric results, but both branches must have the same type. This is so the type matches any variable the result may be assigned to.

>Not that  [Python's](https://www.python.org/) type-fluid variables really care about such things. Something to consider when designing a language, though.

# Evaluator

In [6]:
# operators handled:
# - unary negation, plus
# - binary addition, subtraction, multiplication, division
# - numeric and string logical negation, equality and inequality
# - variable name de-reference
# - variable assignment and shortcut assignment
# - prefix and postfix increment and decrement
# - numeric array assignment and de-reference
# - function calls with one or more arguments
# - string literals w/ escape sequences
# - string addition, multiplication
# - string variable assignment and de-reference

# errors detected:
# - out of range
# - division by zero
# - invalid function arguments

# return tuple:
# - (True, result)
# - (False, None)

class Evaluator(Parser):
    '''Evaluate the Reverse Polish token list produced by the parser.

    All variables, numeric and string alike, are kept in one symbol table,
    self._symTable, keyed by variable name.

    NOTE(review): self._FLTMAX and self._FLTMIN are not defined in this class;
    presumably they are inherited from Parser - confirm.
    '''

    def __init__(self):
        self._symTable = dict()

    def doeval(self, rpn):
        '''evaluate a Reverse Polish token list

        rpn: iterable of tokens. A token found in one of the dispatch tables
        below is an operator; any other token is pushed on the operand stack
        unchanged.

        returns (True, result) on success, or (False, None) once an operator
        handler has reported an evaluation error.
        '''

        # Every handler returns True on success and False on failure.
        # Binary handlers are called as handler(top, next): the first argument
        # is the item popped first, i.e. the right-hand operand.

        def evalErr(mesg):
            '''report evaluation time error'''
            UIerror(mesg)
            return False

        def pushOperand(val):
            '''push operand on stack'''
            stk.append( val )
            return True

        def inRange(ok, val):
            '''range check test result: push val if ok, otherwise report an error'''
            return pushOperand(val) if ok else evalErr( 'Evaluation result out of range' )

        def setSkip(down, up):
            '''set skip flags

            The main loop stops dispatching tokens until the skip level is back
            at zero: 'down' lowers the level, 'up' raises it (nested operator).
            '''
            nonlocal skipLevel, downToken, upToken
            skipLevel = 1
            downToken = down
            upToken = up

        def checkSkip(skip, val, down, up):
            '''if skip is set, start skipping and push val as the already known result'''
            if skip:
                setSkip(down, up)
                stk.append(val)
            return True

        def logLftAnd(val):
            '''left branch of logical AND: a zero left side yields 0 without evaluating the right side'''
            return checkSkip(val == 0, 0, 'C&&-', 'C&&+')

        def logLftOr(val):
            '''left branch of logical OR: a non-zero left side yields 1 without evaluating the right side'''
            return checkSkip(val != 0, 1, 'C||-', 'C||+')

        def logRgt(val):
            '''right branch of logical AND and OR: normalize the result to 1 or 0'''
            return pushOperand(1 if val != 0 else 0)

        def terCond(val):
            '''ternary condition: when false, skip the true branch up to its matching 'C:+' '''
            if not val:
                setSkip('C:+', 'C?+')
            return True

        def terTrue(val):
            '''end of ternary true branch: keep its value and skip the false branch'''
            return checkSkip(True, val, 'C:-', 'C:+')

        # The end of the ternary false branch ('C:-') needs no handler of its
        # own: it only has to put the value back, so it dispatches to pushOperand.

        def binAryNdx(ndx, nam):
            '''create array index: push the element name '<nam>_<index>' '''
            i = int(ndx)
            return inRange(0 <= ndx <= self._FLTMAX, f'{nam}_{i}') if i == ndx else evalErr('Non-integer index')

        def binVal(val, var):
            '''assign value to variable; the assigned value is also the result'''
            self._symTable[var] = val
            return pushOperand(val)

        def unVal(var):
            '''numeric variable name value (0 if the name is not in the symbol table)'''
            return pushOperand(self._symTable[var] if var in self._symTable else 0)

        def unStrVal(var):
            '''string variable name value (null string if the name is not in the symbol table)'''
            return pushOperand(self._symTable[var] if var in self._symTable else '')

        def unStr(val):
            '''string literal value: drop the enclosing quotes and expand escape sequences

            Recognized escapes:
            - backslash x + 1 to 8 hex digits: character code, masked to 8 bits
            - backslash u + exactly 4 hex digits: character code
            - backslash n, backslash t: newline, tab
            - backslash + any other character: that character itself
            '''

            def doEsc(matchObj):
                '''expand one escape sequence'''
                p = matchObj.group()
                ch = p[1]
                if len(p) > 2:                                   # \x or \u followed by hex digits
                    h = int(p[2:], 16)
                    return chr(h & 0xFF if ch == 'x' else h)
                return '\n' if ch == 'n' else '\t' if ch == 't' else ch

            # All escapes are expanded in ONE left-to-right pass. Separate passes
            # per escape kind would mis-read an escaped backslash followed by
            # 'x41' as a hex escape, and would re-scan the output of an earlier
            # pass (e.g. a backslash produced by \x5c).
            return pushOperand( re.sub(r'\\(?:x[0-9a-fA-F]{1,8}|u[0-9a-fA-F]{4}|.)', doEsc, val[1:-1]) )

        # Unary plus ('U+') needs no handler of its own either: it dispatches
        # straight to pushOperand.

        def unNeg(arg):
            '''unary negation'''
            return inRange( arg != self._FLTMIN, -arg )

        def unNot(arg):
            '''logical not (pushes a Python bool)'''
            return pushOperand( not arg )

        def unStrNot(arg):
            '''string logical not: true if the string is the null string'''
            return pushOperand( arg == '' )

        # The range tests in the arithmetic handlers are arranged so that the
        # test itself stays between _FLTMIN and _FLTMAX.

        def binAdd(rgt, lft):
            '''binary addition'''
            if lft >= 0:
                return inRange( rgt <= self._FLTMAX - lft, lft+rgt )
            else:
                return inRange( rgt >= self._FLTMIN - lft, lft+rgt )

        def binSub(rgt, lft):
            '''binary subtraction'''
            if lft >= 0:
                return inRange( lft - self._FLTMAX <= rgt, lft-rgt )
            elif rgt >= 0:
                return inRange( lft - self._FLTMIN >= rgt, lft-rgt )
            else:
                return inRange( lft <= self._FLTMAX + rgt, lft-rgt )

        def binMul(rgt, lft):
            '''binary multiplication'''
            if abs(lft) <= 1 or abs(rgt) <= 1:
                return pushOperand( lft * rgt )

            elif lft > 0:
                if rgt > 0:
                    return inRange( rgt <= self._FLTMAX / lft, lft * rgt )
                else:
                    return inRange( rgt >= self._FLTMIN / lft, lft * rgt )

            elif rgt > 0:
                return inRange( rgt <= self._FLTMIN / lft, lft * rgt )

            else:
                return inRange( rgt >= self._FLTMAX / lft, lft * rgt )

        def binDiv(rgt, lft):
            '''binary division'''
            if abs(rgt) >= 1:
                return pushOperand( lft/rgt )

            elif rgt > 0:
                if lft > 0 :
                    return inRange( lft <= self._FLTMAX * rgt, lft / rgt )
                else:
                    return inRange( lft >= self._FLTMIN * rgt, lft / rgt )

            elif rgt < 0:
                if lft > 0:
                    return inRange( lft <= self._FLTMIN * rgt, lft / rgt )
                else:
                    return inRange( lft >= self._FLTMAX * rgt, lft / rgt )

            else:
                return evalErr( 'Division by zero' )

        def binEqu(rgt, lft):
            '''logical equality (numeric or string operands)'''
            return pushOperand( lft == rgt )

        def binNeq(rgt, lft):
            '''logical inequality (numeric or string operands)'''
            return pushOperand( lft != rgt )

        def unStrNeq(val):
            '''compare string to null string'''
            return pushOperand( val != '' )

        def binShortVal(val, var, op):
            '''shortcut assignment: var = var <op> val'''
            _ = unVal(var)                           # put the value of the variable on the stack
            if op(val, stk.pop()):                   # perform the arithmetic
                return binVal(stk.pop(), var)        # if successful,  also assign result to variable
            return False

        def binAddVal(val, var):
            '''shortcut addition'''
            return binShortVal(val, var, binAdd)

        def binSubVal(val, var):
            '''shortcut subtraction'''
            return binShortVal(val, var, binSub)

        def binMulVal(val, var):
            '''shortcut multiplication'''
            return binShortVal(val, var, binMul)

        def binDivVal(val, var):
            '''shortcut division'''
            return binShortVal(val, var, binDiv)

        def unPfxInc(var):
            '''prefix increment'''
            return binShortVal(1, var, binAdd)

        def unPfxDec(var):
            '''prefix decrement'''
            return binShortVal(1, var, binSub)

        def unPostFix(val, var):
            '''postfix inc/dec: update the variable, but leave its previous value as the result'''
            _ = unVal(var)                             # push current value of variable on stack
            ok = binShortVal(val, var, binAdd)         # update value of variable
            stk.pop()                                  # remove updated value from stack (if error, removes first push)
            return ok

        def unPstInc(var):
            '''postfix increment'''
            return unPostFix(1, var)

        def unPstDec(var):
            '''postfix decrement'''
            return unPostFix(-1, var)

        # NOTE(review): fncDispatch is defined elsewhere in the file. It is used
        # here as a table keyed by function name whose entries hold, at index 0,
        # a callable returning an (ok, value) tuple - confirm against its
        # definition.

        def binFnc(val, fnc):
            '''single argument function call'''
            ok, val = fncDispatch[fnc][0](val)
            return pushOperand(val) if ok else False

        def multiFnc(cnt):
            '''multiple argument function call

            cnt is the number of arguments on the stack; the function name lies
            directly below them.
            '''
            nonlocal stk
            args = stk[-cnt:]
            stk = stk[:-cnt]
            ok, val = fncDispatch[stk.pop()][0](args)
            return pushOperand(val) if ok else False

        def zeroFnc(name):
            '''zero argument function call'''
            ok, val = fncDispatch[name][0]()
            return pushOperand(val) if ok else False

        def strAdd(srgt, slft):
            '''string concatenation'''
            return pushOperand(slft + srgt)

        def strMul(cnt, strng):
            '''string repeat: strng repeated floor(cnt) times (null string if that is not positive)'''
            cnt = math.floor(cnt)
            res = ''
            while cnt > 0:                 # binary method: add the current power-of-two multiple for every set bit
                if cnt & 0x01:
                    res += strng
                cnt >>= 1                  # or 'cnt //= 2'
                strng *= 2
            return pushOperand(res)

        def strAddVal(val, var):
            '''shortcut string concatenation'''
            _ = unStrVal(var)
            return binVal(stk.pop() + val, var)

        def strMulVal(cnt, var):
            '''shortcut string replication'''
            _ = unStrVal(var)
            _ = strMul(cnt, stk.pop())
            return binVal(stk.pop(), var)

        # initialize

        # operators taking one item from the stack
        unDispatch = {
            'U*': unVal, 'U$': unStr, '$U*': unStrVal,
            'U-': unNeg, 'U+': pushOperand,
            'U!': unNot, '$U!': unStrNot, '$U!=': unStrNeq,
            'U++': unPfxInc,   'U--': unPfxDec,
            'P++': unPstInc,   'P--': unPstDec,
            'F()': multiFnc,   'P()': zeroFnc,
            'C&&+': logLftAnd, 'C&&-': logRgt,
            'C||+': logLftOr,  'C||-': logRgt,
            'C?+': terCond,
            'C:+': terTrue, 'C:-': pushOperand
        }

        # operators taking two items from the stack
        binDispatch = {
            'B+': binAdd, 'B-': binSub,
            'B+=': binAddVal, 'B-=': binSubVal,
            'B*': binMul, 'B/': binDiv,
            'B*=': binMulVal, 'B/=': binDivVal,
            'B==': binEqu, 'B!=': binNeq,
            'B=': binVal, '$B=': binVal,
            'B[': binAryNdx, 'B(': binFnc,
            '$B+': strAdd, '$B*': strMul,
            '$B==': binEqu, '$B!=': binNeq,
            '$B+=': strAddVal, '$B*=': strMulVal
        }

        skipLevel = 0                       # > 0 while skipping the unevaluated side of a conditional
        downToken = upToken = None          # tokens that lower / raise skipLevel while skipping
        stk = []                            # operand stack
        ok = True

        # main loop

        TOstring('Input', rpn)

        for v in rpn:

            ETshowtoken(v)

            if skipLevel > 0:
                # skipping: nothing is evaluated, we only track nesting
                TOshow('Skip level', f'{skipLevel}')
                if v == downToken:
                    skipLevel -= 1
                elif v == upToken:
                    skipLevel += 1

            elif v in binDispatch:
                ok = binDispatch[v](stk.pop(), stk.pop())

            elif v in unDispatch:
                ok = unDispatch[v](stk.pop())

            else:
                stk.append( v )

            if not ok:
                return ( False, None )

            ETshoweval( stk )

        if not stk:
            # nothing left to return (e.g. an empty token list): report it
            # through the normal error path instead of failing on stk.pop()
            evalErr( 'Evaluation produced no result' )
            return ( False, None )

        return ( True, stk.pop() )


### How it works

We point the implicit *compare string to null string* operator *\\$U!=* at *unStrNeq()*. We re-point the *negate string* operator *$U!* at its own function *unStrNot()*, instead of at *unNot()* as previously.

>Probably not strictly necessary here, as Python treats **not** as polymorphic and can disambiguate for us.
>
>We note that the string assignment operator *\\$B=* is still hanging around, along with the original numeric one. This is also not strictly necessary, as attested by the fact that both assignment operators point to the same function.
>
>It is the result of **=** and **+=** sharing the same type check lists. Changing **+=** from a numeric to string operator when needed is necessary because of the way string addition works. But we don't need more than one assignment operator *unless* we want to store strings in a separate table distinct from numbers.
>
>By contrast, we do need distinct string and numeric variable de-reference operators, as we've defined them to return different types when a variable name is not found in the symbol table.

## Running the parser

In [7]:
# running totals of passed / failed expressions
# - most useful for test input files, but never any harm
passCnt = 0
failCnt = 0

# where we keep instances of our classes
myParser = None
myEvaluator = None

def startUp(flag):
    '''begin execution

    Creates the parser and evaluator objects if they do not exist yet, shows
    the parser version, zeroes the pass/fail counters and sets the trace
    flag to 'flag'.
    '''
    global passCnt, failCnt, showTrace
    global myParser, myEvaluator
    myParser = myParser or Parser()                 # keep an existing instance
    myEvaluator = myEvaluator or Evaluator()
    UIshow( 'Parser', myParser.VERSIONNUMBER )
    passCnt, failCnt = 0, 0
    showTrace = flag
    
def shutDown():
    '''terminate execution: write a separator, then the pass and fail totals'''
    UIwriteSep()
    for tag, total in (('Pass', passCnt), ('Fail', failCnt)):
        UIshow( tag, total )
    
# run parser
        
def parseOne(this):
    '''parse/evaluate one expression and count it as a pass or a fail

    A leading '@' is removed before parsing and inverts the outcome of the
    evaluation step: the expression then counts as a pass when evaluation
    fails. A parse failure always counts as a fail.
    '''
    global passCnt, failCnt
    UIwriteSep()
    invert = this[0] == '@'
    expr = this[1:] if invert else this
    UIshow( 'Input', expr )
    success, rpn = myParser.doparse( expr )
    if success:
        UIshow( 'Final Parse', rpn )
        success, value = myEvaluator.doeval( rpn )
        if success:
            UIshow( 'Final Eval', value )
        if invert:
            success = not success
    if not success:
        failCnt += 1
    else:
        passCnt += 1

## Interactive use

In [8]:
def parse():
    '''interactive use: read and process expressions until the user quits

    Any input whose first character is 'q' or 'Q' ends the session. Blank
    input is ignored.
    '''
    startUp(showInteract)
    while True:
        inp = input( 'Expression: ' )
        UIwriteln( '' )                      # looks better with a blank line here
        if inp.upper().startswith('Q'):      # startswith(), not [0]: just pressing Enter gives an empty string
            break
        elif inp.strip():
            parseOne( inp )
    shutDown()

## Batch processing

In [9]:
# directory holding test input files, including the trailing separator
# (empty string if same as notebook directory)
testDir = r'..\ParserTest' '\\'

# convert current version number to match test file numbers
# - done this way so we can update only the version number and everything still works

def currNum():
    '''return the parser version number in test file form

    Everything up to the last three characters of the version string is
    right-justified to two characters with '0', and the last two characters
    are appended: '18.00' gives '1800', '9.05' gives '0905'.
    '''
    version = myParser.VERSIONNUMBER
    major = version[:len(version) - 3]
    minor = version[-2:]
    return major.rjust(2, '0') + minor

# make full path name to test file

def makePath(typ, num):
    '''return the path of a test file: testDir, then typ, then num, then '.txt' '''
    return '{}{}{}.txt'.format(testDir, typ, num)

# run one test

def runTest(this):
    '''run every expression in the test file 'this'

    The symbol table is emptied first. Each line is stripped; blank lines
    and lines starting with '#' are skipped, all others go to parseOne().
    '''
    UIwriteln(f'Parser {myParser.VERSIONNUMBER} vs {this[-12:-4]}')

    myEvaluator._symTable.clear()

    with open(this) as f:
        data = list(f)                       # read everything before processing starts
    for test in map(str.strip, data):
        if not test or test.startswith('#'):
            continue                         # skip blank and comment lines
        parseOne(test)
    
# run a test of current or specified version which should succeed
    
def good(num='curr'):
    '''run the pass test file of the current version (default) or of version 'num' '''
    startUp(showBatch)
    if num == 'curr':
        num = currNum()
    runTest(makePath('pass', num))
    shutDown()
    
# run a test of current or specified version which should fail

def bad(num='curr'):
    '''run the fail test file of the current version (default) or of version 'num' '''
    startUp(showBatch)
    if num == 'curr':
        num = currNum()
    runTest(makePath('fail', num))
    shutDown()
    
# run regression test against current and all previous test files

def _regressSuite(kind, tally):
    '''run all 'kind' test files up to and including the current version

    kind:  test file prefix, 'pass' or 'fail'
    tally: callable returning the counter which should NOT grow during a run

    Returns the list of files during whose run the counter did grow.
    '''
    startUp(showBatch)                       # must create objects before we can access variables inside them
    currFn = makePath(kind, currNum())
    flagged = []
    # glob returns names in arbitrary order: sort for a repeatable run order
    for fn in sorted(glob.glob(f'{testDir}{kind}????.txt')):
        if fn <= currFn:                     # ignore test files of later versions
            atstart = tally()
            runTest(fn)
            if atstart < tally():
                flagged.append(fn)
    shutDown()
    return flagged


def regress():
    '''run regression test against current and all previous test files

    First all pass test files are run, then all fail test files. Finally the
    pass test files in which an expression failed, and the fail test files
    in which an expression passed, are listed.
    '''
    UIwriteln('PASS tests')
    failed = _regressSuite('pass', lambda: failCnt)

    UIwriteln('FAIL tests')
    passed = _regressSuite('fail', lambda: passCnt)

    if not failed:
        UIwriteln('All pass tests succeded')
    else:
        UIwriteln('Pass tests which failed')
        for fn in failed:
            UIwriteln(f'  {fn}')

    if not passed:
        UIwriteln('All fail tests succeded')
    else:
        UIwriteln('Fail tests which passed')
        for fn in passed:
            UIwriteln(f'   {fn}')
              

# Testing the parser

In [None]:
parse()       # interactive, one expression at a time. Input starting with 'q' or 'Q' ends the session.

In [None]:
good()        # run current parser against its own pass test file. Use good('1234') to run against a specific pass test file.

In [None]:
bad()         # run current parser against its own fail test file. Use bad('5678') to run against a specific fail test file.

In [None]:
regress()     # run parser against the current and all previous pass and fail test files