# Parser 8.10 - Floating Point Exponents

The goal of this version of the parser is to extend its recognition of floating point numbers by adding signed exponents to them.

However in order to continue exploring how to detect out of range values before they overflow, their range will *still* not be extended.

## Libraries

In [None]:
import glob       # for searching directories
import math       # for 'log--()' functions

import re         # for regular expressions

Our function to convert literal numbers needs some revision in order to accommodate both decimal and hexadecimal floats with exponents.

## User output

In [None]:
visSep = '-------------'             # visual separator (written between results and used in trace output)

def UIwriteln(this):
    '''emit one item of output, followed by a blank line'''
    line = f'{this}\n'               # explicit newline; print() then appends its own
    print( line )
    
def UIwriteSep():
    '''emit the visual separator as an output line of its own'''
    separator = visSep
    UIwriteln( separator )

def UIshow(tag, value):
    '''emit a value labelled with its tag, in the form "tag: value"'''
    labelled = f'{tag}: {value}'
    UIwriteln( labelled )

def UIerror(this):
    '''report an error message to output, labelled "Error"'''
    UIshow( tag='Error', value=this )

# Tracing

In [None]:
# flags: show trace of processing
# - showInteract / showBatch are the values handed to startUp() by the
#   interactive and batch entry points respectively
# - showTrace is the live switch tested by the trace output functions

showInteract = True          # default for interactive use
showBatch = False            # default for batch use

showTrace = None             # control flag (falsy, so tracing stays off until startUp() assigns it)

# Trace Output

def TOshow(mesg, text):
    '''emit a trace message when tracing is on; the label is padded to 15 columns'''
    if not showTrace:
        return
    label = f'{mesg:15s}'
    UIshow( label, text )
        
def TOstring(tag, this):
    '''trace a sequence as its items converted to text and joined by single spaces'''
    if not showTrace:
        return                       # skip building the text when nothing would be shown
    joined = ' '.join( map(str, this) )
    TOshow( tag, joined )

# -----------------------
# Parse Tracing
# -----------------------

def PTshowexpr(this):
    '''trace a separator followed by the part of the expression still to be parsed'''
    for label, content in ( ('Parse', visSep), ('Current Expr', this) ):
        TOshow( label, content )

def PTshowparse(ok, res, stk):
    '''trace the RPN built so far and the operator stack, unless parsing has failed'''
    if not ok:
        return
    TOstring( 'Current RPN', res )
    TOstring( 'Operator Stack', stk )

def PTshowtoken(this):
    '''trace a matched token, except one that begins with a space (skipped whitespace)'''
    if this[0] == ' ':
        return
    TOshow( "Found Token", this )

# -----------------------
# Evaluation Tracing
# -----------------------

def ETshowtoken(this):
    '''trace a separator followed by the RPN token about to be evaluated'''
    for label, content in ( ('Eval', visSep), ('Current token', this) ):
        TOshow( label, content )

def ETshoweval(stk):
    '''trace the current contents of the operand stack'''
    operands = stk
    TOstring( 'Operand Stack', operands )


# Parser

In [None]:
# operands accepted:
# - decimal and hexadecimal floating point literals

# operators accepted:
# - unary negation, plus
# - binary addition, subtraction, multiplication, division
# - grouping parentheses
# - logical not, equality, inequality

# errors detected:
# - unrecognized input
# - out of range numeric input
# - malformed expression

# result tuple:
# - (True, [parse])
# - (False, None)

class Parser(object):
    '''Operator precedence parser converting an infix expression to RPN.

    Operands are unsigned decimal or hexadecimal floating point literals
    with an optional signed exponent: 'E' introduces a power of ten on a
    decimal literal, 'P' a power of two on a hexadecimal one.  Operators
    are those listed in _unPrec and _binPrec, plus grouping parentheses.
    '''

    VERSIONNUMBER = '8.10'

    _FLTMAX =  4294967295                                  # 2**32-1
    _FLTMIN = -4294967296                                  # -(2**32)

    # largest exponent value accepted, indexed by the (upper case) exponent indicator

    _expMax = {
        'P' : math.log2(_FLTMAX),                          # max base 2 exponent
        'E' : math.log10(_FLTMAX)                          # max base 10 exponent
    }

    _unPrefxOp = '[-+!]'                                   # unary operators

    _unPrec = { '-': 80, '+': 80, '!': 80 }                # unary operator precedence

    _binInfxOp = '[-+*/]|==|!='                            # binary operators

    _binPrec = {
        '*': 70, '/': 70,
        '-': 60, '+': 60,
        '==': 50, '!=': 50
    }


    def __init__(self):
        pass

    def doparse(self, this):
        '''Parse one infix expression into reverse Polish notation.

        this: the expression text.

        Returns (True, rpn) where rpn is a list of numeric operands and
        operator strings ('U' + symbol for unary, 'B' + symbol for binary),
        or (False, None) after the error has been reported with UIerror().
        '''

        # initialize

        expr = this                # save to new variable but retain original for error reports
        start = 15                 # column at which the error marker text ends:
                                   # - 4 for the '>>> ' prefix plus 11 for '^^near here',
                                   #   so the '^^' begins under the first unparsed character
        token = None               # anything successfully matched
        ok = wantoperand = True    # flags
        result = []                # rpn expression
        stk = [ ('EOE', 1) ]       # operator stack, seeded with an end-of-expression sentinel

        def parseErr(mesg):
            '''report parse error, echoing the input and marking how far parsing got'''
            UIerror(mesg)
            UIwriteln(f'>>> {this}')
            UIwriteln(f'{"^^near here".rjust(start)}')
            return False

        def popGEop(prec):
            '''pop operators of equal or greater precedence'''
            while prec <= stk[-1][1]:
                result.append(stk.pop()[0])

        def pushLeft(op, prec):
            '''push left associative operator on stack'''
            popGEop(prec)
            stk.append( (op, prec) )

        def popGop(prec):
            '''pop operators of greater precedence'''
            while prec < stk[-1][1]:
                result.append(stk.pop()[0])

        def pushRight(op, prec):
            '''push right associative operator on stack'''
            popGop(prec)
            stk.append( (op, prec) )

        def popUntil(op, prec):
            '''clear and check operator stack

            Moves every operator of precedence >= prec to the output, then
            pops one more entry and checks that it is the marker op.
            '''
            popGEop(prec)
            if op == stk.pop()[0]:      # top remaining operator is the one we want to see ?
                return True
            elif op == '(':
                return parseErr('Unmatched right parenthesis')
            elif op == 'EOE':
                return parseErr('Unmatched left parenthesis')

        def convertFloat(fplit, base, capgrp):
            '''convert floating point literal to internal form

            fplit:  the literal exactly as matched in the expression
            base:   radix of the integer and fractional digits (10 or 16 here)
            capgrp: regular expression applied to the upper-cased literal, with
                    capture groups 1 = integer digits, 2 = fraction digits,
                    4 = exponent indicator, 5 = exponent sign, 6 = exponent digits

            Appends the value to the RPN result and returns True, or reports
            a range error and returns False.
            '''

            def rangeErr():
                return parseErr(f'\'{fplit}\' is out of range')

            # collect the features of interest

            p = re.search(capgrp, fplit.upper())

            lint, lfrc, expbas, expsgn, lexp = p.group(1,2,4,5,6)

            # convert integer portion (if any)

            uint = 0
            if lint:
                p = re.search('[1-9A-F][0-9A-F]*', lint )      # start at first non-zero digit
                if p is not None:
                    for ch in p.group():
                        digval = '0123456789ABCDEF'.find(ch)
                        if uint <= (self._FLTMAX - digval)/base:   # next step stays <= _FLTMAX ?
                            uint =  uint * base + digval
                        else:
                            return rangeErr()

            # convert fractional portion (if any)

            ufrc = 0
            if lfrc:
                fbase = 1
                for ch in lfrc:
                    digval = '0123456789ABCDEF'.find(ch)
                    fbase *= base
                    ufrc += digval/fbase

            if uint == self._FLTMAX and ufrc != 0:             # any fraction would exceed the maximum
                return rangeErr()

            # value so far

            uflt = uint + ufrc

            # convert exponent portion (if any)
            # - the limit is applied to the exponent digits whatever the sign

            uexp = 0
            if lexp:
                for ch in lexp:
                    digval = '0123456789'.find(ch)
                    if uexp <= (self._expMax[expbas] - digval)/10:
                        uexp =  uexp * 10 + digval
                    else:
                        return rangeErr()

            # adjust value by exponent (if any)

            if uexp:
                power = (2 if expbas == 'P' else 10) ** uexp
                if expsgn == '-':
                    uflt /= power
                elif uflt <= self._FLTMAX/power:               # scaling up stays <= _FLTMAX ?
                    uflt *= power
                else:
                    return rangeErr()

            result.append(uflt)
            return True

        def startsWith(regex):
            '''test if expression starts with given regular expression

            On a match the text is consumed: it is saved in token, removed
            from the front of expr, and start is advanced by its length.
            '''
            nonlocal expr, start, token

            p = re.match(regex, expr)
            if p is None:
                return False
            else:
                token = p.group()              # what we matched
                start += len(token)            # update to next match position in original string
                expr = expr[len(token):]       # "chop off" what we matched
                PTshowtoken(token)             # trace
                return True

        # top level main loop

        while ok and len(expr):

            _ = startsWith('[ ]+')                             # skip leading whitespace

            if not expr:                                       # nothing but trailing whitespace was left:
                break                                          # - end of expression, not a missing token

            PTshowexpr(expr)                                   # trace

            # look for operand

            if wantoperand:

                if startsWith('[(]'):
                    # left parenthesis ?
                    stk.append( ('(', 2) )

                elif startsWith(self._unPrefxOp):
                    # unary prefix ?
                    pushRight( 'U' + token, self._unPrec[token] )

                else:

                    wantoperand = False                         # flip

                    if startsWith('0[xX]([0-9a-fA-F]+([.][0-9a-fA-F]*)?|[.][0-9a-fA-F]+)([pP][-+]?[0-9]+)?'):
                        # unsigned hexadecimal literal ?
                        ok = convertFloat(token, 16, '0X([0-9A-F]*)[.]?([0-9A-F]*)(([P])([-+])?([0-9]+))?')

                    elif startsWith('([0-9]+([.][0-9]*)?|[.][0-9]+)([eE][-+]?[0-9]+)?'):
                        # unsigned decimal literal ?
                        ok = convertFloat(token, 10, '([0-9]*)[.]?([0-9]*)(([E])([-+])?([0-9]+))?')

                    else:
                        # malformed
                        ok = parseErr('Expecting operand')

            # look for operator

            else:

                if startsWith('[)]'):
                    ok = popUntil( '(', 4 )

                else:

                    wantoperand = True                                # flip

                    if startsWith(self._binInfxOp):
                        # binary infix ?
                        pushLeft( 'B' + token, self._binPrec[token] )

                    else:
                        # malformed
                        ok = parseErr('Expecting operator')

            PTshowparse(ok, result, stk )                         # trace

        if ok:
            if wantoperand:
                ok = parseErr('Unexpected end of expression')     # must be in 'wantoperator' state
            else:
                ok = popUntil( 'EOE', 3 )                         # clear operator stack

        return (ok, result if ok else None)                       # done

### How it works

Decimal floating point literals with exponents should be quite familiar. In essence they are simply an optional suffix to the forms we already recognize:

- 123E4
- 123.e+20
- 123.4e-10

However the letter *E* used to prefix the value of the exponent in decimal floating point values presents a problem. Because *E* also represents a hexadecimal value, and because *convertFloat()* handles both decimal and hexadecimal, confusion is possible. Consider the decimal float **3E7** and the hexadecimal integer **0x3E7**. Then this bit of code:

```Python
# convert integer portion (if any)

uint = 0
if lint != '':
    p = re.search('[1-9A-F][0-9A-F]*', lint )
    if p != None:
        for ch in p.group():
            digval = '0123456789ABCDEF'.find(ch)
            if uint <= (self._FLTMAX - digval)/base:
                uint =  uint * base + digval
            else:
                return rangeErr()
```

will continue to work fine for hexadecimals but misinterpret the decimal as 300 + 140 + 7 = 447 instead of the intended 30,000,000.

One solution is to have two conversion functions, one for decimals and another for hexadecimals. That would guarantee no confusion but seems like a backward step.

We could also introduce a number of alternate pathways within the existing function designed to avoid possible misinterpretation. This would require one or more flag values used to decide which path to take. The most obvious flag would be the base we already pass to the conversion function. This might well work for the two bases we currently support, but what if we want to add more in the future? Would we have to keep re-writing the function to recognize them as well?

No, we're going to keep using a single function, and we're going to recognize both decimal and hexadecimal floats with exponents. But we're going to have to alter the conversion function a bit, hopefully in a way that will be relatively simple and robust.

We'll get around the problem of what *E* represents in hexadecimal floats by using a [notation introduced in the 1999 version](https://www.exploringbinary.com/hexadecimal-floating-point-constants/) of the [C programming language standard](https://en.wikipedia.org/wiki/C99) to differentiate decimal and hexadecimal floats with exponents. This form uses the letter *P* to prefix any exponent portion of a hexadecimal float. The exponent itself is still expressed in decimal, but stands for a power of two rather than ten (details which we must account for in our conversion function).

For decimal floating point, the regular expression we use to recognize them is almost the same as in the last version, just with an optional exponent tacked on:

```Python
'([0-9]+([.][0-9]*)?|[.][0-9]+)([eE][-+]?[0-9]+)?'
```

This means "(one or more digits, optionally followed by a decimal point, which can optionally be followed by zero or more digits, *OR* a decimal point followed by one or more digits), optionally followed by (*E* followed by an optional sign value followed by one or more decimal digits)".

The regular expression to recognize hexadecimal floats is similar, except using *P* for the exponent prefix.

Now for the fun part. We're going to add a third argument to *convertFloat()*, a regular expression specifically designed to "pick apart" or deconstruct whatever we've recognized as a float literal into a format that works for both decimal and hexadecimal. This is meant to minimize (though we cannot eliminate) the use of any flag values. It should also enable us to add any further bases we want to allow by passing the appropriate regular expression to the conversion function without having to change anything internal to it.

We'll describe the decimal version of the deconstruction regex (the hexadecimal version is similar, as is one for any other base):

```Python
'([0-9]*)[.]?([0-9]*)(([E])([-+])?([0-9]+))?'
```

We can get away with a somewhat simpler regex for deconstruction than recognition because we already know that we have some form of float.

The basic idea is to exploit [capture groups](https://docs.python.org/3/howto/regex.html#grouping) to isolate everything we need to know in order to perform the conversion. We have been using regex capture groups all along in their simplest form. The *group()* method applied to a match object just returns the entire string matched. But here we need something more specific:

```
Group 0 |----------------------------------------|
Group 1 |------|
Group 2             |------|
Group 3                     |--------------------|
Group 4                      |---|
Group 5                           |----|
Group 6                                  |------|

        ([0-9]*)[.]?([0-9]*)(([E])([-+])?([0-9]+))?
```

We use this capture group regex when creating a match object, then use the unpacking syntax to "pick apart" the result:


```Python
# collect the features of interest

p = re.search(capgrp, fplit.upper())

lint, lfrc, expbas, expsgn, lexp = p.group(1,2,4,5,6)
```

Groups 1, 2 and 6 are the literal numerical values of the integer, fractional and exponent parts of the float. Group 4 tells us which base is being raised to the power of the exponent and group 5 whether that exponent is negative. If any group is not present in the input literal it will have a value of **None** (or an empty string, which is indistinguishable from it in a truth test).

Group 3 is just the entire optional exponent, which is not helpful to us in that form. So although we need to specify it, we don't bother with it after that. We still do not care about any radix point, so we still ignore it.

>Another approach to converting a floating point literal to internal form is to do the job [manually](http://krashan.ppa.pl/articles/stringtofloat/). If we did not have access to regular expression capture groups, that's probably what we'd do. Almost a throwback to programming on the [bare metal](https://en.wikipedia.org/wiki/Bare_machine).

Once we have the integer and fractional components, we convert them just as we did in the previous version. Things only become interesting when we reach any exponent:

```Python
# convert exponent portion (if any)

uexp = 0
if lexp:
    for ch in lexp:
        digval = '0123456789'.find(ch)
        if uexp <= (self._expMax[expbas] - digval)/10:
            uexp =  uexp * 10 + digval
        else:
            return rangeErr()
```

As we have been doing all along, we want to guard against numeric overflow. The problem here is that the maximum exponent we can use for a power of two is not the same as for a power of ten. We solve that by calculating what these values are and saving them in a dictionary as a class variable. The dictionary is indexed by the indicator characters *E* and *P*.

>We could avoid requiring the *math* library and its very handy and precise *log2()* and *log10()* functions by hard coding the inevitable results the same way we hard coded the range limits in the first place. After all, once set those values are not going to change for either base. But we'd have to remember to change them if we ever changed the limits. Not an impossible task but one which the computer can do for us without our having to remember.

If there is a non-zero exponent value (and there's nothing to stop a float literal from sporting an exponent with a zero value), then we use it to adjust our float value:

```Python
# adjust value by exponent (if any)

if uexp:
    power = (2 if expbas == 'P' else 10) ** uexp
    if expsgn == '-':
        uflt /= power
    elif uflt <= self._FLTMAX/power:
        uflt *= power
    else:
        return rangeErr()
```

We calculate the adjustment value the most direct way possible: raising the base to the exponent power using Python's exponentiation operator. If the exponent is negative, we divide our float by that value (it's possible this might underflow, but in that case the result will be effectively zero. Close enough). We do have to check for overflow, but only once.

If overflow does happen, we report it as always.

>By this time there are enough such reports that it's worth making a nested *rangeErr()* function within *convertFloat()*. It would work just as well to make it a new function outside of *convertFloat()*. We might do that if any other *Parser* function or method ever needs to report a range error.

# Evaluator

In [None]:
# operators handled:
# - unary negation, plus
# - binary addition, subtraction, multiplication, division
# - logical negation, equality and inequality

# errors detected:
# - out of range
# - division by zero

# return tuple:
# - (True, result)
# - (False, None)

class Evaluator(Parser):
    '''Evaluate an RPN expression of the kind produced by Parser.doparse().

    Every arithmetic result is checked against the inherited _FLTMIN and
    _FLTMAX limits before it is pushed on the operand stack.
    '''

    def __init__(self):
        pass

    def doeval(self, rpn):
        '''Evaluate a list of operands and 'U...' / 'B...' operator strings.

        Returns (True, result) on success, or (False, None) after reporting
        an out of range result or a division by zero with UIerror().
        '''

        def inRange(ok, val):
            '''push val if the range test ok passed, otherwise report the failure'''
            if ok:
                stk.append( val )
            else:
                UIerror( 'Evaluation result out of range' )
            return ok

        def pushOperand(val):
            '''push operand on stack (also serves as unary plus)'''
            stk.append( val )
            return True

        def unNeg(arg):
            '''unary negation'''
            return inRange( arg != self._FLTMIN, -arg )       # -FLTMIN is FLTMAX + 1

        def unNot(arg):
            '''logical not'''
            return pushOperand( not arg )

        def binAdd(rgt, lft):
            '''binary addition'''
            if lft >= 0:
                return inRange( rgt <= self._FLTMAX - lft, lft+rgt )
            else:
                return inRange( rgt >= self._FLTMIN - lft, lft+rgt )

        def binSub(rgt, lft):
            '''binary subtraction'''
            if lft >= 0:
                return inRange( lft - self._FLTMAX <= rgt, lft-rgt )
            elif rgt >= 0:
                return inRange( lft - self._FLTMIN >= rgt, lft-rgt )

            # negative minus negative
            # required: lft - rgt <= FLTMAX

            else:
                return inRange( lft <= self._FLTMAX + rgt, lft-rgt )

        def binMul(rgt, lft):
            '''binary multiplication'''

            # a factor of magnitude <= 1 cannot enlarge the other operand, so the
            # product is safe to compute first; it must still be checked because
            # FLTMIN * -1 lands on FLTMAX + 1

            if abs(lft) <= 1 or abs(rgt) <= 1:
                prod = lft * rgt
                return inRange( self._FLTMIN <= prod <= self._FLTMAX, prod )

            if lft > 0:
                if rgt > 0:
                    return inRange( rgt <= self._FLTMAX / lft, lft*rgt )
                else:
                    return inRange( rgt >= self._FLTMIN / lft, lft*rgt )

            else:
                if rgt > 0:
                    return inRange( rgt <= self._FLTMIN / lft, lft * rgt )   # reverse inequality
                else:
                    return inRange( rgt >= self._FLTMAX / lft, lft * rgt )   # reverse inequality

        def binDiv(rgt, lft):
            '''binary division'''

            # a divisor of magnitude >= 1 cannot enlarge the dividend, so the
            # quotient is safe to compute first; it must still be checked because
            # FLTMIN / -1 lands on FLTMAX + 1

            if abs(rgt) >= 1:
                quot = lft / rgt
                return inRange( self._FLTMIN <= quot <= self._FLTMAX, quot )

            # rgt positive ?

            elif rgt > 0:

                # lft positive ?
                # req: lft/rgt <= FLTMAX

                if lft > 0 :
                    return inRange( lft <= self._FLTMAX * rgt, lft / rgt )

                # req: lft/rgt >= FLTMIN

                else:
                    return inRange( lft >= self._FLTMIN * rgt, lft / rgt )

            # rgt negative ?

            elif rgt < 0:

                # lft positive ?
                # req: lft/rgt >= FLTMIN

                if lft > 0:
                    return inRange( lft <= self._FLTMIN * rgt, lft / rgt )       # reverse inequality

                # req: lft/rgt <= FLTMAX

                else:
                    return inRange( lft >= self._FLTMAX * rgt, lft / rgt )        # reverse inequality

            # rgt is zero

            else:
                UIerror( 'Division by zero' )
                return False

        def binEqu(rgt, lft):
            '''logical equality'''
            return pushOperand( lft == rgt )

        def binNeq(rgt, lft):
            '''logical inequality'''
            return pushOperand( lft != rgt )

        # initialize

        unDispatch = {
            'U-': unNeg,
            'U+': pushOperand,
            'U!': unNot
        }

        binDispatch = {
            'B+': binAdd,
            'B-': binSub,
            'B*': binMul,
            'B/': binDiv,
            'B==': binEqu,
            'B!=': binNeq
        }

        stk = []
        ok = True

        # main loop

        for v in rpn:

            ETshowtoken(v)

            if v in binDispatch:
                ok = binDispatch[v](stk.pop(), stk.pop())       # first pop is the right operand

            elif v in unDispatch:
                ok = unDispatch[v](stk.pop())

            else:
                stk.append( v )                                 # anything else is an operand

            if not ok:
                return ( False, None )

            ETshoweval( stk )

        return ( True, stk.pop() )


### How it works

There are no changes to the evaluator in this version.

## Running the parser

In [None]:
passCnt = failCnt = 0                       # most useful for test input files, but never any harm
                                            # - zeroed by startUp(), incremented by parseOne()

myParser = myEvaluator = None               # where we keep instances of our classes (created by startUp())

def startUp(flag):
    '''prepare a run: create the parser and evaluator on first use, announce
    the parser version, zero the counters and set tracing to flag'''
    global passCnt, failCnt, showTrace
    global myParser, myEvaluator
    myParser = myParser or Parser()              # keep an existing instance
    myEvaluator = myEvaluator or Evaluator()
    UIshow( 'Parser', myParser.VERSIONNUMBER )
    passCnt, failCnt = 0, 0
    showTrace = flag
    
def shutDown():
    '''finish a run: write a separator followed by the pass and fail totals'''
    UIwriteSep()
    for label, count in ( ('Pass', passCnt), ('Fail', failCnt) ):
        UIshow( label, count )
    
# run parser

def parseOne(this):
    '''parse one expression, evaluate it if the parse succeeded, show the
    results and count the outcome as a pass or a fail'''
    global passCnt, failCnt
    UIwriteSep()
    UIshow( 'Input', this )
    succeeded = False
    parsed, rpn = myParser.doparse( this )
    if parsed:
        UIshow( 'Final Parse', rpn )
        succeeded, answer = myEvaluator.doeval( rpn )
        if succeeded:
            UIshow( 'Final Eval', answer )
    if not succeeded:
        failCnt += 1
    else:
        passCnt += 1

## Interactive use

In [None]:
def parse():
    '''interactive loop: read expressions and parse/evaluate each one until
    the user enters something starting with Q (either case)'''

    startUp(showInteract)
    while True:
        inp = input( 'Expression: ' )
        UIwriteln( '' )                      # looks better with a blank line here
        if inp.upper().startswith('Q'):      # safe on empty input, unlike indexing [0]
            break
        elif inp.strip():                    # ignore empty and all-blank lines
            parseOne( inp )
    shutDown()

## Batch processing

In [None]:
# prefix joined directly onto test file names by makePath(), so it must end with a path separator
testDir = '..\\ParserTest\\'            # directory holding test input files (empty string if same as notebook directory)

# convert current version number to match test file numbers
# - done this way so we can update only the version number and everything still works

def currNum():
    '''turn the parser version (e.g. '8.10') into a test file number (e.g. '0810')'''
    version = myParser.VERSIONNUMBER
    major = version[:len(version)-3]         # drop the separator and the two digit minor part
    minor = version[-2:]
    return f'{major:0>2}{minor}'

# make full path name to test file

def makePath(typ, num):
    '''build a test file path: test directory, then type prefix, number and ".txt"'''
    path = f'{testDir}{typ}{num}.txt'
    return path

# run one test

def runTest(this):
    '''run every expression found in one test file through parseOne()'''

    banner = f'Parser {myParser.VERSIONNUMBER} vs {this[-12:-4]}'
    UIwriteln(banner)

    with open(this) as handle:
        lines = handle.readlines()

    for expression in ( raw.strip() for raw in lines ):
        if not expression or expression[0] == '#':    # skip blank and comment lines
            continue
        parseOne(expression)
    
# run a test of current or specified version which should succeed
    
def good(num='curr'):
    '''run the pass test file of the current version, or of the number given'''

    startUp(showBatch)
    which = currNum() if num == 'curr' else num
    path = makePath('pass', which)
    runTest(path)
    shutDown()
    
# run a test of current or specified version which should fail

def bad(num='curr'):
    '''run the fail test file of the current version, or of the number given'''

    startUp(showBatch)
    which = currNum() if num == 'curr' else num
    path = makePath('fail', which)
    runTest(path)
    shutDown()
    
# run regression test against current and all previous test files

def regress():
    '''run the parser against the pass and fail test files of the current
    and all earlier versions, then list any file with an unexpected outcome

    A pass file is listed when it raised the fail count; a fail file is
    listed when it raised the pass count.
    '''

    def runSet(typ, title, unexpected):
        '''run all test files of one type up to the current version

        typ:        file name prefix, 'pass' or 'fail'
        title:      heading written before the files are run
        unexpected: callable returning the counter which should NOT move

        Returns the files during which that counter increased.
        '''
        UIwriteln(title)

        startUp(showBatch)                       # must create objects before we can access variables inside them
        currFn = makePath(typ, currNum())
        flagged = []

        # glob returns names in arbitrary order: sort for a repeatable run order

        for fn in sorted(glob.glob(f'{testDir}{typ}????.txt')):
            if fn <= currFn:                     # skip files of later versions
                atstart = unexpected()
                runTest(fn)
                if atstart < unexpected():
                    flagged.append(fn)
        shutDown()
        return flagged

    failed = runSet('pass', 'PASS tests', lambda: failCnt)
    passed = runSet('fail', 'FAIL tests', lambda: passCnt)

    if not failed:
        UIwriteln('All pass tests succeeded')
    else:
        UIwriteln('Pass tests which failed')
        for fn in failed:
            UIwriteln(f'  {fn}')

    if not passed:
        UIwriteln('All fail tests succeeded')
    else:
        UIwriteln('Fail tests which passed')
        for fn in passed:
            UIwriteln(f'  {fn}')
              

# Testing the parser

In [None]:
parse()       # interactive, one expression at a time (enter anything starting with Q to quit)

In [None]:
good()        # run current parser against its own pass test (expressions which should succeed). Use good('1234') to run against specific pass test.

In [None]:
bad()         # run current parser against its own fail test (expressions which should fail). Use bad('5678') to run against specific fail test.

In [None]:
regress()     # run parser against the current and all previous pass and fail tests