In [None]:
# Read the style sheet file and hand its contents to IPython's HTML display
# object.  HTML is imported from IPython.display, the documented public
# module, rather than from the internal module IPython.core.display.
from IPython.display import HTML
with open("../style.css", "r") as file:
    css = file.read()
HTML(css)  # last expression of the cell, so the notebook renders it

# A Shift-Reduce Parser for Arithmetic Expressions

In this notebook we implement a simple *shift-reduce* parser for arithmetic expressions.
This parser will implement the following grammar:
$$
  \begin{eqnarray*}
  \mathrm{expr}        & \rightarrow & \mathrm{expr}\;\;\texttt{'+'}\;\;\mathrm{product}   \\
                       & \mid        & \mathrm{expr}\;\;\texttt{'-'}\;\;\mathrm{product}   \\
                       & \mid        & \mathrm{product}                                    \\[0.2cm]
  \mathrm{product}     & \rightarrow & \mathrm{product}\;\;\texttt{'*'}\;\;\mathrm{factor} \\
                       & \mid        & \mathrm{product}\;\;\texttt{'/'}\;\;\mathrm{factor} \\
                       & \mid        & \mathrm{factor}                                     \\[0.2cm]
  \mathrm{factor}      & \rightarrow & \texttt{'('} \;\;\mathrm{expr} \;\;\texttt{')'}     \\
                       & \mid        & \texttt{NUMBER} 
  \end{eqnarray*}
$$

In [None]:
import re

The function `tokenize` transforms the string `s` into a list of tokens. See below for an example.

In [None]:
def tokenize(s):
    '''Split the arithmetic expression s into a list of token strings.

    Blanks and tabs are dropped.  Every number is represented by the
    token 'int'; parentheses and the operators + - * / are represented
    by themselves.  Any other character c that the scanner matches is
    turned into the token 'ERROR(c)'.
    '''
    lexSpec = r'''([ \t]+)        |  # blanks and tabs
                  ([1-9][0-9]*|0) |  # number
                  ([()])          |  # parentheses 
                  ([-+*/])        |  # arithmetical operators
                  (.)                # unrecognized character
               '''
    tokens = []
    for match in re.finditer(lexSpec, s, re.VERBOSE):
        # Exactly one of the five groups takes part in every match;
        # lastindex is the number of that group.
        group  = match.lastindex
        lexeme = match.group(group)
        if group == 1:            # white space carries no information
            continue
        if group == 2:            # the value of a number is discarded
            tokens.append('int')
        elif group in (3, 4):     # parenthesis or operator
            tokens.append(lexeme)
        else:                     # group 5: unrecognized character
            tokens.append(f'ERROR({lexeme})')
    return tokens

In [None]:
# Example: numbers are turned into 'int', blanks are dropped, operators and
# parentheses are passed through unchanged.
tokenize('1 + 2 * (3 - 4)')

In [None]:
class ShiftReduceParser:
    '''Container for the tables that drive a shift-reduce parser.

    The constructor stores its three arguments unchanged:
      mActionTable -- actionTable
      mGotoTable   -- gotoTable
      mStateTable  -- stateTable
    '''
    def __init__(self, actionTable, gotoTable, stateTable):
        self.mActionTable, self.mGotoTable, self.mStateTable = (
            actionTable, gotoTable, stateTable
        )

In [None]:
def parse(self, TL):
    '''Parse the token list TL with the tables stored in self.

    TL is a list of token strings.  The parser maintains a stack of states
    (initially just 's0') and a stack of grammar symbols.  In every step
    it looks up self.mActionTable for the pair (top state, next token):

      - no entry      : syntax error, the function returns False;
      - 'accept'      : the function returns True;
      - ('shift', s)  : the token and the state s are pushed and the
                        token is consumed;
      - anything else : a pair (head, body) denoting a grammar rule;
                        len(body) entries are popped from both stacks,
                        head is pushed, and the new state is
                        self.mGotoTable[state, head], where state is the
                        state on top of the stack after popping.

    A trace of every step is printed.  If the tokens run out before an
    accept action is reached (for example because TL is empty), this is
    reported as a syntax error and False is returned.
    '''
    index   = 0      # position of the next unread token in TL
    Symbols = []     # stack of symbols
    States  = ['s0'] # stack of states, s0 is start state
    while True:
        if index >= len(TL):
            # Without this guard TL[index] below would raise an IndexError.
            print('Unexpected end of input.')
            print('Syntax error!\n')
            return False
        q = States[-1]
        t = TL[index]
        print(f'States:  [ {", ".join(States)} ]')
        print(f'Symbols: {" ".join(Symbols)} | {" ".join(TL[index:])}')
        print(f'State:   {", ".join(self.mStateTable[q])}')
        p = self.mActionTable.get((q, t))
        if p is None:
            print(f'Action({q}, {t}) undefined.')
            print('Syntax error!\n')
            return False
        elif p == 'accept':
            print('Accepting!\n')
            return True
        elif p[0] == 'shift':
            s = p[1]
            print(f'Shifting state {s}')
            print(f'State:   {", ".join(self.mStateTable[s])}\n')
            Symbols.append(t)
            States.append(s)
            index += 1
        else:
            head, body = p
            print(f'Reducing with rule {head} → {" ".join(body)}')
            n = len(body)
            if n > 0:
                # The guard matters for rules with an empty body: a slice
                # such as xs[:-0] equals xs[:0], which is the empty list, so
                # popping "0 entries" by slicing would wipe both stacks.
                del Symbols[-n:]
                del States[-n:]
            Symbols.append(head)
            state  = States[-1]
            target = self.mGotoTable[state, head]
            States.append(target)
            print(f'State:    {", ".join(self.mStateTable[target])}\n')
            
# Install the module-level function as the method ShiftReduceParser.parse,
# then remove the global name so that only the method remains.
ShiftReduceParser.parse = parse
del parse

In [None]:
# Execute the notebook Parse-Table.ipynb.  The function test below uses the
# global names actionTable, gotoTable and stateTable; presumably they are
# defined by that notebook (it is not shown here, so confirm).
%run Parse-Table.ipynb

## Testing

In [None]:
def test(s):
    '''Tokenize the string s, parse the tokens and print the outcome.

    The parser is built from the global tables actionTable, gotoTable and
    stateTable.  The token 'EOF' is appended to the token list before it
    is handed to the parser.
    '''
    parser = ShiftReduceParser(actionTable, gotoTable, stateTable)
    tokens = tokenize(s)
    print(f'tokenlist: {tokens}\n')
    accepted = parser.parse(tokens + ['EOF'])
    verdict  = 'Parse successful!' if accepted else 'Parse failed!'
    print(verdict)

In [None]:
# Small example that mixes '+' and '*'.
test('1 + 2 * 3')

In [None]:
# Larger example with nested parentheses and all four operators; the
# expression starts with the number 0.
test('0*11+22*(33-44)/(5-10*5/(4-3))')

In [None]:
# Larger example with nested parentheses and all four operators.
test('11+22*(33-44)/(5-10*5/(4-3))')