# Implementing an SLR-Table-Generator

## A Grammar for Grammars

We use the following grammar to describe the syntax of a context free grammar.
```
grammar
    : rule
    | rule grammar
    ;

rule
    : VARIABLE ':' body_list ';'
    ;

body_list
    : body 
    | body '|' body_list
    ;

body
    : 
    | item body
    ;
 
item : VARIABLE 
     | TOKEN  
     | LITERAL
     ;
```

As the goal is to implement an *SLR-table-generator*, we first need to implement a parser for context free grammars.
The file `arith.g` in the directory `Examples` contains an example grammar that describes arithmetic expressions.

In [1]:
!cat Examples/arith.g

expr: expr '+' product
    | expr '-' product
    | product
    ;
 
product: product '*' factor
       | product '/' factor
       | factor
       ;
       
factor: '(' expr ')'
      | NUMBER
      ;


We use <span style="font-variant:small-caps;">Ply</span> to develop a parser for context free grammars.  

In [2]:
import ply.lex as lex

In [3]:
# Names of the token types used by the scanner; the trailing comments show
# the regular expression that recognizes each type.
tokens = [ 'VARIABLE',  # r'[a-z][a-z0-9_]*'
           'TOKEN',     # r'[A-Z][A-Z0-9_]*'
           'LITERAL',   # r"'.*?'"
         ]

In [4]:
# A variable starts with a lower case letter, a token with an upper case
# letter; both continue with letters of the same case, digits, or underscores.
t_VARIABLE = r'[a-z][a-z0-9_]*'
t_TOKEN    = r'[A-Z][A-Z0-9_]*'

In [5]:
def t_comment(t):
    r'//.*'
    # Matches from '//' to the end of the line.  Nothing is returned, so no
    # token is produced for a comment.

In [6]:
def t_LITERAL(t):
    r"'.*?'"
    # Drop the enclosing single quotes so that only the text of the literal
    # is kept as the token value.
    # NOTE(review): once the quotes are gone, a literal such as 'a' can no
    # longer be told apart from a variable name by `is_var` -- confirm that
    # this is intended.
    t.value = t.value[1:-1]
    return t

In [7]:
# Single characters of the grammar notation that are used as tokens of their
# own in the parser rules below.
literals = [':', '|', ';']

In [8]:
# Blanks, tabs, and carriage returns are skipped by the scanner.
t_ignore = ' \t\r'

In [9]:
def t_newline(t):
    r'\n'
    # Keep the line counter of the scanner up to date; no token is produced.
    scanner = t.lexer
    scanner.lineno = scanner.lineno + 1

In [10]:
def find_column(token):
    """Return the column of `token` within its line, counting from 1."""
    text = token.lexer.lexdata  # the complete string given to the scanner
    # `rfind` yields -1 if no newline precedes the token, so the first
    # character of the input ends up in column 1.
    previous_newline = text.rfind('\n', 0, token.lexpos)
    return token.lexpos - previous_newline

In [11]:
def t_error(t):
    # Report the first character of the text that could not be scanned ...
    column = find_column(t)
    print(f"Illegal character '{t.value[0]}' in line {t.lineno}, column {column}.")
    # ... and resume scanning behind that character.
    t.lexer.skip(1)

In [12]:
# NOTE(review): presumably needed because PLY looks up `__file__` of the
# defining module, which is not set inside a notebook -- TODO confirm.
__file__ = 'main'

In [13]:
# Build the scanner from the token definitions given above.
lexer = lex.lex()

In [14]:
def test_scanner(file_name):
    """Print the contents of `file_name`, followed by all tokens the scanner finds in it."""
    with open(file_name, 'r') as source:
        text = source.read()
    print(text)
    lexer.input(text)
    lexer.lineno = 1  # line numbers start afresh for every input
    for token in lexer:
        print(token)

In [15]:
# Scan the example grammar for arithmetic expressions.
test_scanner('Examples/arith.g')

expr: expr '+' product
    | expr '-' product
    | product
    ;
 
product: product '*' factor
       | product '/' factor
       | factor
       ;
       
factor: '(' expr ')'
      | NUMBER
      ;

LexToken(VARIABLE,'expr',1,0)
LexToken(:,':',1,4)
LexToken(VARIABLE,'expr',1,6)
LexToken(LITERAL,'+',1,11)
LexToken(VARIABLE,'product',1,15)
LexToken(|,'|',2,27)
LexToken(VARIABLE,'expr',2,29)
LexToken(LITERAL,'-',2,34)
LexToken(VARIABLE,'product',2,38)
LexToken(|,'|',3,50)
LexToken(VARIABLE,'product',3,52)
LexToken(;,';',4,64)
LexToken(VARIABLE,'product',6,68)
LexToken(:,':',6,75)
LexToken(VARIABLE,'product',6,77)
LexToken(LITERAL,'*',6,85)
LexToken(VARIABLE,'factor',6,89)
LexToken(|,'|',7,103)
LexToken(VARIABLE,'product',7,105)
LexToken(LITERAL,'/',7,113)
LexToken(VARIABLE,'factor',7,117)
LexToken(|,'|',8,131)
LexToken(VARIABLE,'factor',8,133)
LexToken(;,';',9,147)
LexToken(VARIABLE,'factor',11,157)
LexToken(:,':',11,163)
LexToken(LITERAL,'(',11,165)
LexToken(VARIABLE,'expr',11,169)
Le

In [16]:
import ply.yacc as yacc

In [17]:
# The start symbol of the grammar for grammars.
start = 'grammar'

In [18]:
def p_grammar_one(p):
    "grammar : rule"
    # A single rule already yields a list of tuples (see p_rule); pass it on.
    p[0] = p[1]

def p_grammar_more(p):
    "grammar : rule grammar"
    # The tuples of the first rule are followed by those of the remaining rules.
    p[0] = [*p[1], *p[2]]

In [19]:
def p_rule(p):
    "rule : VARIABLE ':' body_list ';'"
    # Every alternative of the rule becomes a tuple that starts with the
    # variable defined by the rule.
    head = (p[1],)
    alternatives = []
    for body in p[3]:
        alternatives.append(head + body)
    p[0] = alternatives

In [20]:
def p_body_list_one(p):
    "body_list : body"
    # A single body is wrapped into a list with one element.
    p[0] = [p[1]]

def p_body_list_more(p):
    "body_list : body '|' body_list "
    # The first body is put in front of the bodies that follow the '|'.
    p[0] = [p[1], *p[3]]

In [21]:
def p_body_empty(p):
    "body : "
    # An empty body is represented by the empty tuple.
    p[0] = ()

def p_body_more(p):
    "body : item body"
    # The first item is put in front of the items of the remaining body.
    p[0] = (p[1], *p[2])

In [22]:
def p_item_variable(p):
    "item : VARIABLE"
    # An item is represented by the string value of its token.
    p[0] = p[1]

def p_item_terminal(p):
    "item : TOKEN"
    # An item is represented by the string value of its token.
    p[0] = p[1]

def p_item_literal(p):
    "item : LITERAL"
    # An item is represented by the string value of its token.
    p[0] = p[1]

In [23]:
def p_error(t):
    # Report a syntax error.  `t` is the offending token, or None if the
    # error was detected at the end of the input.  The column must therefore
    # only be computed once we know that there is a token.
    if t:
        column = find_column(t)
        print(f'Syntax error at token "{t.value}" in line {t.lineno}, column {column}.')
    else:
        print('Syntax error at end of input.')

In [24]:
# Build the parser from the p_* functions above; no table file is written
# and debugging is switched on.
yacc.yacc(write_tables=False, debug=True)

Generating LALR tables


<ply.yacc.LRParser at 0x1064b8ad0>

In [25]:
def parse(file):
    """Print the grammar stored in `file` and return the result of parsing it."""
    lexer.lineno = 1  # line numbers start afresh for every input
    with open(file, 'r') as source:
        text = source.read()
    print(text)
    return yacc.parse(text)

In [26]:
# Parse the example grammar and display the resulting list of tuples.
ruleList = parse('Examples/arith.g')
ruleList

expr: expr '+' product
    | expr '-' product
    | product
    ;
 
product: product '*' factor
       | product '/' factor
       | factor
       ;
       
factor: '(' expr ')'
      | NUMBER
      ;



[('expr', 'expr', '+', 'product'),
 ('expr', 'expr', '-', 'product'),
 ('expr', 'product'),
 ('product', 'product', '*', 'factor'),
 ('product', 'product', '/', 'factor'),
 ('product', 'factor'),
 ('factor', '(', 'expr', ')'),
 ('factor', 'NUMBER')]

The parser will return a list of grammar rules, where each rule of the form
$$ a \rightarrow \beta $$
is stored as the tuple `(a,) + 𝛽`.

In [None]:
%load_ext nb_mypy

We define a few *type aliases* in order to make the types more readable.

In [None]:
# All grammar symbols are represented as strings; the aliases only document
# the intended role of a string.
Variable = str
Token    = str
Symbol   = Variable | Token
Symbols  = tuple[Symbol, ...]  # a sequence of grammar symbols, e.g. a rule body

### The Class `GrammarRule`

The class `GrammarRule` is used to store a single grammar rule.  As we have to use objects of type `GrammarRule` as *keys* in a dictionary later, we have to provide the methods `__eq__`, `__ne__`, and `__hash__`.

In [None]:
class GrammarRule:
    """A grammar rule `mVariable → mBody` that can be used as a dictionary key."""

    def __init__(self, variable: Variable, body: Symbols) -> None:
        self.mVariable: Variable = variable  # left hand side of the rule
        self.mBody    : Symbols  = body      # right hand side of the rule

    def __repr__(self) -> str:
        return f'{self.mVariable} → {" ".join(self.mBody)}'

    def __hash__(self) -> int:
        # The hash is derived from the printed form of the rule.
        return hash(repr(self))

    def __eq__(self, other) -> bool:
        # Only another GrammarRule with the same variable and body is equal.
        if not isinstance(other, GrammarRule):
            return False
        return (self.mVariable, self.mBody) == (other.mVariable, other.mBody)

    def __ne__(self, other) -> bool:
        return not self.__eq__(other)

The function `transform(rules)` takes a list of tuples representing grammar rules and converts these tuples into objects of class 
`GrammarRule`.

In [None]:
def transform(rules: list[tuple[Symbol, ...]]) -> list[GrammarRule]:
    """Turn every tuple `(variable, *body)` from `rules` into a `GrammarRule`."""
    grammar_rules = []
    for rule in rules:
        variable, *body = rule
        grammar_rules.append(GrammarRule(variable, tuple(body)))
    return grammar_rules

In [None]:
# Declare the type of `ruleList` for the type checker.
ruleList: list[tuple[Symbol, ...]]

In [None]:
# Convert the tuples returned by the parser into `GrammarRule` objects.
grammar = transform(ruleList) 

In [None]:
grammar

Given a string `name`, which is either a *variable*, a *token*, or a *literal*, the function `is_var` checks whether `name` is a variable.  The function distinguishes variables from tokens because variable names start with a lower case letter, while token names start with an upper case letter.  Since the scanner strips the enclosing quotes from literals, a literal is classified correctly only if its text does not start with a lower case letter.

In [None]:
def is_var(name: Symbol) -> bool:
    """Return True iff `name` starts with a lower case letter, i.e. looks like a variable."""
    first_char = name[0]
    return first_char.islower()

Given a list `Rules` of `GrammarRules`, the function `collect_variables(Rules)` returns the set of all *variables* occurring in `Rules`.

In [None]:
def collect_variables(Rules: list[GrammarRule]) -> set[Variable]:
    """Return the set of all variables that occur in `Rules`.

    A variable is either the left hand side of a rule or a symbol in a rule
    body for which `is_var` holds.  The function does not print anything:
    the leftover debugging output of every rule has been removed, which
    brings it in line with `collect_tokens`.
    """
    Variables: set[Variable] = set()
    for rule in Rules:
        Variables.add(rule.mVariable)
        Variables.update(item for item in rule.mBody if is_var(item))
    return Variables

In [None]:
# Show the variables of the example grammar.
collect_variables(grammar) 

Given a list `Rules` of `GrammarRules`, the function `collect_tokens(Rules)` returns the set of all *tokens* and *literals* occurring in `Rules`.

In [None]:
def collect_tokens(Rules: list[GrammarRule]) -> set[Token]:
    """Return the set of all body symbols in `Rules` for which `is_var` does not hold."""
    return { item for rule in Rules for item in rule.mBody if not is_var(item) }

In [None]:
# Show the tokens and literals of the example grammar.
collect_tokens(grammar)

## Marked Rules

The class `MarkedRule` stores a single *marked rule* of the form
$$ v \rightarrow \alpha \bullet \beta $$
where the *variable* $v$ is stored in the member variable `mVariable`, while $\alpha$ and $\beta$ are stored in the variables `mAlpha` and `mBeta`, respectively.  These variables are assumed to contain tuples of *grammar symbols*.  A *grammar symbol* is either
- a *variable*,
- a *token*, or
- a *literal*, i.e. a string enclosed in single quotes.


Later, we need to maintain sets of *marked rules* to represent *states*.  Therefore, we have to define the methods `__eq__`, `__ne__`, and `__hash__`.

In [None]:
class MarkedRule():
    """A marked rule `mVariable → mAlpha • mBeta`; hashable, so it can be stored in sets."""

    def __init__(self, variable: Variable, alpha: Symbols, beta: Symbols) -> None:
        self.mVariable: Variable = variable  # left hand side of the rule
        self.mAlpha   : Symbols  = alpha     # symbols in front of the dot
        self.mBeta    : Symbols  = beta      # symbols following the dot

    def __repr__(self) -> str:
        alphaStr, betaStr = ' '.join(self.mAlpha), ' '.join(self.mBeta)
        return f'{self.mVariable} → {alphaStr} • {betaStr}'

    def __hash__(self) -> int:
        # The hash is derived from the printed form of the marked rule.
        return hash(repr(self))

    def __eq__(self, other) -> bool:
        # Only another MarkedRule with the same three components is equal.
        if not isinstance(other, MarkedRule):
            return False
        mine   = (self.mVariable, self.mAlpha, self.mBeta)
        theirs = (other.mVariable, other.mAlpha, other.mBeta)
        return mine == theirs

    def __ne__(self, other) -> bool:
        return not self.__eq__(other)

Given a *marked rule* `self`, the function `is_complete` checks whether the *marked rule* `self` has the form
$$ c \rightarrow \alpha\; \bullet,$$
i.e. it checks whether the $\bullet$ is at the end of the grammar rule.

In [None]:
def is_complete(self: MarkedRule) -> bool:
    """Return True iff no symbol follows the dot."""
    return not self.mBeta

MarkedRule.is_complete = is_complete # type: ignore
del is_complete

Given a *marked rule* `self` of the form
$$ c \rightarrow \alpha \bullet X\, \delta, $$
the function `symbol_after_dot` returns the *symbol* $X$. If there is no symbol after the $\bullet$, the method returns `None`.

In [None]:
def symbol_after_dot(self: MarkedRule) -> Symbol | None:
    """Return the symbol that directly follows the dot, or None if there is none."""
    return self.mBeta[0] if self.mBeta else None

MarkedRule.symbol_after_dot = symbol_after_dot # type: ignore
del symbol_after_dot

Given a marked rule of the form
$$ c \rightarrow \alpha \bullet b \delta, $$
this function returns the variable $b$ following the dot.  If there is no variable following the dot, the function returns `None`.  

In [None]:
def next_var(self: MarkedRule) -> Variable | None:
    """Return the symbol following the dot if it is a variable, otherwise None."""
    if not self.mBeta:
        return None
    candidate = self.mBeta[0]
    return candidate if is_var(candidate) else None

MarkedRule.next_var = next_var # type: ignore
del next_var

The function `move_dot(self)` transforms a *marked rule*  of the form 
$$ c \rightarrow \alpha \bullet X\, \beta $$
into a *marked rule* of the form
$$ c \rightarrow \alpha\, X \bullet \beta, $$
i.e. the $\bullet$ is moved over the next symbol.  Invocation of this method assumes that there is a symbol
following the $\bullet$.

In [None]:
def move_dot(self: MarkedRule) -> MarkedRule:
    """Return a new marked rule in which the dot has been moved over the next symbol.

    There has to be a symbol following the dot.
    """
    crossed   = self.mBeta[0]
    new_alpha = self.mAlpha + (crossed,)
    new_beta  = self.mBeta[1:]
    return MarkedRule(self.mVariable, new_alpha, new_beta)

MarkedRule.move_dot = move_dot # type: ignore
del move_dot

The function `to_rule(self)` turns the *marked rule* `self` into  a `GrammarRule`, i.e. the *marked rule*
$$ c \rightarrow \alpha \bullet \beta $$
is turned into the grammar rule
$$ c \rightarrow \alpha\, \beta. $$

In [None]:
def to_rule(self: MarkedRule) -> GrammarRule:
    """Return the grammar rule that results from dropping the dot."""
    body = self.mAlpha + self.mBeta
    return GrammarRule(self.mVariable, body)

MarkedRule.to_rule = to_rule # type: ignore
del to_rule

## SLR-Table-Generation

The class `Grammar` represents a context free grammar.  It stores a list of the `GrammarRules` of the given grammar.
Each grammar rule is of the form
$$ a \rightarrow \beta $$
where $\beta$ is a tuple of variables, tokens, and literals.
The start symbol is assumed to be the variable on the left hand side of the first rule. The grammar is *augmented* with the rule
$$ \widehat{s} \rightarrow s\, \$. $$
Here $s$ is the start variable of the given grammar and $\widehat{s}$ is a new variable that is the start variable of the *augmented grammar*. The symbol `$` denotes the end of input.  The non-obvious member variables of the class `Grammar` have the following interpretation
- `mStates` is the set of all states of the *SLR-parser*.  These states are sets of *marked rules*.
- `mStateNames` is a dictionary assigning names of the form `s0`, `s1`, $\cdots$, `sn` to the states stored in 
  `mStates`.  The functions `action` and `goto` will be defined for *state names*, not for *states*, because 
  otherwise the table representing these functions would become both huge and unreadable.
  Therefore, the dictionary `mStateNames` is needed to associate the states with their names.
- `mConflicts` is a Boolean variable that will be set to `True` if the table generation discovers 
  *shift/reduce conflicts* or *reduce/reduce conflicts*.

In [None]:
class Grammar():
    """A context free grammar together with the data needed for its SLR tables.

    The start symbol is the variable defined by the first rule.  The grammar
    is augmented with the rule `ŝ → s $`, where `s` is the start symbol and
    `$` denotes the end of the input.
    """
    def __init__(self, Rules: list[GrammarRule]):
        # Work on a copy: the augmenting rule appended below must not show
        # up in the list owned by the caller.
        self.mRules     : list[GrammarRule] = list(Rules)
        self.mStart     : Variable          = Rules[0].mVariable
        self.mVariables : set[Variable]     = collect_variables(Rules)
        self.mTokens    : set[Token]        = collect_tokens(Rules)
        self.mStates    : set[frozenset[MarkedRule]] = set()
        self.mConflicts : bool              = False
        # maps every state to its name
        self.mStateNames: dict[frozenset[MarkedRule], str] = {}
        self.mVariables.add('ŝ')
        self.mTokens.add('$') # short for EOF
        self.mRules.append(GrammarRule('ŝ', (self.mStart, '$'))) # augment the grammar
        self.mRuleNames: dict[GrammarRule, str] = {} 
        self.compute_tables()                                    

    def compute_tables(self) -> None:
        # Placeholder that does nothing; the actual implementation is
        # attached to the class further below.
        pass

Given a set of `Variables`, the function `initialize_dictionary` returns a dictionary that assigns the empty set to all variables.
This function is needed to initialize the member variables `mFirst` and `mFollow`, which are dictionaries storing the *first-sets* and
*follow-sets* of the syntactical variables.

In [None]:
def initialize_dictionary(Variables: set[Variable]) -> dict[Variable, set[Token]]:
    """Return a dictionary that maps every variable to an empty set of its own."""
    table: dict[Variable, set[Token]] = {}
    for variable in Variables:
        table[variable] = set()
    return table

Given a `Grammar`, the function `compute_tables` computes
- the sets `First(v)` and `Follow(v)` for every variable `v`,
- the set of all *states* of the *SLR-Parser*,
- the *action table*, and
- the *goto table*. 

Given a grammar `g`,
- `g.mFirst` is a dictionary such that `g.mFirst[a] = First(a)` and
- `g.mFollow` is a dictionary such that `g.mFollow[a] = Follow(a)` for all variables `a`.

In [None]:
def compute_tables(self: Grammar) -> None:
    """Compute the first and follow sets, the rule names, the states, and both tables."""
    self.mFirst  = initialize_dictionary(self.mVariables) # type: ignore
    self.mFollow = initialize_dictionary(self.mVariables) # type: ignore
    # The order of the following calls matters: the follow sets are computed
    # from the first sets, and the tables need the follow sets, the rule
    # names, and the state names assigned by `all_states`.
    self.compute_first()         # type: ignore
    self.compute_follow()        # type: ignore
    self.compute_rule_names()    # type: ignore
    self.all_states()            # type: ignore
    self.compute_action_table()  # type: ignore
    self.compute_goto_table()    # type: ignore
    
# Replace the placeholder method of the class.
Grammar.compute_tables = compute_tables # type: ignore
del compute_tables

The function `compute_rule_names` assigns a unique name to each *rule* of the grammar.  These names are used later
to represent *reduce actions* in the *action table*.

In [None]:
def compute_rule_names(self: Grammar) -> None:
    """Name the rules `r0`, `r1`, ... in the order in which they occur in `mRules`."""
    for index, rule in enumerate(self.mRules):
        self.mRuleNames[rule] = f'r{index}'

Grammar.compute_rule_names = compute_rule_names # type: ignore
del compute_rule_names

The function `compute_first(self)` computes the sets $\texttt{First}(c)$ for all variables $c$ and stores them in the dictionary `mFirst`.  Abstractly, given a variable $c$ the function $\texttt{First}(c)$ is the set of all tokens that can start a string that is derived from $c$:
$$\texttt{First}(\texttt{c}) := 
  \Bigl\{ t \in T \Bigm| \exists \gamma \in (V \cup T)^*: \texttt{c} \Rightarrow^* t\,\gamma \Bigr\}.
$$
The definition of the function $\texttt{First}()$ is extended to strings from $(V \cup T)^*$ as follows:
- $\texttt{FirstList}(\lambda) = \{\}$.
- $\texttt{FirstList}(t \beta) = \{ t \}$  if $t \in T$.
- $\texttt{FirstList}(\texttt{a} \beta) = \left\{
       \begin{array}[c]{ll}
         \texttt{First}(\texttt{a}) \cup \texttt{FirstList}(\beta) & \mbox{if $\texttt{a} \Rightarrow^* \lambda$;} \\
         \texttt{First}(\texttt{a})                                & \mbox{otherwise.}
       \end{array}
       \right.
      $ 

If $\texttt{a}$ is a variable of $G$ and the rules defining $\texttt{a}$ are given as 
$$\texttt{a} \rightarrow \alpha_1 \mid \cdots \mid \alpha_n, $$
then we have
$$\texttt{First}(\texttt{a}) = \bigcup\limits_{i=1}^n \texttt{FirstList}(\alpha_i). $$
The dictionary `mFirst` that stores this function is computed via a *fixed point iteration*.

In [None]:
def compute_first(self: Grammar) -> None:
    """Fill `mFirst` by a fixed point iteration over all rules and print the result."""
    while True:
        grown = False
        for rule in self.mRules:
            head       = rule.mVariable
            candidates = self.first_list(rule.mBody)    # type: ignore
            # Anything in `candidates` that is not yet known for `head`?
            if candidates - self.mFirst[head]:          # type: ignore
                self.mFirst[head] |= candidates         # type: ignore
                grown = True
        if not grown:
            break
    print('First sets:')
    for v in self.mVariables:
        print(f'First({v}) = {self.mFirst[v]}')     # type: ignore

Grammar.compute_first = compute_first               # type: ignore
del compute_first

Given a tuple of variables and tokens `alpha`, the function `first_list(alpha)` computes the function $\texttt{FirstList}(\alpha)$ that has been defined above.  If `alpha` is *nullable*, then the result will contain the empty string $\lambda$.

In [None]:
def first_list(self: Grammar, alpha: Symbols) -> set[Token]:
    """Return the tokens that can start `alpha` according to the current `mFirst`.

    The empty string '' is part of the result if `alpha` is empty or if all
    symbols of `alpha` are variables whose first sets contain ''.
    """
    if not alpha:
        return { '' }
    head = alpha[0]
    if not is_var(head):
        return { head }
    return eps_union(self.mFirst[head], self.first_list(alpha[1:])) # type: ignore

Grammar.first_list = first_list                              # type: ignore
del first_list

The arguments `S` and `T` of `eps_union` are sets that contain tokens and, additionally, they might contain the empty string 
$\lambda =$ `''`.  The specification of `eps_union` is:
$$ \texttt{eps\_union}(S, T) = \left\{ \begin{array}{ll}
                                       S          & \mbox{if $\lambda \not\in S$} \\
                                       S \cup T   & \mbox{if $\lambda \in S \wedge \lambda \in T$} \\
                                       S \cup T - \{\lambda \} & \mbox{if $\lambda \in S \wedge \lambda \not\in T$}
                                      \end{array}
                              \right.
$$

In [None]:
def eps_union(S: set[Token], T: set[Token]) -> set[Token]:
    """Return `S` if '' is not in `S`; otherwise return `S` without '' united with `T`."""
    if '' not in S:
        return S
    # If '' is also in T, it is put back by the union with T.
    return (S - { '' }) | T

Given an augmented grammar $G = \langle V,T,R\cup\{\widehat{s} \rightarrow s\,\$\}, \widehat{s}\rangle$ 
and a variable $a$, the set of tokens that might follow $a$ is defined as:
$$\texttt{Follow}(a) := 
 \bigl\{ t \in \widehat{T} \,\bigm|\, \exists \beta,\gamma \in (V \cup \widehat{T})^*: 
                           \widehat{s} \Rightarrow^* \beta \,a\, t\, \gamma 
  \bigr\}.
$$
The function `compute_follow` computes the sets $\texttt{Follow}(a)$ for all variables $a$ via a *fixed-point iteration*.

In [None]:
def compute_follow(self: Grammar) -> None:
    """Fill `mFollow` by a fixed point iteration over all rules and print the result."""
    self.mFollow[self.mStart] = { '$' }                           # type: ignore
    changed = True
    while changed:
        changed = False
        for rule in self.mRules:
            head, body = rule.mVariable, rule.mBody
            for position, symbol in enumerate(body):
                if not is_var(symbol):
                    continue
                # Tokens that can start the rest of the body; if the rest can
                # vanish, everything that follows `head` follows `symbol` too.
                rest     = self.first_list(body[position+1:])     # type: ignore
                addition = eps_union(rest, self.mFollow[head])    # type: ignore
                if not (addition <= self.mFollow[symbol]):        # type: ignore
                    changed = True
                    self.mFollow[symbol] |= addition              # type: ignore
    print('Follow sets (note that "$" denotes the end of file):')
    for v in self.mVariables:
        print(f'Follow({v}) = {self.mFollow[v]}')                 # type: ignore

Grammar.compute_follow = compute_follow                           # type: ignore
del compute_follow

If $\mathcal{M}$ is a set of *marked rules*, then the *closure* of $\mathcal{M}$ is the smallest set $\mathcal{K}$ such that
we have the following:
- $\mathcal{M} \subseteq \mathcal{K}$,
- If $a \rightarrow \beta \bullet c\, \delta$ is a *marked rule* from 
  $\mathcal{K}$, and $c$ is a variable and if, furthermore,
  $c \rightarrow \gamma$ is a grammar rule,
  then the marked rule $c \rightarrow \bullet \gamma$
  is an element of $\mathcal{K}$:
  $$(a \rightarrow \beta \bullet c\, \delta) \in \mathcal{K} 
         \;\wedge\; 
         (c \rightarrow \gamma) \in R
         \;\Rightarrow\; (c \rightarrow \bullet \gamma) \in \mathcal{K}
  $$

We define $\texttt{closure}(\mathcal{M}) := \mathcal{K}$.  The function `cmp_closure` computes this closure for a given set of *marked rules* via a *fixed-point iteration*.

In [None]:
def cmp_closure(self, Marked_Rules: set[MarkedRule]) -> frozenset[MarkedRule]:
    """Return the closure of `Marked_Rules` as a frozenset.

    Whenever a variable follows the dot in one of the collected marked rules,
    every grammar rule defining this variable is added with the dot in front
    of its body.  This is repeated until nothing new is found.  The argument
    itself is left unchanged.
    """
    All_Rules = set(Marked_Rules)  # copy, so the set of the caller is not modified
    New_Rules = set(All_Rules)     # rules that still have to be inspected
    while New_Rules:
        More_Rules = set()
        for marked in New_Rules:
            c = marked.next_var()                                # type: ignore
            if c is None:
                continue
            for rule in self.mRules:
                if rule.mVariable == c:
                    More_Rules.add(MarkedRule(c, (), rule.mBody))
        New_Rules  = More_Rules - All_Rules
        All_Rules |= New_Rules
    return frozenset(All_Rules)

Grammar.cmp_closure = cmp_closure                                 # type: ignore
del cmp_closure

Given a set of *marked rules* $\mathcal{M}$ and a *grammar symbol* $X$, the function $\texttt{goto}(\mathcal{M}, X)$ 
is defined as follows:
$$\texttt{goto}(\mathcal{M}, X) := \texttt{closure}\Bigl( \bigl\{ 
   a \rightarrow \beta\, X \bullet \delta \bigm| (a \rightarrow \beta \bullet X\, \delta) \in \mathcal{M} 
   \bigr\} \Bigr).
$$

In [None]:
def goto(self, Marked_Rules, x):
    """Return the closure of all rules from `Marked_Rules` after moving the dot over `x`."""
    Shifted = { mr.move_dot() for mr in Marked_Rules if mr.symbol_after_dot() == x }
    return self.cmp_closure(Shifted)

Grammar.goto = goto  # type: ignore
del goto

The function `all_states` computes the set of all states of an *SLR-parser*.  The function starts with the state
$$ \texttt{closure}\bigl(\{ \widehat{s} \rightarrow \bullet s \, \$\}\bigr) $$
and then tries to compute new states by using the function `goto`.  This computation proceeds via a 
*fixed-point iteration*.  Once all states have been computed, the function assigns names to these states.
This association is stored in the dictionary *mStateNames*.

In [None]:
def all_states(self) -> None: 
    """Compute all states reachable from the start state, name them, and print them.

    The states are stored in `mStates`, their names in `mStateNames`; the
    start state is always called `s0`.
    """
    start_state  = self.cmp_closure({ MarkedRule('ŝ', (), (self.mStart, '$')) })
    self.mStates = { start_state }
    New_States   = set(self.mStates)  # states whose successors are still unknown
    while New_States:
        More_States = set()
        for Rule_Set in New_States:
            # Follow every symbol only once per state, no matter how many
            # marked rules have it after the dot.  None stems from complete
            # rules; '$' is never shifted.
            Next_Symbols = { mr.symbol_after_dot() for mr in Rule_Set } - { None, '$' }
            for x in Next_Symbols:
                More_States.add(self.goto(Rule_Set, x))
        New_States = More_States - self.mStates
        self.mStates |= New_States
    print("All SLR-states:")
    counter = 1
    self.mStateNames[start_state] = 's0'
    print(f's0 = {set(start_state)}')
    for state in self.mStates - { start_state }:
        self.mStateNames[state] = f's{counter}'
        print(f's{counter} = {set(state)}')
        counter += 1

Grammar.all_states = all_states # type: ignore
del all_states

The following function computes the *action table* and is defined as follows:
- If $\mathcal{M}$ contains a *marked rule* of the form $a \rightarrow \beta \bullet t\, \delta$
  then we have
  $$\texttt{action}(\mathcal{M},t) := \langle \texttt{shift}, \texttt{goto}(\mathcal{M},t) \rangle.$$
- If $\mathcal{M}$ contains a marked rule of the form $a \rightarrow \beta\, \bullet$ and we have
  $t \in \texttt{Follow}(a)$, then we define
  $$\texttt{action}(\mathcal{M},t) := \langle \texttt{reduce}, a \rightarrow \beta \rangle$$
- If $\mathcal{M}$ contains the marked rule $\widehat{s} \rightarrow s \bullet \$ $, then we define 
  $$\texttt{action}(\mathcal{M},\$) := \texttt{accept}. $$
- Otherwise, we have
  $$\texttt{action}(\mathcal{M},t) := \texttt{error}. $$

In [None]:
def compute_action_table(self):
    """Fill `mActionTable` with shift, reduce, and accept actions and print them.

    The table is indexed by pairs of a state name and a token.  If a reduce
    action clashes with an action found earlier for the same pair, the
    conflict is printed and `mConflicts` is set; the earlier action is kept.
    """
    self.mActionTable = {}
    print('\nAction Table:')
    accepting = MarkedRule('ŝ', (self.mStart,), ('$',))
    for state in self.mStates:
        stateName   = self.mStateNames[state]
        actionTable = {}  # actions found so far for this state
        # compute shift actions
        for token in self.mTokens:
            if token == '$':
                continue
            newState = self.goto(state, token)
            if not newState:
                continue
            newName = self.mStateNames[newState]
            actionTable[token] = ('shift', newName)
            self.mActionTable[stateName, token] = ('shift', newName)
            print(f'action("{stateName}", {token}) = ("shift", {newName})')
        # compute reduce actions
        for mr in state:
            if not mr.is_complete():
                continue
            for token in self.mFollow[mr.mVariable]:
                action1 = actionTable.get(token)
                action2 = ('reduce', mr.to_rule())
                if action1 is None:
                    actionTable[token] = action2  
                    r = self.mRuleNames[mr.to_rule()]
                    self.mActionTable[stateName, token] = ('reduce', r)
                    print(f'action("{stateName}", {token}) = {action2}')
                elif action1 != action2: 
                    self.mConflicts = True
                    print('')
                    print(f'conflict in state {stateName}:')
                    print(f'{stateName} = {state}')
                    print(f'action("{stateName}", {token}) = {action1}')     
                    print(f'action("{stateName}", {token}) = {action2}')
                    print('')
        # compute the accept action
        if accepting in state:
            actionTable['$'] = 'accept'
            self.mActionTable[stateName, '$'] = 'accept'
            print(f'action("{stateName}", $) = accept')

Grammar.compute_action_table = compute_action_table # type: ignore
del compute_action_table

The function `compute_goto_table` computes the *goto table*.

In [None]:
def compute_goto_table(self) -> None:
    """Fill `mGotoTable`, indexed by pairs of a state name and a variable, and print it."""
    self.mGotoTable = {}
    print('\nGoto Table:')
    for state in self.mStates:
        for var in self.mVariables:
            newState = self.goto(state, var)
            if not newState:
                continue  # no marked rule of this state has `var` after the dot
            stateName = self.mStateNames[state]
            newName   = self.mStateNames[newState]
            self.mGotoTable[stateName, var] = newName
            print(f'goto({stateName}, {var}) = {newName}')

Grammar.compute_goto_table = compute_goto_table # type: ignore
del compute_goto_table

In [None]:
grammar

In [None]:
%%time
# Build the grammar object for the example grammar; this computes all tables.
g = Grammar(grammar)

In [None]:
def strip_quotes(t):
    """Return `t` without its enclosing single quotes; other strings are returned unchanged.

    Only strings of at least two characters can be enclosed in quotes, so
    the empty string and a lone quote character are returned as they are.
    """
    if len(t) >= 2 and t[0] == "'" and t[-1] == "'":
        return t[1:-1]
    return t

In [None]:
def dump_parse_table(self, file: str) -> None:
    """Write the rules, the action table, and the goto table to `file` as Python code.

    State names and grammar symbols used as table keys are written with
    `repr`, so that symbols containing a quote or a backslash still yield
    valid Python.  For all other symbols the output is unchanged.
    """
    with open(file, 'w', encoding="utf-8") as handle:
        handle.write('# Grammar rules:\n')
        for rule in self.mRules:
            rule_name = self.mRuleNames[rule] 
            handle.write(f'{rule_name} = ("{rule.mVariable}", {rule.mBody})\n')
        handle.write('\n# Action table:\n')
        handle.write('actionTable = {}\n')
        for s, t in self.mActionTable:
            action = self.mActionTable[s, t]
            t = strip_quotes(t)
            if action == 'accept':
                handle.write(f"actionTable[{s!r}, {t!r}] = 'accept'\n")
            elif action[0] == 'reduce':
                # The rule is referred to by the name of its variable in the file.
                rule_name = action[1]
                handle.write(f"actionTable[{s!r}, {t!r}] = ('reduce', {rule_name})\n")
            else:
                handle.write(f"actionTable[{s!r}, {t!r}] = {action}\n")
        handle.write('\n# Goto table:\n')
        handle.write('gotoTable = {}\n')
        for s, v in self.mGotoTable:
            state = self.mGotoTable[s, v]
            handle.write(f"gotoTable[{s!r}, {v!r}] = {state!r}\n")
        
Grammar.dump_parse_table = dump_parse_table # type: ignore
del dump_parse_table

In [None]:
# Write the tables of the example grammar to the file parse-table.py.
g.dump_parse_table('parse-table.py') # type: ignore

In [None]:
!cat parse-table.py

In [None]:
def analyse(file):
    """Parse the grammar in `file`, compute its tables, and write them to parse-table.py."""
    Grammar(transform(parse(file))).dump_parse_table('parse-table.py')

In [None]:
# Generate the parse table for the grammar in Examples/arith-small.g.
analyse('Examples/arith-small.g')

In [None]:
%%time
# Generate the parse table for the grammar in Examples/arith-ambiguous.g.
analyse('Examples/arith-ambiguous.g')

In [None]:
%%time
# Generate the parse table for the grammar in Examples/c-grammar-slr.g.
analyse('Examples/c-grammar-slr.g')

In [None]:
%%time
# Generate the parse table for the grammar in Examples/c-grammar.g.
analyse('Examples/c-grammar.g')

In [None]:
# Generate the parse table for the grammar in Examples/bool.g.
analyse('Examples/bool.g')