# Code Written by:
**Shweta Tiwari**
*20 Oct 2023*

## Algorithm: Earley Parser

In [1]:
import time

# Algorithm

In [2]:
%%time
class EarleyParser:
    """Earley chart recognizer working on single-character symbols.

    The parser builds one set of ``State`` items per input position
    (``self.states[k]`` holds the items valid after consuming ``text[:k]``)
    and prints the resulting chart.

    The grammar object must provide:

    * ``grammar.start`` -- a ``(nonterminal, expression)`` pair for the start rule;
    * ``grammar[symbol]`` -- an iterable of ``(nonterminal, expression)`` rules
      whose left-hand side is ``symbol``.
    """

    def __init__(self, grammar):
        """Remember ``grammar``; the chart itself is (re)built by ``parse``."""
        self.grammar = grammar
        self.states = []

    def parse(self, text):
        """Build the Earley chart for ``text`` and print it.

        The chart is stored in ``self.states`` (a list of ``len(text) + 1``
        sets) so that it can be inspected after the call. Returns ``None``.
        """
        self.states = [set() for _ in range(len(text) + 1)]
        # Seed from the grammar this parser was constructed with, not from a
        # module-level variable that merely happens to share the name.
        self.states[0].add(State(*self.grammar.start))

        for k in range(len(text) + 1):
            # One extra iteration past the last character lets the final set
            # run prediction/completion; there is nothing left to scan there.
            token = text[k] if k < len(text) else None

            # Re-process everything the scanner put into this set so that
            # predictions and completions are derived from it.
            extension = list(self.states[k])
            self.states[k].clear()

            while extension:
                state = extension.pop()
                if state in self.states[k]:
                    continue

                self.states[k].add(state)

                if state.finished:
                    self._completer(state, extension)
                elif state.symbol_is_nonterminal:
                    self._predictor(state, k, extension)
                else:
                    self._scanner(state, k, token)

        self._print(text)

    def _predictor(self, state, origin, extension):
        """Queue every rule for the nonterminal right after ``state``'s dot."""
        for rule in self.grammar[state.symbol]:
            extension.append(State(*rule, origin=origin))

        # Nullable fix: if that nonterminal was already completed within this
        # very set (only possible for an empty right-hand side), the completer
        # ran before ``state`` existed and could not advance it. Do it here.
        already_completed = any(
            item.finished
            and item.origin == origin
            and item.nonterminal == state.symbol
            for item in self.states[origin]
        )
        if already_completed:
            extension.append(state.shift)

    def _scanner(self, state, origin, token):
        """Advance ``state`` into the next set when its terminal matches ``token``.

        ``token`` is ``None`` at the end of the input, where no next set exists.
        """
        if token is not None and state.symbol == token:
            self.states[origin + 1].add(state.shift)

    def _completer(self, state, extension):
        """Advance every item of the origin set that was waiting for ``state``."""
        for waiting in self.states[state.origin]:
            if state.nonterminal == waiting.symbol:
                extension.append(waiting.shift)

    def _print(self, text):
        """Print each chart set, marking positions where the start rule is finished."""
        for k, state in enumerate(self.states):
            accepts = any(s.nonterminal == '^' and s.finished for s in state)

            print('(%d)' % k, end=' ')
            print('"%s.%s"' % (text[:k], text[k:]), end=' ')
            print('ACCEPTS' if accepts else '')

            for i in state:
                print('\t', i)

CPU times: user 30 µs, sys: 6 µs, total: 36 µs
Wall time: 39.3 µs


In [3]:
%%time
class State:
    """An Earley item: a grammar rule, a dot position and an origin index.

    ``nonterminal`` is the rule's left-hand side, ``expression`` its right-hand
    side (indexed symbol by symbol), ``dot`` the number of symbols already
    recognized and ``origin`` the chart position where recognition started.
    Items compare and hash by these four values, so they can live in sets.
    """

    def __init__(self, nonterminal, expression, dot=0, origin=0):
        self.nonterminal = nonterminal
        self.expression = expression
        self.dot = dot
        self.origin = origin

    @property
    def finished(self):
        """True when the dot has moved past the last symbol of the rule."""
        return self.dot >= len(self.expression)

    @property
    def symbol(self):
        """The symbol right after the dot, or ``None`` for a finished item."""
        return None if self.finished else self.expression[self.dot]

    @property
    def symbol_is_nonterminal(self):
        """True when the next symbol is an uppercase letter (a nonterminal).

        Always returns a real ``bool``, including for finished items.
        """
        symbol = self.symbol
        return symbol is not None and symbol.isalpha() and symbol.isupper()

    @property
    def shift(self):
        """A new item for the same rule with the dot advanced by one symbol."""
        return State(self.nonterminal, self.expression, self.dot + 1, self.origin)

    @property
    def tuple(self):
        """The ``(nonterminal, expression, dot, origin)`` identity of the item."""
        return self.nonterminal, self.expression, self.dot, self.origin

    def __hash__(self):
        return hash(self.tuple)

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing on a missing ``tuple`` attribute.
        if not isinstance(other, State):
            return NotImplemented
        return self.tuple == other.tuple

    def __repr__(self):
        return 'State(%r, %r, dot=%r, origin=%r)' % self.tuple

    def __str__(self):
        n, e, d, o = self.tuple
        return '[%d] %s -> %s.%s' % (o, n, e[:d], e[d:])

CPU times: user 74 µs, sys: 0 ns, total: 74 µs
Wall time: 141 µs


In [4]:
%%time
class Grammar:
    """A context-free grammar given as ``'LHS ::= RHS'`` rule strings.

    Spaces are ignored, so every character of the right-hand side is one
    symbol. Rules are stored in ``self.rules`` as ``(lhs, rhs)`` string pairs,
    in the order they were given. The start rule is the first one whose
    left-hand side is ``'^'``.
    """

    def __init__(self, *rules):
        self.rules = tuple(self._parse(rule) for rule in rules)

    def _parse(self, rule):
        """Split one rule string into its ``(lhs, rhs)`` pair.

        Raises:
            ValueError: if the rule does not contain exactly one ``'::='``.
                Such a rule would otherwise yield a tuple of the wrong arity
                and only fail later, when it is unpacked into a parser item.
        """
        parts = tuple(rule.replace(' ', '').split('::='))
        if len(parts) != 2:
            raise ValueError(
                "malformed rule %r: expected exactly one '::='" % (rule,)
            )
        return parts

    @property
    def start(self):
        """The first rule for ``'^'``; raises ``StopIteration`` if there is none."""
        return next(self['^'])

    def __getitem__(self, nonterminal):
        """Yield, in definition order, every rule whose left-hand side matches."""
        yield from [rule for rule in self.rules if rule[0] == nonterminal]

CPU times: user 49 µs, sys: 0 ns, total: 49 µs
Wall time: 53.6 µs


## Run

## Grammar: Arithmetic Expression

In [5]:
%%time
# Arithmetic expressions: E is built from T with + and -, T from F with * and /,
# and F is a parenthesized E, a negated F, or one of the variables x, y, z.
# '^' is the start symbol.
ARITHMETIC_RULES = (
    '^ ::= E',
    'E ::= E + T',
    'E ::= E - T',
    'E ::= T',
    'T ::= T * F',
    'T ::= T / F',
    'T ::= F',
    'F ::= ( E )',
    'F ::= - F',
    'F ::= x',
    'F ::= y',
    'F ::= z',
)
grammar = Grammar(*ARITHMETIC_RULES)

CPU times: user 27 µs, sys: 5 µs, total: 32 µs
Wall time: 35 µs


In [6]:
%%time
# Print the Earley chart for a short arithmetic expression.
short_expression = 'x-x*(y+z)'
EarleyParser(grammar).parse(short_expression)

(0) ".x-x*(y+z)" 
	 [0] F -> .x
	 [0] F -> .y
	 [0] T -> .T/F
	 [0] E -> .E+T
	 [0] ^ -> .E
	 [0] F -> .-F
	 [0] F -> .(E)
	 [0] T -> .F
	 [0] F -> .z
	 [0] E -> .T
	 [0] T -> .T*F
	 [0] E -> .E-T
(1) "x.-x*(y+z)" ACCEPTS
	 [0] E -> E.-T
	 [0] E -> E.+T
	 [0] T -> F.
	 [0] T -> T./F
	 [0] T -> T.*F
	 [0] ^ -> E.
	 [0] F -> x.
	 [0] E -> T.
(2) "x-.x*(y+z)" 
	 [2] F -> .z
	 [0] E -> E-.T
	 [2] T -> .T*F
	 [2] F -> .x
	 [2] T -> .T/F
	 [2] F -> .y
	 [2] F -> .-F
	 [2] F -> .(E)
	 [2] T -> .F
(3) "x-x.*(y+z)" ACCEPTS
	 [2] F -> x.
	 [0] E -> E.-T
	 [2] T -> T.*F
	 [0] E -> E.+T
	 [0] ^ -> E.
	 [2] T -> T./F
	 [2] T -> F.
	 [0] E -> E-T.
(4) "x-x*.(y+z)" 
	 [4] F -> .y
	 [4] F -> .-F
	 [4] F -> .(E)
	 [2] T -> T*.F
	 [4] F -> .z
	 [4] F -> .x
(5) "x-x*(.y+z)" 
	 [5] F -> .(E)
	 [5] T -> .F
	 [5] F -> .z
	 [5] E -> .E-T
	 [5] E -> .T
	 [5] T -> .T*F
	 [5] F -> .x
	 [5] E -> .E+T
	 [5] F -> .y
	 [5] T -> .T/F
	 [4] F -> (.E)
	 [5] F -> .-F
(6) "x-x*(y.+z)" 
	 [5] T -> T.*F
	 [5] E -> E.-T
	 

In [7]:
%%time
# Print the Earley chart for a longer expression with division and unary minus.
long_expression = 'x-(y/x+y/z)/y*-z'
EarleyParser(grammar).parse(long_expression)

(0) ".x-(y/x+y/z)/y*-z" 
	 [0] F -> .x
	 [0] F -> .y
	 [0] T -> .T/F
	 [0] E -> .E+T
	 [0] ^ -> .E
	 [0] F -> .-F
	 [0] F -> .(E)
	 [0] T -> .F
	 [0] F -> .z
	 [0] E -> .T
	 [0] T -> .T*F
	 [0] E -> .E-T
(1) "x.-(y/x+y/z)/y*-z" ACCEPTS
	 [0] E -> E.-T
	 [0] E -> E.+T
	 [0] T -> F.
	 [0] T -> T./F
	 [0] T -> T.*F
	 [0] ^ -> E.
	 [0] F -> x.
	 [0] E -> T.
(2) "x-.(y/x+y/z)/y*-z" 
	 [2] F -> .z
	 [0] E -> E-.T
	 [2] T -> .T*F
	 [2] F -> .x
	 [2] T -> .T/F
	 [2] F -> .y
	 [2] F -> .-F
	 [2] F -> .(E)
	 [2] T -> .F
(3) "x-(.y/x+y/z)/y*-z" 
	 [3] T -> .T*F
	 [3] F -> .x
	 [3] E -> .E+T
	 [3] F -> .y
	 [3] T -> .T/F
	 [3] F -> .(E)
	 [2] F -> (.E)
	 [3] F -> .z
	 [3] F -> .-F
	 [3] T -> .F
	 [3] E -> .T
	 [3] E -> .E-T
(4) "x-(y./x+y/z)/y*-z" 
	 [3] E -> E.+T
	 [3] T -> F.
	 [3] T -> T./F
	 [2] F -> (E.)
	 [3] T -> T.*F
	 [3] F -> y.
	 [3] E -> T.
	 [3] E -> E.-T
(5) "x-(y/.x+y/z)/y*-z" 
	 [5] F -> .(E)
	 [5] F -> .z
	 [3] T -> T/.F
	 [5] F -> .x
	 [5] F -> .y
	 [5] F -> .-F
(6) "x-(y/x.+y/z)

## Grammar: Parentheses

In [8]:
%%time
# Parenthesis strings: P is a pair of parentheses, optionally wrapping another P
# and optionally preceded by another P. '^' is the start symbol.
PARENTHESES_RULES = (
    '^ ::= P',
    'P ::= ( )',
    'P ::= ( P )',
    'P ::= P ( )',
    'P ::= P ( P )',
)
grammar = Grammar(*PARENTHESES_RULES)

CPU times: user 14 µs, sys: 2 µs, total: 16 µs
Wall time: 18.8 µs


In [9]:
%%time
# Print the Earley chart for a string of nested and adjacent parentheses.
parentheses_text = '()(()())()'
EarleyParser(grammar).parse(parentheses_text)

(0) ".()(()())()" 
	 [0] ^ -> .P
	 [0] P -> .P(P)
	 [0] P -> .P()
	 [0] P -> .(P)
	 [0] P -> .()
(1) "(.)(()())()" 
	 [1] P -> .P()
	 [1] P -> .(P)
	 [0] P -> (.P)
	 [0] P -> (.)
	 [1] P -> .()
	 [1] P -> .P(P)
(2) "().(()())()" ACCEPTS
	 [0] P -> P.()
	 [0] ^ -> P.
	 [0] P -> ().
	 [0] P -> P.(P)
(3) "()(.()())()" 
	 [3] P -> .()
	 [3] P -> .P(P)
	 [3] P -> .P()
	 [0] P -> P(.P)
	 [0] P -> P(.)
	 [3] P -> .(P)
(4) "()((.)())()" 
	 [4] P -> .P(P)
	 [4] P -> .P()
	 [4] P -> .(P)
	 [3] P -> (.P)
	 [3] P -> (.)
	 [4] P -> .()
(5) "()(().())()" 
	 [3] P -> ().
	 [3] P -> P.(P)
	 [0] P -> P(P.)
	 [3] P -> P.()
(6) "()(()(.))()" 
	 [6] P -> .()
	 [6] P -> .P(P)
	 [3] P -> P(.P)
	 [6] P -> .P()
	 [3] P -> P(.)
	 [6] P -> .(P)
(7) "()(()().)()" 
	 [3] P -> P.(P)
	 [0] P -> P(P.)
	 [3] P -> P().
	 [3] P -> P.()
(8) "()(()()).()" ACCEPTS
	 [0] P -> P.()
	 [0] ^ -> P.
	 [0] P -> P.(P)
	 [0] P -> P(P).
(9) "()(()())(.)" 
	 [9] P -> .(P)
	 [9] P -> .()
	 [9] P -> .P(P)
	 [0] P -> P(.P)
	 [9] P -> .

# The End