In [None]:
class Grammar:
    """A formal grammar described by the tuple (N, E, P, S).

    The constructor stores its arguments unchanged:
      N -- collection of non-terminal symbols
      E -- collection of terminal symbols
      P -- list of productions, each a (lhs, rhs) pair of strings
      S -- start symbol
    """

    @staticmethod
    def parseLine(line):
        """Parse a line shaped like 'NAME = { a, b, c }' into ['a', 'b', 'c'].

        Only the first '=' separates the name from the body, so the values
        themselves may contain '='. Raises IndexError when the line has no '='.
        """
        body = line.strip().split('=', 1)[1]
        return Grammar.parseConsole(body)

    @staticmethod
    def parseConsole(line):
        """Parse a brace-delimited list like '{ a, b, c }' into ['a', 'b', 'c'].

        The first and last characters of the stripped text are dropped without
        checking that they are braces. Empty entries (an empty set '{ }' or a
        trailing comma) are discarded instead of being returned as ''.
        """
        inner = line.strip()[1:-1]
        values = (part.strip() for part in inner.split(','))
        return [value for value in values if value]

    @staticmethod
    def fromFile(fileName):
        """Build a grammar from a text file.

        The file is read in this order: a line for N, a line for E, a line
        'S = <symbol>', then everything that remains is taken as the
        production set (which may therefore span several lines).
        """
        with open(fileName) as file:
            N = Grammar.parseLine(file.readline())
            E = Grammar.parseLine(file.readline())
            S = file.readline().split('=', 1)[1].strip()
            P = Grammar.parseRules(Grammar.parseLine(file.read()))

        return Grammar(N, E, P, S)

    @staticmethod
    def fromConsole():
        """Build a grammar by prompting for N, E, S and P on standard input."""
        N = Grammar.parseConsole(input('N = '))
        E = Grammar.parseConsole(input('E = '))
        # Strip the start symbol so console input matches what fromFile yields.
        S = input('S = ').strip()
        P = Grammar.parseRules(Grammar.parseConsole(input('P = ')))

        return Grammar(N, E, P, S)

    @staticmethod
    def parseRules(rules):
        """Expand rule strings 'A -> x | y' into pairs [('A', 'x'), ('A', 'y')].

        Raises ValueError when a rule does not contain exactly one '->'.
        """
        result = []

        for rule in rules:
            lhs, rhs = rule.split('->')
            lhs = lhs.strip()

            for alternative in rhs.split('|'):
                result.append((lhs, alternative.strip()))

        return result

    @staticmethod
    def fromFiniteAutomata(fa):
        """Derive a grammar from a finite automaton object.

        Reads fa.Q (used as N), fa.E, fa.q0 (used as S), fa.S and fa.F.
        Each element of fa.S is unpacked as ((state, symbol), target) and
        produces 'state -> symbol + target'; when the target is in fa.F an
        extra production 'state -> symbol' is added.
        """
        productions = []

        for (state1, route), state2 in fa.S:
            productions.append((state1, route + state2))

            if state2 in fa.F:
                productions.append((state1, route))

        return Grammar(fa.Q, fa.E, productions, fa.q0)

    def __init__(self, N, E, P, S):
        self.N = N
        self.E = E
        self.P = P
        self.S = S

    def isNonTerminal(self, value):
        """Return True when value is one of the grammar's non-terminals."""
        return value in self.N

    def isTerminal(self, value):
        """Return True when value is one of the grammar's terminals."""
        return value in self.E

    def isRegular(self):
        """Check the productions against the right-linear shape.

        Returns False when a right-hand side has a terminal after a
        non-terminal, consists of non-terminals only, or when the left-hand
        side of a production containing 'E' also occurs in some right-hand
        side. NOTE(review): 'E' is presumably the epsilon marker -- confirm.
        """
        usedInRhs = set()
        notAllowedInRhs = []

        for lhs, rhs in self.P:
            hasTerminal = False
            hasNonTerminal = False

            for char in rhs:
                if self.isNonTerminal(char):
                    usedInRhs.add(char)
                    hasNonTerminal = True
                elif self.isTerminal(char):
                    if hasNonTerminal:
                        return False
                    hasTerminal = True
                if char == 'E':
                    notAllowedInRhs.append(lhs)

            if hasNonTerminal and not hasTerminal:
                return False

        return not any(symbol in usedInRhs for symbol in notAllowedInRhs)

    def getProductionsFor(self, nonTerminal):
        """Return every production whose left-hand side is nonTerminal.

        Raises Exception when nonTerminal is not in N.
        """
        if not self.isNonTerminal(nonTerminal):
            raise Exception('Can only show productions for non-terminals')

        return [prod for prod in self.P if prod[0] == nonTerminal]

    def showProductionsFor(self, nonTerminal):
        """Print the productions of nonTerminal as 'A -> x, A -> y'."""
        productions = self.getProductionsFor(nonTerminal)

        print(', '.join([' -> '.join(prod) for prod in productions]))

    def __str__(self):
        return 'N = { ' + ', '.join(self.N) + ' }\n' \
             + 'E = { ' + ', '.join(self.E) + ' }\n' \
             + 'P = { ' + ', '.join([' -> '.join(prod) for prod in self.P]) + ' }\n' \
             + 'S = ' + str(self.S) + '\n'

In [None]:
class Lr0Parser:
    """LR(0) parser driven by a grammar object.

    The grammar must expose N, E, P, S, isTerminal() and getProductionsFor().
    An item is a (lhs, rhs) tuple whose rhs contains exactly one '.' marking
    the parse position. Symbols are read one character at a time, so every
    grammar symbol is expected to be a single character. The grammar is
    augmented with the production 'S1 -> <start symbol>'.
    """

    def __init__(self, grammar):
        self.__grammar = grammar
        self.__workingStack = []
        self.__inputStack = []
        self.__output = []

    def closure(self, productions):
        """Return the closure of a list of items, or None for an empty list.

        The argument is left untouched; a new list is returned. Items are
        appended in discovery order, which fixes the layout of each state.
        """
        if not productions:
            return None

        items = list(productions)
        position = 0

        # Worklist scan: items appended below are visited later in this loop.
        while position < len(items):
            _, afterDot = items[position][1].split('.')
            position += 1

            if len(afterDot) == 0:
                continue

            nextSymbol = afterDot[0]
            if self.__grammar.isTerminal(nextSymbol):
                continue

            for prod in self.__grammar.getProductionsFor(nextSymbol):
                item = (prod[0], '.' + prod[1])
                if item not in items:
                    items.append(item)

        return items

    def goTo(self, state, symbol):
        """Return the state reached from state on symbol, or None if none.

        Every item with symbol right after the dot has the dot moved past
        it; the closure of those items is the result.
        """
        kernel = []

        for dottedProd in state:
            alpha, afterDot = dottedProd[1].split('.')

            if len(afterDot) != 0 and afterDot[0] == symbol:
                kernel.append((dottedProd[0], alpha + symbol + '.' + afterDot[1:]))

        return self.closure(kernel)

    def getCannonicalCollection(self):
        """Return the list of LR(0) states; index 0 is the initial state.

        States are discovered breadth-first, trying the non-terminals and
        then the terminals for each state in order.
        """
        # Use the grammar's own start symbol rather than assuming it is 'S'.
        states = [self.closure([('S1', '.' + self.__grammar.S)])]
        symbols = self.__grammar.N + self.__grammar.E

        position = 0
        while position < len(states):
            state = states[position]
            position += 1

            for symbol in symbols:
                nextState = self.goTo(state, symbol)
                if nextState is not None and nextState not in states:
                    states.append(nextState)

        return states

    def getTable(self):
        """Build the LR(0) table: one dict per state of the collection.

        Each dict has an 'action' entry -- 'shift', 'reduce <k>' (k is an
        index into grammar.P) or 'acc' -- plus one entry per symbol that has
        a goto target, mapping the symbol to the target state's index.

        Raises Exception when the items of a state do not all call for the
        same kind of action.
        """
        states = self.getCannonicalCollection()
        symbols = self.__grammar.N + self.__grammar.E
        table = [{} for _ in states]

        for index, state in enumerate(states):
            shiftItems = 0
            reduceItems = 0
            acceptItems = 0
            productionIndex = None

            for prod in state:
                alpha, beta = prod[1].split('.')

                if len(beta) != 0:
                    shiftItems += 1
                elif prod[0] != 'S1':
                    reduceItems += 1
                    productionIndex = self.__grammar.P.index((prod[0], alpha))
                elif alpha == self.__grammar.S:
                    acceptItems += 1

            if shiftItems == len(state):
                table[index]['action'] = 'shift'
            elif reduceItems == len(state):
                table[index]['action'] = 'reduce ' + str(productionIndex)
            elif acceptItems == len(state):
                table[index]['action'] = 'acc'
            else:
                raise Exception('No action detected for state ' + str(index) + ' ' + str(state))

            for symbol in symbols:
                nextState = self.goTo(state, symbol)
                if nextState in states:
                    table[index][symbol] = states.index(nextState)

        return table

    def parse(self, inputSequence):
        """Parse inputSequence and return the production indices used.

        The result is a list of strings (indices into grammar.P); each
        reduction is inserted at the front, so the list reads as the
        productions in reverse order of reduction.

        Raises Exception when the sequence cannot be parsed.
        """
        table = self.getTable()

        # Bottom-of-stack state is an int so it can never equal a symbol.
        self.__workingStack = [0]
        self.__inputStack = list(inputSequence)
        self.__output = []

        while len(self.__workingStack) != 0:
            state = int(self.__workingStack[-1])
            action = table[state]['action']

            if action == 'shift':
                # Input is consumed only here: reduce and accept must not
                # swallow the next symbol.
                if len(self.__inputStack) > 0:
                    symbol = self.__inputStack.pop(0)
                else:
                    symbol = None

                if symbol not in table[state]:
                    print(self.__workingStack)
                    print(self.__inputStack)
                    print(state)
                    print(symbol)

                    raise Exception('Cannot parse shift')

                self.__workingStack.append(symbol)
                self.__workingStack.append(table[state][symbol])

            elif action == 'acc':
                if len(self.__inputStack) != 0:
                    raise Exception('Cannot Parse acc')

                self.__workingStack.clear()

            else:
                reducedState = int(action.split(' ')[1])
                reducedProduction = self.__grammar.P[reducedState]

                # Pop the handle: state entries are skipped, symbols must
                # match the right-hand side from its last character backwards.
                toRemoveFromWorkingStack = list(reducedProduction[1])

                while len(toRemoveFromWorkingStack) > 0 and len(self.__workingStack) > 0:
                    if self.__workingStack[-1] == toRemoveFromWorkingStack[-1]:
                        toRemoveFromWorkingStack.pop()
                    self.__workingStack.pop()

                if len(toRemoveFromWorkingStack) != 0:
                    raise Exception('Cannot Parse reduce')

                # The reduced non-terminal is fed back as the next input
                # symbol so the following shift performs the goto.
                self.__inputStack.insert(0, reducedProduction[0])
                self.__output.insert(0, str(reducedState))

        return self.__output

In [None]:
# Load the grammar from disk, build an LR(0) parser for it and print the
# production indices obtained for the sample word 'abbc'.
g = Grammar.fromFile("grammar.txt")
lr0 = Lr0Parser(g)

print(
    lr0.parse('abbc')
)

In [None]:
# Print the parsing table, one numbered row per state.
g = Grammar.fromFile("grammar.txt")
lr0 = Lr0Parser(g)

index = 0
for line in lr0.getTable():
    print(index, line)
    index += 1

In [None]:
# Show the goto target of the initial state for every grammar symbol.
g = Grammar.fromFile("grammar.txt")
lr0 = Lr0Parser(g)

# Initial state: closure of the augmented item S1 -> .<start symbol>.
s0 = lr0.closure([ ('S1', '.' + g.S) ])

# goTo is a method of the parser, not of the grammar.
for s in g.N + g.E:
    print('goto( s0, ' + s + ') = ', lr0.goTo(s0, s))

In [None]:
# Print every state of the canonical collection together with its index.
g = Grammar.fromFile("grammar.txt")
lr0 = Lr0Parser(g)

index = 0
for state in lr0.getCannonicalCollection():
    print(index, state)
    index += 1