In [66]:
import re
from dataclasses import dataclass
from queue import Queue

In [67]:
class L_Nums():
    Tag_ERROR = 1
    Tag_T = 2
    Tag_Nt = 3
    Tag_AXIOM = 4
    Tag_COMMENT = 5
    Tag_OPEN_GROUP = 6
    Tag_CLOSE_GROUP = 7
    Tag_UNMATCHED = 8
    Tag_END = 9


class Token:
    
    def __init__(self, tag = None, value = ""):
        self.tag = tag
        self.text = value

class CoordsToken(Token):

    def __init__ (self, tag = None, value = "", index_start = 0, index_end = 0):
        self.tag = tag
        self.text = value
        self.index_start = index_start
        self.index_end = index_end

    def __repr__(self):
        return str(self.tag) + '(' + str(self.index_start) + ',' + str(self.index_end) + '):' + str(self.text)

class Lexer:

    def token_matcher(self, text_input):

        matched = re.match(r"'.*\n", text_input)
        if matched:
            return Token(tag=L_Nums.Tag_COMMENT, value=matched.group())
        matched = re.match(r"<axiom <({})>>\n".format(r"[A-Z]'?"), text_input)
        if matched:
            return Token(tag=L_Nums.Tag_AXIOM, value=matched.group())
        matched = re.match("<", text_input)
        if matched:
            return Token(tag=L_Nums.Tag_OPEN_GROUP, value=matched.group())
        matched = re.match(">", text_input)
        if matched:
            return Token(tag=L_Nums.Tag_CLOSE_GROUP, value=matched.group())
        matched = re.match(r"[a-z\(\)\+\*]",text_input)
        if matched:
            return Token(tag=L_Nums.Tag_T, value=matched.group())
        matched = re.match(r"[A-Z]'?", text_input)
        if matched:
            return Token(tag=L_Nums.Tag_Nt, value=matched.group())

        return Token(tag=L_Nums.Tag_UNMATCHED, value=text_input[0])

    def perform_tokenize(self, text_input):
        new_tokens = Queue()
        idx = 0

        while idx < len(text_input):
            token = self.token_matcher(text_input[idx:])
            if token.tag == L_Nums.Tag_UNMATCHED and token.text.isspace() or token.tag == L_Nums.Tag_COMMENT:
                idx += len(token.text)
            else:
                if token.tag == L_Nums.Tag_AXIOM:
                    axiom_value = re.search(r"[A-Z]'?", text_input).group(0)
                    t = CoordsToken(L_Nums.Tag_AXIOM, axiom_value, idx, idx + len(token.text))
                    new_tokens.put(CoordsToken(L_Nums.Tag_AXIOM, axiom_value, idx, idx + len(token.text)))
                else:
                    new_tokens.put(CoordsToken(token.tag, token.text, idx, idx + len(token.text)))
                idx += len(token.text)

        new_tokens.put(CoordsToken(L_Nums.Tag_END, "", idx+1, idx+1))
        return new_tokens




In [68]:
class Nt:
    def __init__(self, str, id=0):
        self.value = str
        self.num_rule = id
        self.accessors = []

    def __hash__(self):
        return hash(self.value)

    def __str__(self):
        return self.value
    
    def print(self, indent):
        print(indent + str(self) + ":")
        for child in self.accessors:
            child.print(indent + "\t")

    def __eq__(self, other):
        return (isinstance(other, Nt) and
                self.value == other.value)


class T:
    def __init__(self, str):
        self.value = str

    def __hash__(self):
        return hash(self.value)

    def __str__(self):
        return self.value
    
    def print(self, indent):
        print(indent + str(self))

    def __eq__(self, other):
        return (isinstance(other, T) and
                self.value == other.value)
    
    
    

In [69]:
class Leaf:
    def __init__(self, value):
        self.value = value
      
    def print(self, indent=""):
        print(str(indent) + 'End Node:' + str(self.value))

class Inner:
    def __init__(self, non_term, id):
        self.non_term = non_term
        self.rule_id = id
        self.accessors = []
      
    def print(self, indent=""):
        print(indent + 'Node:' + str(self.non_term) + ', value:' + str(self.rule_id))
        for child in self.accessors:
            child.print(indent + "\t")

class Rules_in_Table:
    def __init__(self):
        self.order_numbers = 0
        self.rules = [(Nt("Programm"), L_Nums.Tag_AXIOM), (Nt("LR"), L_Nums.Tag_OPEN_GROUP),
                      (Nt("LR"), L_Nums.Tag_OPEN_GROUP), (Nt("RLR"), L_Nums.Tag_END),
                      (Nt("OR"), L_Nums.Tag_OPEN_GROUP), (Nt("SR"), L_Nums.Tag_OPEN_GROUP),
                      (Nt("SRLR"), L_Nums.Tag_OPEN_GROUP), (Nt("SRLR"), L_Nums.Tag_CLOSE_GROUP),
                      (Nt("SRLR"), L_Nums.Tag_END), (Nt("SRT"), L_Nums.Tag_OPEN_GROUP),
                      (Nt("RLRTS"), L_Nums.Tag_T), (Nt("RLRTS"), L_Nums.Tag_Nt),
                      (Nt("RLRTS"), L_Nums.Tag_CLOSE_GROUP), (Nt("RLRTS"), L_Nums.Tag_END),
                      (Nt("RSF"), L_Nums.Tag_T), (Nt("RSF"), L_Nums.Tag_Nt),
                      (Nt("RSF"), L_Nums.Tag_CLOSE_GROUP)]
        self.rhs_rules = [([T("Stmt_Axiom"), Nt("LR")], self.order_numbers + 1),
                          ([Nt("OR"), Nt("RLR")], self.order_numbers + 1),
                          ([Nt("OR"), Nt("RLR")], self.order_numbers + 1),
                          ([], self.order_numbers + 1),
                          ([T("OpenStmt"), T("NtermStmt"), Nt("SR"), T("CloseStmt")], self.order_numbers + 1),
                          ([Nt("SRT"), Nt("SRLR")], self.order_numbers + 1),
                          ([Nt("SRT"), Nt("SRLR")], self.order_numbers + 1),
                          ([], self.order_numbers + 1),
                          ([], self.order_numbers + 1),
                          ([T("OpenStmt"), Nt("RSF"), Nt("RLRTS"), T("CloseStmt")], self.order_numbers + 1),
                          ([Nt("RSF"), Nt("RLRTS")], self.order_numbers + 1),
                          ([Nt("RSF"), Nt("RLRTS")], self.order_numbers + 1),
                          ([], self.order_numbers + 1),
                          ([], self.order_numbers + 1),
                          ([T("Term")], self.order_numbers + 1),
                          ([T("NtermStmt")], self.order_numbers + 1),
                          ([], self.order_numbers + 1)
        ]

test_str = """' аксиома
<axiom <E>>
' правила грамматики
<E  <T E'>>
' и это комментарий
<E' <+ T E'> <>> 
<T  <F T'>>
<T' <* F T'> 'и это комментарий

 <>>
<F  <n> <( E )>>"""

def top_down(tokens):
    type_mapping = {
        L_Nums.Tag_AXIOM: "Stmt_Axiom",
        L_Nums.Tag_T: "Term",
        L_Nums.Tag_Nt: "NtermStmt",
        L_Nums.Tag_OPEN_GROUP: "OpenStmt",
        L_Nums.Tag_CLOSE_GROUP: "CloseStmt"
    }

    delta = Rules_in_Table()
    sparent = Inner(None, None)
    stack = [(sparent, T('$')), (sparent, Nt('Programm'))]

    token = tokens.get()
    parent, X = stack.pop()
    
    while X.value != '$':
        if isinstance(X, T):
            if X.value == type_mapping[token.tag]:
                parent.accessors.append(Leaf(token))
                token = tokens.get()
            else:
                raise ValueError(f"T. Ожидался {X}, Получен {token}")
        elif (X, token.tag) in delta.rules:
            inner = Inner(X, delta.rhs_rules[delta.rules.find[X, token.tag][1]])
            parent.accessors.append(inner)
            for elem in delta.rhs_rules[delta.rules.find[X, token.tag]][0][::-1]:
                stack.append((inner, elem))
        else:
            raise ValueError(f"Ожидался {X}, Получен {token}")
        
        parent, X = stack.pop()

    return sparent.accessors[0]

In [70]:
lexer = Lexer()

tokens = lexer.perform_tokenize(test_str)

In [71]:
v = top_down(tokens)

In [72]:
v.print()

Node:Programm, value:0
	End Node:4(10,22):E
	Node:ListRules, value:1
		Node:OneRule, value:4
			End Node:6(43,44):<
			End Node:3(44,45):E
			Node:Stmt_RHS, value:5
				Node:Stmt_RHSTerm, value:9
					End Node:6(47,48):<
					Node:RHS_Stmt_Factor, value:15
						End Node:3(48,49):T
					Node:RestListRHSTerm_Stmt, value:11
						Node:RHS_Stmt_Factor, value:15
							End Node:3(50,52):E'
						Node:RestListRHSTerm_Stmt, value:12
					End Node:7(52,53):>
				Node:Stmt_RestListRHS, value:7
			End Node:7(53,54):>
		Node:RestListRules, value:2
			Node:OneRule, value:4
				End Node:6(75,76):<
				End Node:3(76,78):E'
				Node:Stmt_RHS, value:5
					Node:Stmt_RHSTerm, value:9
						End Node:6(79,80):<
						Node:RHS_Stmt_Factor, value:14
							End Node:2(80,81):+
						Node:RestListRHSTerm_Stmt, value:11
							Node:RHS_Stmt_Factor, value:15
								End Node:3(82,83):T
							Node:RestListRHSTerm_Stmt, value:11
								Node:RHS_Stmt_Factor, value:15
									End Node:3(84,86):E'
								Node: