In [445]:
import pandas as pd
import typing as t

In [446]:
# Load the parser table from CSV. Judging by the preview below, each column is a
# grammar symbol, each row a parser state, and non-empty cells look like
# 's<N>' / 'r<N>' action codes (empty cells are read as NaN).
# NOTE(review): the 'Unnamed: 0' column suggests the CSV was written with its
# index; confirm whether it should be read with index_col=0 instead.
table = pd.read_csv('table.csv')
# Bare last expression so the notebook renders the (truncated) frame.
table

Unnamed: 0,;,type,id,=,(,),{,},",",return,...,предикат,список_слагаемых,слагаемое,список_множителей,множитель,список_логических_выражений,логическое_выражение,вызов_логического_выражения,параметры_вызова,список_значений
0,,s7,s8,,,,,,,s10,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,s11,,,,,,,,,,...,,,,,,,,,,
3,r3,,,,,,,,,,...,,,,,,,,,,
4,r4,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152,,s7,s8,,,,,r2,,s10,...,,,,,,,,,,
153,,,,,,r14,,,s132,,...,,,,,,,,,,
154,,,,,,,,r1,,,...,,,,,,,,,,
155,,,,,,r13,,,,,...,,,,,,,,,,


In [447]:
class Stack:
    """Small LIFO wrapper around a Python list; the top of the stack is the list's end."""

    def __init__(self) -> None:
        # Public on purpose: other code in this notebook reads and reassigns it.
        self.stack = []

    def push(self, value: t.Any) -> None:
        """Put ``value`` on top of the stack."""
        self.stack.append(value)

    def pop_n(self, n: int = 1) -> t.List[t.Any]:
        """Remove ``n`` items from the top and return them, topmost first.

        Raises IndexError if the stack runs out before ``n`` items were
        removed; the items popped up to that point stay removed.
        """
        removed = []
        for _ in range(n):
            removed.append(self.stack.pop())
        return removed

    def watch_head(self) -> t.Any:
        """Return the top item without removing it (IndexError when empty)."""
        return self.stack[-1]

    def len(self) -> int:
        """Return the number of stored items."""
        return len(self.stack)

    def is_empty(self) -> bool:
        """Return True when the stack holds no items."""
        size = self.len()
        return size == 0

In [448]:
class Lexer:
    """Forward-only cursor over a pre-split token sequence.

    The object also carries ``rules_meta`` (grammar rule metadata) so that
    code holding a lexer reference can look rules up through it.
    """

    def __init__(self, rules_meta: t.Dict[str, t.Dict[str, t.Union[int, str]]]) -> None:
        # t.Union instead of ``int | str``: this class is defined before the
        # notebook enables postponed annotations, so the hint is evaluated
        # eagerly and ``int | str`` would raise TypeError on Python < 3.10.
        self._token_sequence: t.List[str] = []
        # Index of the current token inside ``_token_sequence``.
        self._position = 0
        self.rules_meta = rules_meta

    def set_token_sequence(self, token_sequence: t.List[str]) -> None:
        """Start reading ``token_sequence`` from its first token.

        The sequence is copied, so later changes to the caller's list do not
        affect the lexer.
        """
        self._token_sequence = list(token_sequence)
        self._position = 0

    def get_current_token(self) -> str:
        """Return the token under the cursor without consuming it.

        Raises IndexError when every token has already been accepted (or no
        sequence has been set yet).
        """
        return self._token_sequence[self._position]

    def accept(self) -> None:
        """Consume the current token by advancing the cursor one position."""
        # Moving an index is O(1); re-slicing the list on every token made a
        # full scan quadratic in the number of tokens.
        self._position += 1

In [449]:
from __future__ import annotations
from abc import ABC, abstractmethod


class State(ABC):
    """Abstract interface for a parse-table action.

    Note: the concrete action classes in this notebook do not inherit from
    this class; it is used in type annotations to describe their shape.
    """

    @abstractmethod
    def next(self) -> t.Tuple[str, str]:
        """Execute the action and return a pair of strings.

        The parser loop unpacks the pair as (next symbol, next state id).
        """

In [450]:
class ErrorState:
    """Parse-table entry for a cell that holds no action.

    It has the same constructor signature as the shift/reduce action classes
    so that all of them can be built through one dispatch table.
    """

    def __init__(self, symbol: str, i: str, state_stack: Stack, symbol_stack: Stack, lexer: Lexer) -> None:
        self._symbol = symbol
        # The table builder in this notebook passes None here for empty cells.
        self._i = i
        self._state_stack = state_stack
        self._symbol_stack = symbol_stack
        self._lexer = lexer

    def next(self) -> t.Tuple[str, str]:
        """Always fail: reaching this entry means the input is rejected.

        Raises:
            Exception: every time; the message names the offending symbol so
                a caller that surfaces the error has something to show.
        """
        raise Exception(f'syntax error: no parser action for symbol {self._symbol!r}')

    def __repr__(self) -> str:
        # Short tag in the spirit of the 's<N>' / 'r<N>' reprs of the other actions.
        return 'e'

In [451]:
class STerminalState:
    """Shift action for a terminal: record symbol and target state, then consume the token."""

    def __init__(self, symbol: str, i: str, state_stack: Stack, symbol_stack: Stack, lexer: Lexer) -> None:
        self._symbol, self._i = symbol, i
        self._state_stack, self._symbol_stack = state_stack, symbol_stack
        self._lexer = lexer

    def next(self) -> t.Tuple[str, str]:
        """Push the symbol and the target state, advance the lexer, and
        return (new current token, target state id)."""
        # Symbol stack first, state stack second.
        pushes = (
            (self._symbol_stack, self._symbol),
            (self._state_stack, self._i),
        )
        for stack, item in pushes:
            stack.push(item)

        self._lexer.accept()
        upcoming = self._lexer.get_current_token()
        return upcoming, self._i

    def __repr__(self):
        return 's' + str(self._i)

In [452]:
class SNonTerminalState:
    """Shift-style action for a non-terminal: record symbol and target state, consume no input."""

    def __init__(self, symbol: str, i: str, state_stack: Stack, symbol_stack: Stack, lexer: Lexer) -> None:
        self._symbol, self._i = symbol, i
        self._state_stack, self._symbol_stack = state_stack, symbol_stack
        self._lexer = lexer

    def next(self) -> t.Tuple[str, str]:
        """Push the symbol and the target state, then return
        (current token, target state id). The lexer is not advanced."""
        # Symbol stack first, state stack second.
        pushes = (
            (self._symbol_stack, self._symbol),
            (self._state_stack, self._i),
        )
        for stack, item in pushes:
            stack.push(item)

        lookahead = self._lexer.get_current_token()
        return lookahead, self._i

    def __repr__(self):
        return 's' + str(self._i)

In [453]:
class RState:
    """Reduce action: unwind both stacks by the rule's right-hand-side length."""

    def __init__(self, symbol: str, i: str, state_stack: Stack, symbol_stack: Stack, lexer: Lexer) -> None:
        self._symbol, self._i = symbol, i
        self._state_stack, self._symbol_stack = state_stack, symbol_stack
        self._lexer = lexer

    def next(self) -> t.Tuple[str, str]:
        """Apply rule ``i`` and return (rule's left symbol, state now on top).

        Rule metadata is read from ``lexer.rules_meta[i]``: ``'right_n'`` items
        are popped from each stack, and ``'left_symbol'`` is returned together
        with the state id left on top of the state stack.
        """
        rule = self._lexer.rules_meta[self._i]
        rhs_length = rule['right_n']

        # State stack first, then symbol stack; the popped items are not needed.
        for stack in (self._state_stack, self._symbol_stack):
            stack.pop_n(rhs_length)

        lhs = rule['left_symbol']
        exposed_state = self._state_stack.watch_head()
        return lhs, exposed_state

    def __repr__(self):
        return 'r' + str(self._i)

In [454]:
class LR1Parser:
    """Table-driven recognizer: decides whether a token string is accepted.

    The parse table is turned into a mapping ``(symbol, state id) -> action
    object`` once, at construction time. All action objects share this
    parser's two stacks and its lexer, so executing an action mutates the
    parser's state.
    """

    def __init__(
        self,
        table: pd.DataFrame,
        rules_meta: t.Dict[str, t.Dict[str, t.Union[int, str]]],
        terminal_symbols: t.List[str],
    ) -> None:
        """
        Args:
            table: parse table; columns are grammar symbols, the index holds
                state ids, cells hold ``'s<N>'`` / ``'r<N>'`` codes. Any cell
                that is not a non-empty string (e.g. NaN) is an error entry.
            rules_meta: per-rule metadata, handed to the lexer where the
                reduce actions look it up.
            terminal_symbols: symbols for which an ``'s'`` cell consumes an
                input token.

        Raises:
            KeyError: if a cell starts with a letter other than 's' or 'r'.
        """
        self._state_stack = Stack()
        self._symbol_stack = Stack()
        self._lexer = Lexer(rules_meta)
        self._symbol_table: t.Dict[t.Tuple[str, str], "State"] = self.__get_symbol_table(
            table.to_dict(), terminal_symbols
        )

    def __get_symbol_table(
        self,
        symbol_table_str: t.Dict[str, t.Dict[int, str]],
        terminal_symbols: t.List[str],
    ) -> t.Dict[t.Tuple[str, str], "State"]:
        """Build the ``(symbol, str(state id)) -> action object`` mapping.

        ``symbol_table_str`` is the ``DataFrame.to_dict()`` form of the table:
        ``{column: {row index: cell}}``.
        """
        action_states = {
            ('s', True): STerminalState,
            ('s', False): SNonTerminalState,
            ('r', True): RState,
            ('r', False): RState,
            'e': ErrorState
        }
        # Set membership is O(1); the test runs once per column, not per cell.
        terminals = set(terminal_symbols)

        res = {}
        for symbol, column in symbol_table_str.items():
            is_terminal = symbol in terminals
            for state_id, value in column.items():
                if isinstance(value, str) and value:
                    # 's12' -> action letter 's', target state / rule id '12'.
                    key = (value[0], is_terminal)
                    i = value[1:]
                else:
                    # Empty cell (NaN), empty string or any non-string value.
                    key = 'e'
                    i = None

                res[(symbol, str(state_id))] = action_states[key](
                    symbol, i, self._state_stack, self._symbol_stack, self._lexer
                )

        return res

    def analyze(self, word: str, start_state_id: str = '0', sos_symbol: str = 'S', eof_symbol: str = 'eof') -> bool:
        """Return True if ``word`` is accepted, False otherwise.

        Args:
            word: whitespace-separated tokens.
            start_state_id: id of the initial parser state.
            sos_symbol: the grammar's start symbol; producing it with an empty
                symbol stack and only the initial state left means success.
            eof_symbol: end-of-input marker appended after the last token.
        """
        # Reset BOTH stacks. The Stack objects themselves must be kept because
        # every action object holds a reference to them, so only the inner
        # lists are replaced. Without clearing the symbol stack, leftovers
        # from a rejected input made every later call fail.
        self._state_stack.stack = [start_state_id]
        self._symbol_stack.stack = []

        token_sequence = word.split()
        token_sequence.append(eof_symbol)
        self._lexer.set_token_sequence(token_sequence)

        symbol = self._lexer.get_current_token()
        state_id = start_state_id

        while not (
            symbol == sos_symbol and
            self._symbol_stack.is_empty() and
            self._state_stack.len() == 1 and
            self._state_stack.watch_head() == start_state_id
        ):
            try:
                symbol, state_id = self._symbol_table[(symbol, state_id)].next()
            except Exception:
                # Missing table entry, error action or exhausted stack/input:
                # the word is rejected. KeyboardInterrupt and SystemExit are
                # deliberately not swallowed.
                return False
        return True

In [455]:
import json


# Grammar rule metadata keyed by rule id; per the printed output each entry has
# 'right_n' (length of the rule's right-hand side) and 'left_symbol'.
# The encoding is given explicitly: the file contains non-ASCII symbol names and
# open() otherwise falls back to a platform-dependent default.
with open('rules_meta.json', 'r', encoding='utf-8') as json_file:
    rules_meta = json.load(json_file)

print(rules_meta)

{'0': {'right_n': 1, 'left_symbol': 'S'}, '1': {'right_n': 3, 'left_symbol': 'список_операторов'}, '2': {'right_n': 0, 'left_symbol': 'список_операторов'}, '3': {'right_n': 1, 'left_symbol': 'оператор'}, '4': {'right_n': 1, 'left_symbol': 'оператор'}, '5': {'right_n': 1, 'left_symbol': 'оператор'}, '6': {'right_n': 1, 'left_symbol': 'оператор'}, '7': {'right_n': 3, 'left_symbol': 'объявление'}, '8': {'right_n': 2, 'left_symbol': 'тело_объявления'}, '9': {'right_n': 6, 'left_symbol': 'тело_объявления'}, '10': {'right_n': 0, 'left_symbol': 'тело_объявления'}, '11': {'right_n': 3, 'left_symbol': 'параметры_объявления'}, '12': {'right_n': 0, 'left_symbol': 'параметры_объявления'}, '13': {'right_n': 4, 'left_symbol': 'список_параметров'}, '14': {'right_n': 0, 'left_symbol': 'список_параметров'}, '15': {'right_n': 2, 'left_symbol': 'возвращение'}, '16': {'right_n': 3, 'left_symbol': 'присваивание'}, '17': {'right_n': 2, 'left_symbol': 'константа'}, '18': {'right_n': 2, 'left_symbol': 'выраже

In [456]:
# Terminal symbols are stored whitespace-separated on the first line of the file;
# only that line is read.
with open('terminal_symbols.txt', 'r', encoding='utf-8') as f:
    first_line = f.readline()

terminal_symbols = first_line.split()
print(terminal_symbols)

[';', 'type', 'id', '=', '(', ')', '{', '}', ',', 'return', 'const', '<', '>', '<=', '>=', '==', '!=', '+', '-', '*', '/', '&&', '||', '!', 'true', 'false', 'number', 'eof', 'S']


In [457]:
# Build the parser once; the action table is precomputed in the constructor.
lr1 = LR1Parser(
    table=table,
    rules_meta=rules_meta,
    terminal_symbols=terminal_symbols,
)

In [458]:
import re


# Sample input: tokens are already separated by whitespace, one statement per line.
example = """
    type id ;
    type id = ( number + number ) / ( number - number * ( id > number ) ) ;
    type id = true && false ;
    type id = id || true ;
    type id = ! id ;
    type id = ! true ;
    id = id >= id ;
    const type id = number ;
    type id ( type id , type id ) {
        type id = id + number ;
        return id + id ;
    } ;
"""

# Flatten to a single line. Tabs and newlines both become a space: deleting a
# tab outright would glue together two tokens that were separated only by it.
example = re.sub(r'[\t\n]', ' ', example)

In [459]:
# Run the recognizer on the flattened sample; the boolean result is the cell output.
lr1.analyze(word=example)

True