In [39]:
class Lexer:
    """Hand-written scanner for a small C-like token set.

    Every token is a ``(code, value)`` tuple.  Keywords, operators and
    delimiters come straight from the lookup tables below and carry the
    placeholder value ``"-"``; integer constants use code 100 and
    identifiers code 111, both with the matched text as their value.
    """

    def __init__(self):
        self.string = ""    # text being scanned
        self.pointer = 0    # index of the next unread character for get_next_token()
        self.token_num = 0  # token count recorded by the last successful analyze_file()
        self.keywords = {
            "int": (5, "-"),
            "else": (15, "-"),
            "if": (17, "-"),
            "while": (20, "-")
        }

        # Not consulted by the scanner itself; integer constants are emitted
        # directly with code 100.
        self.constants = {
            "整数": (100, "整数")
        }

        self.operators = {
            "+": (41, "-"),
            "-": (42, "-"),
            "*": (43, "-"),
            "/": (44, "-"),
            "%": (45, "-"),
            "=": (46, "-"),
            ">": (47, "-"),
            ">=": (48, "-"),
            "<": (49, "-"),
            "<=": (50, "-"),
            "==": (51, "-"),
            "!=": (52, "-"),
            "&&": (53, "-"),
            "||": (54, "-"),
            "!": (55, "-"),
            "++": (56, "-"),
            "--": (57, "-")
        }

        self.delimiters = {
            "(": (81, "-"),
            ")": (82, "-"),
            ";": (84, "-"),
            "{": (86, "-"),
            "}": (87, "-"),
            "[": (88, "-"),
            "]": (89, "-")
        }

    def _scan_token(self, start):
        """Scan one token from ``self.string`` beginning at index ``start``.

        Leading whitespace is skipped.  This is the single tokenising routine
        shared by count_tokens() and get_next_token() so the two cannot
        disagree about what a token is.

        Returns:
            ``(token, next_index)``; ``token`` is ``None`` when only
            whitespace (or nothing) remains.

        Raises:
            ValueError: on a character that starts no known token.
        """
        text = self.string
        end = len(text)
        pos = start
        while pos < end and text[pos].isspace():
            pos += 1
        if pos >= end:
            return None, pos

        char = text[pos]
        if char.isdigit():
            stop = pos + 1
            while stop < end and text[stop].isdigit():
                stop += 1
            return (100, text[pos:stop]), stop

        if char.isalnum() or char == '_':
            stop = pos + 1
            while stop < end and (text[stop].isalnum() or text[stop] == '_'):
                stop += 1
            word = text[pos:stop]
            if word in self.keywords:
                return self.keywords[word], stop
            return self.is_identifier(word), stop

        # Prefer the two-character operator so ">=" is not read as ">" then "=".
        pair = text[pos:pos + 2]
        if pair in self.operators:
            return self.operators[pair], pos + 2
        if char in self.operators:
            return self.operators[char], pos + 1

        if char in self.delimiters:
            return self.delimiters[char], pos + 1

        raise ValueError("Illegal character: {}".format(char))

    def count_tokens(self):
        """Return the number of tokens in ``self.string``.

        Does not move ``self.pointer``.

        Raises:
            ValueError: if the text contains an illegal character.
        """
        count = 0
        pos = 0
        while True:
            token, pos = self._scan_token(pos)
            if token is None:
                return count
            count += 1

    def is_identifier(self, token):
        """Wrap ``token`` as an identifier token (code 111)."""
        return 111, token

    def get_next_token(self):
        """Return the next token and advance ``self.pointer`` past it.

        Returns ``None`` once only whitespace remains, instead of indexing
        past the end of the text.

        Raises:
            ValueError: on an illegal character (the pointer is left
                unchanged), matching count_tokens().
        """
        token, self.pointer = self._scan_token(self.pointer)
        return token

    def analyze(self):
        """Return the next token, or ``None`` when the input is exhausted."""
        if self.pointer >= len(self.string):
            return None
        return self.get_next_token()

    def analyze_file(self, file_path):
        """Load ``file_path`` as the text to scan and record its token count.

        On success the read position is reset to the start of the new text
        and ``self.token_num`` is updated.  A missing file is reported on
        stdout and leaves the lexer state untouched.

        Raises:
            ValueError: if the file contains an illegal character.
        """
        try:
            with open(file_path, 'r') as file:
                file_content = file.read()
        except FileNotFoundError:
            print("File not found:", file_path)
            return
        self.string = file_content
        self.pointer = 0  # a stale pointer from earlier input must not carry over
        self.token_num = self.count_tokens()


In [41]:
class Parser:
    """Recursive-descent evaluator for ``+ - * /`` expressions.

    Tokens are pulled one at a time from ``lexer.analyze()``, which must
    return ``(code, value)`` tuples and ``None`` at end of input.  Each
    production that is applied is printed as a numbered step.

    Token codes used here: 100 integer constant, 111 identifier,
    41 ``+``, 42 ``-``, 43 ``*``, 44 ``/``, 81 ``(``, 82 ``)``.
    """

    def __init__(self, lexer):
        self.lexer = lexer
        self.current_token = None  # one-token lookahead
        self.step = 1              # number printed with the next production

    def eat(self, token_type):
        """Consume the lookahead if its code is ``token_type``.

        Raises:
            ValueError: if the lookahead is missing or has another code.
        """
        if self.current_token is not None and self.current_token[0] == token_type:
            self.current_token = self.lexer.analyze()
        else:
            raise ValueError("Unexpected token")

    def print_step(self, production):
        """Print ``production`` prefixed with the running step number."""
        print(f"({self.step}) {production}")
        self.step += 1

    def _require(self, value):
        """Return ``value``; raise ValueError if an operand is missing (``None``)."""
        if value is None:
            raise ValueError("Unexpected end of input")
        return value

    def factor(self):
        """Parse an integer constant, an identifier or a parenthesised expression.

        Returns:
            ``int`` for a constant, the token's value for an identifier, the
            inner expression's value for parentheses, or ``None`` at end of
            input.

        Raises:
            ValueError: if the lookahead cannot start a factor.
        """
        if self.current_token is None:
            return None
        # NOTE: the same trace label is emitted for every alternative.
        self.print_step("factor ⟶ num")
        token_type = self.current_token[0]
        # Read the value before eat() replaces the lookahead.
        token_value = self.current_token[1]
        if token_type == 100:  # Integer constant
            self.eat(100)
            return int(token_value)
        if token_type == 111:  # Identifier
            self.eat(111)
            return token_value
        # Decide on the token code: the value field of a delimiter token is a
        # placeholder and does not hold the character itself.
        if token_type == 81:  # '('
            self.eat(81)
            result = self.expression()
            self.eat(82)  # ')'
            return result
        raise ValueError("Unexpected token")

    def unary(self):
        """Parse a unary expression; currently this is just a factor."""
        if self.current_token is None:
            return None
        self.print_step("unary ⟶ factor")
        return self.factor()

    def term(self):
        """Parse ``unary (('*' | '/') unary)*`` and return its value.

        Division uses true division, so the result becomes a ``float``.

        Raises:
            ValueError: if an operator is not followed by an operand.
        """
        if self.current_token is None:
            return None
        self.print_step("term ⟶ unary rest6")
        result = self.unary()
        while self.current_token is not None and self.current_token[0] in (43, 44):
            op = self.current_token[0]
            self.eat(op)
            if op == 43:  # '*'
                self.print_step("rest6 ⟶ * unary rest6")
                result *= self._require(self.unary())
            else:  # '/'
                self.print_step("rest6 ⟶ / unary rest6")
                result /= self._require(self.unary())
        self.print_step("rest6 ⟶ ℇ")  # Empty production
        return result

    def expression(self):
        """Parse ``term (('+' | '-') term)*`` and return its value.

        Raises:
            ValueError: if an operator is not followed by an operand.
        """
        if self.current_token is None:
            return None
        self.print_step("expr ⟶ term rest5")
        result = self.term()
        while self.current_token is not None and self.current_token[0] in (41, 42):
            op = self.current_token[0]
            self.eat(op)
            if op == 41:  # '+'
                self.print_step("rest5 ⟶ + term rest5")
                result += self._require(self.term())
            else:  # '-'
                self.print_step("rest5 ⟶ - term rest5")
                result -= self._require(self.term())
        self.print_step("rest5 ⟶ ℇ")  # Empty production
        return result

    def parse(self):
        """Fetch the first token and evaluate one expression.

        Returns ``None`` for empty input.
        """
        self.current_token = self.lexer.analyze()
        return self.expression()


# Driver: load the input text into a lexer, then evaluate it with the parser.
# Assuming source_code.txt contains the input code.
lexer = Lexer()
lexer.analyze_file("source_code.txt")
parser = Parser(lexer=lexer)
result = parser.parse()


(1) expr ⟶ term rest5
(2) term ⟶ unary rest6
(3) unary ⟶ factor
(4) factor ⟶ num
(5) rest6 ⟶ ℇ
(6) rest5 ⟶ + term rest5
(7) term ⟶ unary rest6
(8) unary ⟶ factor
(9) factor ⟶ num
(10) rest6 ⟶ * unary rest6
(11) unary ⟶ factor
(12) factor ⟶ num
(13) rest6 ⟶ / unary rest6
(14) unary ⟶ factor
(15) factor ⟶ num
(16) rest6 ⟶ ℇ
(17) rest5 ⟶ - term rest5
(18) term ⟶ unary rest6
(19) unary ⟶ factor
(20) factor ⟶ num
(21) rest6 ⟶ ℇ
(22) rest5 ⟶ ℇ
