In [35]:
class Lexer:
    def __init__(self):
        self.string = ""
        self.pointer = 0
        self.keywords = {
            "int": (5, "-"),
            "else": (15, "-"),
            "if": (17, "-"),
            "while": (20, "-")
        }

        self.constants = {
            "整数": (100, "整数")
        }

        self.operators = {
            "+": (41, "-"),
            "-": (42, "-"),
            "*": (43, "-"),
            "/": (44, "-"),
            "%": (45, "-"),
            "=": (46, "-"),
            ">": (47, "-"),
            ">=": (48, "-"),
            "<": (49, "-"),
            "<=": (50, "-"),
            "==": (51, "-"),
            "!=": (52, "-"),
            "&&": (53, "-"),
            "||": (54, "-"),
            "!": (55, "-"),
            "++": (56, "-"),
            "--": (57, "-")
        }

        self.delimiters = {
            "(": (81, "-"),
            ")": (82, "-"),
            ",": (83, "-"),
            ";": (84, "-"),
            "{": (86, "-"),
            "}": (87, "-"),
            "[": (88, "-"),
            "]": (89, "-")
        }
        
    def count_tokens(self):
        tokens = []
        current_token = ""
        i = 0
        while i < len(self.string):
            if self.string[i].isdigit():  # Check if the character is a digit
                current_token += self.string[i]
                i += 1
                while i < len(self.string) and self.string[i].isdigit():
                    current_token += self.string[i]
                    i += 1
                tokens.append((100, current_token))  # Append the Integer constant with token type 100
                current_token = ""
            elif self.string[i].isalnum() or self.string[i] == '_':
                current_token += self.string[i]
                i += 1
                while i < len(self.string) and (self.string[i].isalnum() or self.string[i] == '_'):
                    current_token += self.string[i]
                    i += 1
                if current_token in self.keywords:
                    tokens.append(self.keywords[current_token])
                else:
                    tokens.append(self.is_identifier(current_token))
                current_token = ""
            elif self.string[i] in self.operators or self.string[i:i+2] in self.operators:
                if self.string[i:i+2] in self.operators:
                    tokens.append(self.operators[self.string[i:i+2]])
                    i += 2
                else:
                    tokens.append(self.operators[self.string[i]])
                    i += 1
            elif self.string[i] in self.delimiters:
                tokens.append(self.delimiters[self.string[i]])
                i += 1
            elif self.string[i].isspace():
                i += 1
            else:
                raise ValueError("Illegal character: {}".format(self.string[i]))
        return len(tokens)
    
    def is_identifier(self, token):
        return 111, token

    def get_next_token(self):
        char = self.string[self.pointer]
        token=""
        if char.isdigit():  # Check if the character is a digit
            token = char
            self.pointer += 1
            while self.pointer < len(self.string) and self.string[self.pointer].isdigit():
                token += self.string[self.pointer]
                self.pointer += 1
            return (100, token)  # Return the Integer constant with token type 100
        elif char.isalnum() or char == '_':
            token = char
            self.pointer += 1
            while self.pointer < len(self.string) and (self.string[self.pointer].isalnum() or self.string[self.pointer] == '_'):
                token += self.string[self.pointer]
                self.pointer += 1
            if token in self.keywords:
                return self.keywords[token]
            else:
                return self.is_identifier(token)
        elif char in self.operators or (self.string[self.pointer:self.pointer + 2]) in self.operators:
            if (self.string[self.pointer:self.pointer + 2]) in self.operators:
                self.pointer += 2
                return self.operators[self.string[self.pointer-2:self.pointer]]
            else:
                self.pointer += 1
                return self.operators[char]
        elif char in self.delimiters:
            self.pointer += 1
            return self.delimiters[char]
        elif char.isspace():
            self.pointer += 1
            return self.get_next_token()

    def analyze(self):
        if self.pointer >= len(self.string):
            return None
        return self.get_next_token()

    def analyze_file(self, file_path):
        try:
            with open(file_path, 'r') as file:
                file_content = file.read()  # Read file contents once
                self.string = file_content  # Assign the content to self.string
                self.token_num = self.count_tokens()
        except FileNotFoundError:
            print("File not found:", file_path)


In [36]:
class Parser:
    def __init__(self, lexer):
        self.lexer = lexer
        self.current_token = None
        self.step = 1

    def eat(self, token_type):
        if self.current_token is not None and self.current_token[0] == token_type:
            self.current_token = self.lexer.analyze()
        else:
            raise ValueError("Unexpected token")

    def print_step(self, production):
        print(f"({self.step}) {production}")
        self.step += 1

    def factor(self):
        if self.current_token is None:
            return
        self.print_step("factor ⟶ num")
        token_type = self.current_token[0]
        if token_type == 100:
            token_value = self.current_token[1]
            self.eat(100)
            return int(token_value)
        elif token_type == 111:
            identifier = self.current_token[1]
            self.eat(111)
            return identifier
        elif self.current_token[1] == '(':
            self.eat(81)
            result = self.expression()
            self.eat(82)
            return result

    def unary(self):
        if self.current_token is None:
            return
        self.print_step("unary ⟶ factor")
        return self.factor()

    def term(self):
        if self.current_token is None:
            return
        self.print_step("term ⟶ unary rest6")
        result = self.unary()
        while self.current_token is not None and self.current_token[0] in (43, 44):
            op = self.current_token[0]
            self.eat(op)
            if op == 43:
                self.print_step("rest6 ⟶ * unary rest6")
                result *= self.unary()
            elif op == 44:
                self.print_step("rest6 ⟶ / unary rest6")
                result /= self.unary()
        self.print_step("rest6 ⟶ ℇ")
        return result

    def expression(self):
        if self.current_token is None:
            return
        self.print_step("expr ⟶ term rest5")
        result = self.term()
        while self.current_token is not None and self.current_token[0] in (41, 42):
            op = self.current_token[0]
            self.eat(op)
            if op == 41:
                self.print_step("rest5 ⟶ + term rest5")
                result += self.term()
            elif op == 42:
                self.print_step("rest5 ⟶ - term rest5")
                result -= self.term()
        self.print_step("rest5 ⟶ ℇ")
        return result

    def rop_expr(self):
        if self.current_token is None:
            return None
        while self.current_token is not None and self.current_token[0] in (47, 48, 49, 50):
            op = self.current_token[0]
            self.eat(op)
            if op == 49:
                self.print_step("rop_expr ⟶ < expr")
            elif op == 50:
                self.print_step("rop_expr ⟶ <= expr")
            elif op == 47:
                self.print_step("rop_expr ⟶ > expr")
            elif op == 48:
                self.print_step("rop_expr ⟶ >= expr")
            result = self.expression()
            return result
        else:
            self.print_step("rop_expr ⟶ ℇ")
            return None


    def rel(self):
        if self.current_token is None:
            return
        self.print_step("rel ⟶ expr rop_expr")
        left = self.expression()
        right = self.rop_expr()
        if right is not None:
            return left, right
        return left

    def equality(self):
        if self.current_token is None:
            return None
        self.print_step("equality ⟶ rel rest4")
        left = self.rel()

        while self.current_token is not None and self.current_token[0] in (51, 52):
            op = self.current_token[0]
            self.eat(op)

            if op == 51:
                self.print_step("rest4 ⟶ ==rel rest4")
            elif op == 52:
                self.print_step("rest4 ⟶ !=rel rest4")

            right = self.rel()
            left = (left, op, right)

        self.print_step("rest4 ⟶ ℇ")
        return left




    def bool_expr(self):
        if self.current_token is None:
            return
        self.print_step("bool ⟶ equality")
        return self.equality()

    def parse(self):
        self.current_token = self.lexer.analyze()
        return self.bool_expr()

lexer = Lexer()
lexer.analyze_file("source_code2.txt")
parser = Parser(lexer)
result = parser.parse()
print(result)

(1) bool ⟶ equality
(2) equality ⟶ rel rest4
(3) rel ⟶ expr rop_expr
(4) expr ⟶ term rest5
(5) term ⟶ unary rest6
(6) unary ⟶ factor
(7) factor ⟶ num
(8) rest6 ⟶ ℇ
(9) rest5 ⟶ ℇ
(10) rop_expr ⟶ ℇ
(11) rest4 ⟶ ℇ
None
