In [10]:
import re

class TokenType:
    INTEGER = 'INTEGER'
    BOOLEAN = 'BOOLEAN'
    OPERATOR = 'OPERATOR'
    ASSIGNMENT = 'ASSIGNMENT'
    EQUALITY = 'EQUALITY'
    INEQUALITY = 'INEQUALITY'
    KEYWORD = 'KEYWORD'
    IDENTIFIER = 'IDENTIFIER'
    PRINT = 'PRINT'
    TRUE = 'TRUE'
    FALSE = 'FALSE'
    COMMENT = 'COMMENT'
    ERROR = 'ERROR'

class Token:
    def __init__(self, type, lexeme):
        self.type = type
        self.lexeme = lexeme

def scan_file(filename):
    keywords = {'if', 'else', 'print', 'true', 'false'}
    operators = {'+', '-', '*', '/', '=', '==', '!='}

    tokens = []
    with open(filename, 'r') as file:
        for line_number, line in enumerate(file):
            line = line.strip()
            while '//' in line:
                comment_start = line.index('//')
                line = line[:comment_start]

            if not line:
                continue

            i = 0
            while i < len(line):
                if line[i].isdigit():
                    match = re.match(r'\d+', line[i:])
                    if match:
                        tokens.append(Token(TokenType.INTEGER, match.group(0)))
                        i += len(match.group(0))
                    else:
                        tokens.append(Token(TokenType.ERROR, line[i]))
                        i += 1
                elif line[i].isalpha() or line[i] == '_':
                    match = re.match(r'[a-zA-Z_]\w*', line[i:])
                    if match:
                        lexeme = match.group(0)
                        if lexeme in keywords:
                            tokens.append(Token(TokenType.KEYWORD, lexeme))
                        elif lexeme == 'true':
                            tokens.append(Token(TokenType.BOOLEAN, lexeme))
                        elif lexeme == 'false':
                            tokens.append(Token(TokenType.BOOLEAN, lexeme))
                        else:
                            tokens.append(Token(TokenType.IDENTIFIER, lexeme))
                        i += len(match.group(0))
                    else:
                        tokens.append(Token(TokenType.ERROR, line[i]))
                        i += 1
                elif line[i] in operators:
                    tokens.append(Token(TokenType.OPERATOR, line[i]))
                    i += 1
                elif line[i:i+2] in ('==', '!='):
                    tokens.append(Token(TokenType.EQUALITY if line[i:i+2] == '==' else TokenType.INEQUALITY, line[i:i+2]))
                    i += 2
                elif line[i] == '=':
                    tokens.append(Token(TokenType.ASSIGNMENT, line[i]))
                    i += 1
                elif line[i] == ' ':
                    i += 1
                elif line[i] == '\t':
                    i += 1
                elif line[i] == '\r':
                    i += 1
                elif line[i] == '\n':
                    i += 1
                else:
                    tokens.append(Token(TokenType.ERROR, line[i]))
                    i += 1

    return tokens


if __name__ == "__main__":
    filename =  "tc_2.minilang"
    tokens = scan_file(filename)
    for token in tokens:
        print(token.type, token.lexeme)


KEYWORD if
ERROR (
IDENTIFIER x
OPERATOR =
OPERATOR =
INTEGER 10
ERROR )
ERROR {
KEYWORD print
KEYWORD true
ERROR ;
ERROR }
KEYWORD else
ERROR {
KEYWORD print
KEYWORD false
ERROR ;
ERROR }
