<a href="https://colab.research.google.com/github/RaoEhsanElahi/MiniLang_Parser/blob/main/Compiler_Assignment_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
class Parser:
    """Top-down parser skeleton over a list of already-scanned tokens.

    The parser keeps a cursor (``current_token_index``) into ``tokens`` and
    dispatches on the token at the cursor to decide which statement to parse.

    NOTE: ``parse_if_else``, ``parse_print`` and ``parse_assignment`` are
    called by ``parse_statement`` but are not defined in this class yet.
    """

    def __init__(self, tokens):
        """Store the token list and place the cursor on the first token.

        Args:
            tokens: Sequence of objects exposing ``type`` and ``lexeme``.
        """
        self.tokens = tokens
        self.current_token_index = 0

    def parse(self):
        """Parse the whole token list and return the list of statements."""
        return self.parse_program()

    def parse_program(self):
        """Parse statements until the token list is exhausted.

        Returns:
            A list with every truthy result returned by ``parse_statement``.
        """
        statements = []
        while self.current_token_index < len(self.tokens):
            start_index = self.current_token_index
            statement = self.parse_statement()
            if statement:
                statements.append(statement)
            if self.current_token_index == start_index:
                # Nothing was consumed, so the token at the cursor cannot
                # start a statement. Skip it; otherwise this loop would
                # never terminate.
                self.current_token_index += 1
        return statements

    def parse_statement(self):
        """Parse one statement starting at the cursor.

        Returns:
            Whatever the selected statement parser returns, or ``None`` when
            the token at the cursor does not start a known statement. In the
            latter case a non-statement keyword has been consumed, while any
            other token is left in place for the caller to skip.
        """
        if self.match(TokenType.KEYWORD):
            # match() advanced the cursor, so the keyword is the previous token.
            keyword = self.tokens[self.current_token_index - 1].lexeme
            if keyword == "if":
                return self.parse_if_else()
            if keyword == "print":
                return self.parse_print()
            # Any other keyword cannot begin a statement.
            return None
        if self.match(TokenType.IDENTIFIER):
            # Potentially an assignment statement, needs further parsing.
            return self.parse_assignment()
        return None  # No statement parsed

    # TODO: implement parse_if_else, parse_print and parse_assignment.

    def match(self, token_type):
        """Consume the token at the cursor if it has the given type.

        Args:
            token_type: The type to compare against the current token's
                ``type`` attribute.

        Returns:
            ``True`` (and the cursor moves one token forward) when the
            current token has ``token_type``; ``False`` (cursor unchanged)
            when it does not or when the input is exhausted.
        """
        if self.current_token_index >= len(self.tokens):
            return False
        if self.tokens[self.current_token_index].type != token_type:
            return False
        self.current_token_index += 1
        return True



In [None]:
import re
from enum import Enum

class TokenType(Enum):
    """Categories of token the scanner can produce.

    Every member's value is the member's own name as a string.
    """

    # Literal values
    INTEGER_LITERAL = "INTEGER_LITERAL"
    BOOLEAN_LITERAL = "BOOLEAN_LITERAL"

    # Names, symbols and reserved words
    IDENTIFIER = "IDENTIFIER"
    OPERATOR = "OPERATOR"
    KEYWORD = "KEYWORD"

    # Non-semantic text
    COMMENT = "COMMENT"
    WHITESPACE = "WHITESPACE"


class Token:
    """One scanned lexeme with its category and the line it came from."""

    def __init__(self, token_type: "TokenType", lexeme: str, line_number: int) -> None:
        """Record the token's category, text and source line.

        Args:
            token_type: Category of the token; stored as ``self.type``.
            lexeme: The exact text that was matched.
            line_number: Line of the input on which the lexeme was found.
        """
        # The category is exposed as ``type`` even though the parameter is
        # called ``token_type``.
        self.type = token_type
        self.lexeme = lexeme
        self.line_number = line_number


class Scanner:
    """Regex-based lexical scanner that tokenizes a source file line by line.

    Tokens are accumulated in ``self.tokens``. Whitespace and comment tokens
    are recorded like any other token.
    """

    def __init__(self, filename):
        """Remember the file to scan and start with an empty token list.

        Args:
            filename: Path of the source file that ``scan`` will open.
        """
        self.filename = filename
        self.tokens = []
        self.keywords = ["if", "else", "print"]
        self.operators = ["+", "-", "*", "/", "=", "==", "!="]
        self.current_line = 1

    def scan(self):
        """
        Reads the file and appends every token found to ``self.tokens``.

        Each line is stripped of surrounding whitespace and then consumed
        token by token from the left. Scanning of a line stops after a
        comment token. On the first piece of text that matches no token, a
        lexical-error message is printed and scanning stops; tokens collected
        up to that point are kept.
        """
        with open(self.filename, "r") as file:
            for line in file:
                line = line.strip()
                while line:
                    token = self.get_next_token(line)
                    # An empty lexeme would never shrink the line, so treat
                    # it like "no token" instead of looping forever.
                    if token is None or not token.lexeme:
                        print(f"Lexical error in line {self.current_line}: Invalid token")
                        return

                    self.tokens.append(token)
                    line = line[len(token.lexeme) :]

                    if token.type == TokenType.COMMENT:
                        break
                self.current_line += 1

    def get_next_token(self, line):
        """
        Identifies the next token in the given line.

        Comments, whitespace and operators are tried first, then integer
        literals. Anything shaped like an identifier is matched as a whole
        word and only then classified as a keyword, a boolean literal or a
        plain identifier, so that ``if`` is reported as a keyword while a
        longer word that merely starts with a keyword or boolean stays one
        identifier.

        Returns the matching ``Token``, or ``None`` when no token matches at
        the start of ``line``.
        """
        patterns = TokenType.patterns

        # Comment must be tried before operator so "//" is not read as "/".
        for token_type in (
            TokenType.COMMENT,
            TokenType.WHITESPACE,
            TokenType.OPERATOR,
            TokenType.INTEGER_LITERAL,
        ):
            match = self.match_pattern(line, patterns[token_type])
            if match:
                return Token(token_type, match.group(0), self.current_line)

        match = self.match_pattern(line, patterns[TokenType.IDENTIFIER])
        if match:
            word = match.group(0)
            if word in self.keywords:
                token_type = TokenType.KEYWORD
            elif re.fullmatch(patterns[TokenType.BOOLEAN_LITERAL], word):
                token_type = TokenType.BOOLEAN_LITERAL
            else:
                token_type = TokenType.IDENTIFIER
            return Token(token_type, word, self.current_line)

        return None  # No token found

    @staticmethod
    def match_pattern(line, pattern):
        """Return the ``re`` match of ``pattern`` at the start of ``line``, or ``None``."""
        return re.match(pattern, line)

    def display_tokens(self):
        """Print one ``TYPE: lexeme (line N)`` line per collected token."""
        for token in self.tokens:
            print(f"{token.type.name}: {token.lexeme} (line {token.line_number})")


# Regular expressions, keyed by token type, that the scanner matches against
# the start of the not-yet-consumed part of a line.
TokenType.patterns = {
    TokenType.INTEGER_LITERAL: r"\d+",
    # The trailing \b stops a word such as "trueValue" from being split into
    # a boolean literal followed by a separate identifier.
    TokenType.BOOLEAN_LITERAL: r"(?:true|false)\b",
    TokenType.IDENTIFIER: r"[a-zA-Z][a-zA-Z0-9]*",
    # Alternatives are tried left to right and the first that matches wins,
    # so the two-character operators must be listed before "=". Otherwise
    # "==" is scanned as two "=" tokens.
    TokenType.OPERATOR: r"==|!=|\+|-|\*|/|=",
    TokenType.COMMENT: r"//.*",
    TokenType.WHITESPACE: r"\s+",
}

if __name__ == "__main__":
    # Script entry point: ask for a source file, scan it, then print one line
    # per token that was collected.
    filename = input("Enter the filename to scan: ")
    scanner = Scanner(filename)
    # scan() opens the file itself; on the first lexical error it prints a
    # message and stops, leaving the tokens found so far in scanner.tokens.
    scanner.scan()
    scanner.display_tokens()
