# **Grammar for MiniLang**


In [None]:
program -> statement_list

statement_list -> statement ';' statement_list | statement ';'
statement -> assignment | conditional | print

assignment -> IDENTIFIER '=' expression

conditional -> 'if' '(' expression ')' '{' statement_list '}' 'else' '{' statement_list '}'

print -> 'print' expression

expression -> term | expression '+' term | expression '-' term
term -> factor | term '*' factor | term '/' factor
factor -> IDENTIFIER | INTEGER | '(' expression ')' | BOOLEAN | UNARY_OP factor

UNARY_OP -> '!' | '-' | '+'

BOOLEAN -> 'true' | 'false'


# **Top-down recursive descent parser for MiniLang**

In [12]:
import re

# Token types
INTEGER = 'INTEGER'
BOOLEAN = 'BOOLEAN'
IDENTIFIER = 'IDENTIFIER'
KEYWORD = 'KEYWORD'
OPERATOR = 'OPERATOR'
DELIMITER = 'DELIMITER'
COMMENT = 'COMMENT'

# Regular expressions for tokenization
token_expressions = [
    (r'\d+', INTEGER),
    (r'(true|false)', BOOLEAN),
    (r'[a-zA-Z][a-zA-Z0-9_]*', IDENTIFIER),
    (r'(if|else|print)', KEYWORD),
    (r'[\+\-\*\/\&\|\!\=\>\<\(\)\{\}\;\=]', OPERATOR),
    (r'\/\/.*', COMMENT),
    (r'\s+', None)  # Ignore whitespace
]

# Tokenize function
def tokenize(code):
    tokens = []
    while code:
        for pattern, token_type in token_expressions:
            match = re.match(pattern, code)
            if match:
                value = match.group(0)
                if token_type is not None:
                    tokens.append((value, token_type))
                break
        code = code[len(value):]
    return tokens

# AST Node class
class ASTNode:
    def __init__(self, type_, value=None, children=None):
        self.type = type_
        self.value = value
        self.children = children if children is not None else []

    def __repr__(self):
        if self.children:
            return f'{self.type}({", ".join(repr(child) for child in self.children)})'
        elif self.value:
            return f'{self.type}: {self.value}'
        else:
            return f'{self.type}'

# Parser class
class Parser:
    def __init__(self, tokens):
        self.tokens = tokens
        self.current_token = None
        self.token_index = -1
        self.advance()

    def advance(self):
        self.token_index += 1
        if self.token_index < len(self.tokens):
            self.current_token = self.tokens[self.token_index]
        else:
            self.current_token = None

    def parse(self):
        return self.parse_program()

    def parse_program(self):
        program = ASTNode('Program')
        while self.current_token:
            statement = self.parse_statement()
            if statement:
                program.children.append(statement)
        return program

    def parse_statement(self):
        token = self.current_token
        if token[1] == KEYWORD:
            if token[0] == 'if':
                return self.parse_if_statement()
            elif token[0] == 'print':
                return self.parse_print_statement()
        elif token[1] == IDENTIFIER:
            return self.parse_assignment()
        self.advance()  # Skip unsupported statement
        return None

    def parse_assignment(self):
        identifier = self.current_token[0]
        self.advance()  # Skip identifier
        self.advance()  # Skip '='
        value = self.parse_expression()
        self.advance()  # Skip ';'
        return ASTNode('Assignment', (identifier, value))

    def parse_if_statement(self):
        condition = self.parse_expression()
        self.advance()  # Skip '{'
        if_block = self.parse_program()
        self.advance()  # Skip '}'
        else_block = None
        if self.current_token and self.current_token[0] == 'else':
            self.advance()  # Skip 'else'
            self.advance()  # Skip '{'
            else_block = self.parse_program()
            self.advance()  # Skip '}'
        return ASTNode('IfStatement', (condition, if_block, else_block))

    def parse_print_statement(self):
        self.advance()  # Skip 'print'
        value = self.parse_expression()
        self.advance()  # Skip ';'
        return ASTNode('PrintStatement', value)

    def parse_expression(self):
        # For simplicity, assuming expressions are only identifiers or literals
        return ASTNode('Expression', self.current_token[0])

# Main function to test the parser
def main():
    code = input("Enter MiniLang code: ")
    tokens = tokenize(code)
    parser = Parser(tokens)
    ast = parser.parse()
    print(ast)

if __name__ == '__main__':
    main()


Enter MiniLang code: z = x +;
Program(Assignment: ('z', Expression: x))
