In [8]:
import re

# ----------------------
# Lexer
# ----------------------
TOKENS = [
    ('INT', r'integer'),
    ('PRINT', r'print'),
    ('NUMBER', r'\d+'),
    ('ID', r'[a-zA-Z_][a-zA-Z0-9_]*'),
    ('ASSIGN', r'='),
    ('PLUS', r'\+'),
    ('MINUS', r'-'),
    ('MUL', r'\*'),
    ('DIV', r'/'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('SEMICOLON', r';'),
    ('SKIP', r'[ \t\n]+'),
]

def lexer(code):
    pos = 0
    tokens = []
    while pos < len(code):
        match = None
        for token_type, pattern in TOKENS:
            regex = re.compile(pattern)
            match = regex.match(code, pos)
            if match:
                text = match.group(0)
                if token_type != 'SKIP':
                    tokens.append((token_type, text))
                pos = match.end(0)
                break
        if not match:
            raise SyntaxError(f'Unexpected character: {code[pos]}')
    tokens.append(('EOF', ''))
    return tokens

# ----------------------
# Parser / AST
# ----------------------
class ASTNode:
    def __init__(self, type_, **kwargs):
        self.type = type_
        self.__dict__.update(kwargs)

class Parser:
    def __init__(self, tokens):
        self.tokens = tokens
        self.pos = 0

    def peek(self):
        return self.tokens[self.pos]

    def eat(self, token_type):
        if self.peek()[0] == token_type:
            self.pos += 1
        else:
            raise SyntaxError(f"Expected {token_type}, got {self.peek()}")

    def parse_program(self):
        stmts = []
        while self.peek()[0] != 'EOF':
            stmts.append(self.parse_stmt())
        return ASTNode('PROGRAM', statements=stmts)

    def parse_stmt(self):
        tok = self.peek()[0]
        if tok == 'INT':
            return self.parse_decl()
        elif tok == 'ID':
            return self.parse_assign()
        elif tok == 'PRINT':
            return self.parse_print()
        else:
            raise SyntaxError(f"Unexpected token {self.peek()}")

    def parse_decl(self):
        self.eat('INT')
        var_name = self.peek()[1]
        self.eat('ID')
        self.eat('ASSIGN')
        expr = self.parse_expr()
        self.eat('SEMICOLON')
        return ASTNode('DECL', var=var_name, expr=expr)

    def parse_assign(self):
        var_name = self.peek()[1]
        self.eat('ID')
        self.eat('ASSIGN')
        expr = self.parse_expr()
        self.eat('SEMICOLON')
        return ASTNode('ASSIGN', var=var_name, expr=expr)

    def parse_print(self):
        self.eat('PRINT')
        self.eat('LPAREN')
        var_name = self.peek()[1]
        self.eat('ID')
        self.eat('RPAREN')
        self.eat('SEMICOLON')
        return ASTNode('PRINT', var=var_name)

    def parse_expr(self):
        left = self.parse_term()
        while self.peek()[0] in ('PLUS', 'MINUS'):
            op = self.peek()[1]
            self.eat(self.peek()[0])
            right = self.parse_term()
            left = ASTNode('BINOP', op=op, left=left, right=right)
        return left

    def parse_term(self):
        left = self.parse_factor()
        while self.peek()[0] in ('MUL', 'DIV'):
            op = self.peek()[1]
            self.eat(self.peek()[0])
            right = self.parse_factor()
            left = ASTNode('BINOP', op=op, left=left, right=right)
        return left

    def parse_factor(self):
        tok_type, tok_val = self.peek()
        if tok_type == 'NUMBER':
            self.eat('NUMBER')
            return ASTNode('NUMBER', value=int(tok_val))
        elif tok_type == 'ID':
            self.eat('ID')
            return ASTNode('ID', name=tok_val)
        elif tok_type == 'LPAREN':
            self.eat('LPAREN')
            expr = self.parse_expr()
            self.eat('RPAREN')
            return expr
        else:
            raise SyntaxError(f"Unexpected token {self.peek()}")

# ----------------------
# Interpreter
# ----------------------
class Interpreter:
    def __init__(self):
        self.symbol_table = {}

    def eval(self, node):
        if node.type == 'PROGRAM':
            for stmt in node.statements:
                self.eval(stmt)
        elif node.type == 'DECL':
            value = self.eval(node.expr)
            self.symbol_table[node.var] = value
        elif node.type == 'ASSIGN':
            value = self.eval(node.expr)
            if node.var not in self.symbol_table:
                raise NameError(f"Variable '{node.var}' not declared")
            self.symbol_table[node.var] = value
        elif node.type == 'PRINT':
            if node.var not in self.symbol_table:
                raise NameError(f"Variable '{node.var}' not declared")
            print(self.symbol_table[node.var])
        elif node.type == 'BINOP':
            left = self.eval(node.left)
            right = self.eval(node.right)
            if node.op == '+': return left + right
            if node.op == '-': return left - right
            if node.op == '*': return left * right
            if node.op == '/': return left // right  # integer division
        elif node.type == 'NUMBER':
            return node.value
        elif node.type == 'ID':
            if node.name not in self.symbol_table:
                raise NameError(f"Variable '{node.name}' not declared")
            return self.symbol_table[node.name]

# ----------------------
# Demo
# ----------------------
code = """
integer x = 5;
integer y = x + 3 * 2;
print(x);
print(y);
"""

tokens = lexer(code)
parser = Parser(tokens)
ast = parser.parse_program()

interpreter = Interpreter()
interpreter.eval(ast)


5
11


In [9]:
from graphviz import Digraph

def visualize_ast(node, graph=None, parent=None):
    if graph is None:
        graph = Digraph()
        graph.attr('node', shape='box', style='filled', color='lightblue')
    
    node_id = str(id(node))
    label = node.type
    
    # Add extra info for certain nodes
    if node.type == 'NUMBER':
        label += f'\n{node.value}'
    elif node.type == 'ID':
        label += f'\n{node.name}'
    elif node.type in ('DECL', 'ASSIGN', 'PRINT'):
        if hasattr(node, 'var'):
            label += f'\n{node.var}'
    
    graph.node(node_id, label=label)
    
    if parent is not None:
        graph.edge(parent, node_id)
    
    # Recursively add children
    if node.type in ('PROGRAM',):
        for stmt in node.statements:
            visualize_ast(stmt, graph, node_id)
    elif node.type in ('DECL', 'ASSIGN'):
        visualize_ast(node.expr, graph, node_id)
    elif node.type == 'PRINT':
        # For simplicity, treat print var as child
        child_node = ASTNode('ID', name=node.var)
        visualize_ast(child_node, graph, node_id)
    elif node.type == 'BINOP':
        visualize_ast(node.left, graph, node_id)
        visualize_ast(node.right, graph, node_id)
    
    return graph


In [10]:
# Your previous code
code = """
integer x = 5;
integer y = x + 3 * 2;
print(x);
print(y);
"""

tokens = lexer(code)
parser = Parser(tokens)
ast = parser.parse_program()

# Visualize
graph = visualize_ast(ast)
graph.render('parse_tree', format='png', cleanup=True)
print("Parse tree saved as parse_tree.png")


ExecutableNotFound: failed to execute WindowsPath('dot'), make sure the Graphviz executables are on your systems' PATH