Célula 1 — Setup + Tokens + AST + Parser

In [None]:
# Mini-pipeline de compilação (compacto): Lexer → Parser/AST
from dataclasses import dataclass
from typing import List, Optional, Any
import re, math

# ===== Lexer =====
class LexError(Exception):
    """Raised by the lexer when a character matches no token rule."""


# Ordered (token name, regex) rules. Order matters: alternatives are tried
# left to right, so MISMATCH (any single character) must stay last and acts
# as the catch-all for characters no other rule accepts.
TOKEN_SPEC = [
    ("NUMBER", r"\d+(\.\d+)?"),
    ("IDENT", r"[A-Za-z_]\w*"),
    ("PLUS", r"\+"),
    ("MINUS", r"-"),
    ("STAR", r"\*"),
    ("SLASH", r"/"),
    ("CARET", r"\^"),
    ("LPAREN", r"\("),
    ("RPAREN", r"\)"),
    ("SKIP", r"[ \t\r\n]+"),
    ("MISMATCH", r"."),
]

# Single alternation of named groups; the name of the group that matched
# (``match.lastgroup``) identifies the token type.
MASTER_RE = re.compile("|".join("(?P<%s>%s)" % rule for rule in TOKEN_SPEC))

@dataclass
class Token:
    """A lexical token produced by ``tokenize``."""
    type: str   # name of the TOKEN_SPEC rule that matched (e.g. "NUMBER")
    value: str  # exact text that was matched
    pos: int    # offset in the source string where the match starts

def tokenize(code: str) -> List[Token]:
    """Split ``code`` into a list of tokens.

    Whitespace (the SKIP rule) is dropped. Every other match becomes a
    ``Token`` carrying the rule name, the matched text and its start offset.

    Raises:
        LexError: when a character is only matched by the MISMATCH rule.
    """
    tokens: List[Token] = []
    for match in MASTER_RE.finditer(code):
        kind = match.lastgroup
        if kind == "SKIP":
            continue
        lexeme, offset = match.group(), match.start()
        if kind == "MISMATCH":
            raise LexError(f"Caractere inesperado '{lexeme}' em {offset}")
        tokens.append(Token(kind, lexeme, offset))
    return tokens

# ===== AST =====
@dataclass
class Num:
    """AST leaf: a numeric literal."""
    value: float  # numeric value of the literal

@dataclass
class Var:
    """AST leaf: an identifier (the parser uses it for the callee of a CALL node)."""
    name: str  # identifier text as written in the source

@dataclass
class Unary:
    """AST node: a prefix operator applied to a single operand."""
    op: str    # operator symbol; the parser only produces "-"
    expr: Any  # operand sub-tree

@dataclass
class Binary:
    """AST node: a two-operand operation.

    Also used for function calls: the parser emits ``op == "CALL"`` with
    ``left`` set to a ``Var`` naming the function and ``right`` set to the
    argument expression.
    """
    op: str     # "+", "-", "*", "/", "^" or "CALL"
    left: Any   # left operand (or the callee Var for "CALL")
    right: Any  # right operand (or the call argument for "CALL")

def ast_str(n, ind=0):
    """Render an AST as an indented, multi-line string.

    Each node occupies one line, indented two spaces per nesting level
    (``ind`` is the level of the node itself); children follow their parent
    one level deeper. Objects that are not AST nodes are shown via ``repr``.
    """
    indent = "  " * ind
    deeper = ind + 1

    if isinstance(n, Binary):
        lines = [
            f"{indent}Binary({n.op})",
            ast_str(n.left, deeper),
            ast_str(n.right, deeper),
        ]
        return "\n".join(lines)

    if isinstance(n, Unary):
        return "\n".join([f"{indent}Unary({n.op})", ast_str(n.expr, deeper)])

    if isinstance(n, Var):
        return f"{indent}Var({n.name})"

    if isinstance(n, Num):
        return f"{indent}Num({n.value})"

    return f"{indent}{n!r}"

# ===== Parser (descida recursiva) =====
class ParseError(Exception):
    """Raised by ``Parser`` when the token stream does not match the grammar."""

class Parser:
    """Recursive-descent parser that turns a token list into an expression AST.

    Grammar, from lowest to highest precedence::

        expr    -> term (('+' | '-') term)*
        term    -> unary (('*' | '/') unary)*
        unary   -> '-' unary | power
        power   -> primary ('^' unary)?
        primary -> NUMBER | IDENT '(' expr ')' | '(' expr ')'

    ``^`` is right-associative (``2^3^2`` is ``2^(3^2)``) and binds tighter
    than a unary minus on its left (``-2^2`` is ``-(2^2)``), while still
    accepting a signed exponent (``2^-1``).
    """

    def __init__(self, tokens: List[Token]):
        self.t = tokens  # token stream being parsed
        self.i = 0       # index of the next token to consume

    def peek(self, *types) -> Optional[Token]:
        """Return the next token if its type is one of ``types``, else None.

        Does not consume the token.
        """
        if self.i < len(self.t) and self.t[self.i].type in types:
            return self.t[self.i]
        return None

    def eat(self, *types) -> Token:
        """Consume and return the next token, which must be one of ``types``.

        Raises:
            ParseError: if the next token has another type or the input
                ended; at end of input the reported type is "EOF" and the
                reported position is -1.
        """
        tok = self.peek(*types)
        if tok is not None:
            self.i += 1
            return tok
        at_end = self.i >= len(self.t)
        got = "EOF" if at_end else self.t[self.i].type
        pos = -1 if at_end else self.t[self.i].pos
        raise ParseError(f"Esperado {types}, obtido {got} em {pos}")

    def parse(self):
        """Parse the whole token list and return the root AST node.

        Raises:
            ParseError: if the tokens are not a valid expression or tokens
                remain after a complete expression.
        """
        node = self.expr()
        if self.i != len(self.t):
            raise ParseError("Entrada extra após expressão.")
        return node

    # expr -> term (('+' | '-') term)*
    def expr(self):
        """Parse a left-associative chain of additions and subtractions."""
        node = self.term()
        while self.peek("PLUS", "MINUS"):
            op = self.eat("PLUS", "MINUS").value
            node = Binary(op, node, self.term())
        return node

    # term -> unary (('*' | '/') unary)*
    def term(self):
        """Parse a left-associative chain of multiplications and divisions."""
        node = self.unary()
        while self.peek("STAR", "SLASH"):
            op = self.eat("STAR", "SLASH").value
            node = Binary(op, node, self.unary())
        return node

    # power -> primary ('^' unary)?
    def power(self):
        """Parse an exponentiation.

        The exponent is parsed through ``unary``, which itself descends back
        into ``power``; that recursion is what makes ``^`` right-associative
        and lets the exponent carry its own sign.
        """
        base = self.primary()
        if self.peek("CARET"):
            self.eat("CARET")
            return Binary("^", base, self.unary())
        return base

    # unary -> '-' unary | power
    def unary(self):
        """Parse zero or more prefix minus signs applied to a power."""
        if self.peek("MINUS"):
            self.eat("MINUS")
            return Unary("-", self.unary())
        return self.power()

    # primary -> NUMBER | IDENT '(' expr ')' | '(' expr ')'
    def primary(self):
        """Parse a literal, a one-argument function call or a parenthesised expression.

        An identifier must be followed by a parenthesised argument; the call
        is represented as ``Binary("CALL", Var(name), argument)``.

        Raises:
            ParseError: if the next token cannot start a primary.
        """
        if self.peek("NUMBER"):
            return Num(float(self.eat("NUMBER").value))
        if self.peek("IDENT"):
            name = self.eat("IDENT").value
            self.eat("LPAREN")
            arg = self.expr()
            self.eat("RPAREN")
            return Binary("CALL", Var(name), arg)
        if self.peek("LPAREN"):
            self.eat("LPAREN")
            node = self.expr()
            self.eat("RPAREN")
            return node
        raise ParseError("Primário inválido.")

# Quick smoke test: tokenize and parse one expression, then print the
# (type, value) pairs of the tokens and the indented AST.
ts = tokenize("3 + 4 * (2 - 1) ^ 2")
ast = Parser(ts).parse()
print("Tokens:", [(t.type, t.value) for t in ts])
print("AST:\n", ast_str(ast))

Tokens: [('NUMBER', '3'), ('PLUS', '+'), ('NUMBER', '4'), ('STAR', '*'), ('LPAREN', '('), ('NUMBER', '2'), ('MINUS', '-'), ('NUMBER', '1'), ('RPAREN', ')'), ('CARET', '^'), ('NUMBER', '2')]
AST:
 Binary(+)
  Num(3.0)
  Binary(*)
    Num(4.0)
    Binary(^)
      Binary(-)
        Num(2.0)
        Num(1.0)
      Num(2.0)


Célula 2 — Otimização + IR (TAC) + Execução

In [None]:
from dataclasses import dataclass
from typing import Optional

# ===== Otimizador (folding + simplificações) =====
class CompileError(Exception):
    """Raised when an AST node cannot be lowered to the intermediate representation."""

# Arithmetic that may be evaluated at compile time when both operands are
# literals. Stored as callables so that only the requested operation runs:
# computing all of them eagerly makes e.g. "1 + 0" fail on the unused
# division and "10 * 1000" fail on the unused exponentiation.
_CONST_OPS = {
    "+": lambda a, b: a + b,
    "-": lambda a, b: a - b,
    "*": lambda a, b: a * b,
    "/": lambda a, b: a / b,
    "^": lambda a, b: a ** b,
}


def _fold_constants(op, a, b):
    """Return ``Num(a op b)``, or None when it cannot be folded safely."""
    compute = _CONST_OPS.get(op)
    if compute is None:
        return None
    try:
        value = compute(a, b)
    except (ZeroDivisionError, OverflowError):
        # Leave the failing operation in the tree so it is reported when run.
        return None
    if isinstance(value, complex):
        # A negative base with a fractional exponent yields a complex
        # number, which a Num literal cannot carry through the IR.
        return None
    return Num(value)


def _fold_call(callee, arg):
    """Return the folded result of a ``math`` call on a literal, or None."""
    if not (isinstance(callee, Var) and isinstance(arg, Num)):
        return None
    func = getattr(math, callee.name.lower(), None)
    if not callable(func):
        # Unknown name, or a math constant such as ``pi`` used as a function.
        return None
    try:
        value = func(arg.value)
    except (ValueError, OverflowError, TypeError):
        # Domain/range error or wrong arity: defer the failure to run time.
        return None
    if not isinstance(value, (int, float)):
        # Some math functions return tuples (e.g. frexp); not a Num value.
        return None
    return Num(value)


def fold(node):
    """Return an optimised copy of ``node`` (constant folding + identities).

    * ``-literal`` becomes a literal.
    * A call to a ``math`` function on a literal argument is evaluated.
    * An arithmetic operation on two literals is evaluated.
    * Algebraic identities are applied: ``1*x``, ``x*1``, ``0*x``, ``x*0``,
      ``0+x``, ``x+0``, ``x-0``, ``x^1``, ``x^0``.

    Anything that cannot be evaluated safely at compile time (division by
    zero, overflow, math domain errors, complex results, unknown functions)
    is left in the tree unchanged rather than raising from the optimiser.
    Nodes that are neither ``Unary`` nor ``Binary`` are returned as-is.
    """
    if isinstance(node, Unary):
        operand = fold(node.expr)
        if node.op == "-" and isinstance(operand, Num):
            return Num(-operand.value)
        return Unary(node.op, operand)

    if not isinstance(node, Binary):
        return node

    left, right = fold(node.left), fold(node.right)

    # Function call with a constant argument.
    if node.op == "CALL":
        folded = _fold_call(left, right)
        return folded if folded is not None else Binary(node.op, left, right)

    # Operation between two constants.
    if isinstance(left, Num) and isinstance(right, Num):
        folded = _fold_constants(node.op, left.value, right.value)
        if folded is not None:
            return folded

    # Algebraic simplifications.
    # NOTE: "x*0 -> 0" and "x^0 -> 1" discard the other operand entirely,
    # including any run-time error it would have produced.
    if node.op == "*":
        if isinstance(left, Num) and left.value == 1:
            return right
        if isinstance(right, Num) and right.value == 1:
            return left
        if (isinstance(left, Num) and left.value == 0) or (isinstance(right, Num) and right.value == 0):
            return Num(0)
    if node.op == "+":
        if isinstance(left, Num) and left.value == 0:
            return right
        if isinstance(right, Num) and right.value == 0:
            return left
    if node.op == "-":
        if isinstance(right, Num) and right.value == 0:
            return left
    if node.op == "^":
        if isinstance(right, Num) and right.value == 1:
            return left
        if isinstance(right, Num) and right.value == 0:
            return Num(1)
    return Binary(node.op, left, right)

# ===== IR de três endereços (TAC) + execução =====
# Counter behind the temporary names; incremented by newt().
Temp = 0


def newt():
    """Return a fresh temporary name ("t1", "t2", ...) and advance the counter."""
    global Temp
    Temp = Temp + 1
    return "t" + str(Temp)

@dataclass
class Instr:
    """One three-address-code instruction."""
    op: str                     # opcode, e.g. "ADD", "NEG" or "CALL_<function name>"
    args: tuple                 # operands: numeric literals as text, or temporary names
    dest: Optional[str] = None  # temporary that receives the result, if any

def gen_ir(n):
    """Lower an AST to three-address code.

    Returns a pair ``(operand, instructions)``: ``operand`` is the text that
    names the value of ``n`` (a literal for ``Num``, otherwise a fresh
    temporary) and ``instructions`` is the list of ``Instr`` computing it.
    A bare ``Num`` therefore produces no instructions.

    Raises:
        CompileError: for any node other than ``Num``, a ``Unary`` minus or
            a ``Binary``.
    """
    if isinstance(n, Num):
        return str(n.value), []

    if isinstance(n, Unary) and n.op == "-":
        operand, code = gen_ir(n.expr)
        dest = newt()
        return dest, [*code, Instr("NEG", (operand,), dest)]

    if not isinstance(n, Binary):
        raise CompileError("Nó não suportado.")

    if n.op == "CALL" and isinstance(n.left, Var):
        # Function call: the callee name is encoded into the opcode.
        argument, code = gen_ir(n.right)
        dest = newt()
        return dest, [*code, Instr(f"CALL_{n.left.name}", (argument,), dest)]

    # Operands are lowered left to right before the result temporary is
    # allocated, so temporaries are numbered in evaluation order.
    lhs, lhs_code = gen_ir(n.left)
    rhs, rhs_code = gen_ir(n.right)
    opcode = {"+": "ADD", "-": "SUB", "*": "MUL", "/": "DIV", "^": "POW"}[n.op]
    dest = newt()
    return dest, [*lhs_code, *rhs_code, Instr(opcode, (lhs, rhs), dest)]

def exec_ir(code):
    """Interpret a list of three-address instructions and return the result.

    Each instruction's result is stored under its ``dest`` name. Operands are
    either numeric literals in text form or names of earlier results.
    ``CALL_<name>`` invokes the function ``<name>`` (lower-cased) from the
    ``math`` module on its single operand.

    Returns:
        The value stored by the last instruction, or 0.0 when ``code`` is
        empty or its last instruction has no destination.

    Raises:
        RuntimeError: on an unknown opcode.
        KeyError: when an operand names a value that was never computed.
    """
    env = {}

    def val(x):
        # Only a failed numeric parse means "this is a name"; any other
        # exception is a real error and must not be swallowed.
        try:
            return float(x)
        except ValueError:
            return env[x]

    binary_ops = {
        "ADD": lambda a, b: a + b,
        "SUB": lambda a, b: a - b,
        "MUL": lambda a, b: a * b,
        "DIV": lambda a, b: a / b,
        "POW": lambda a, b: a ** b,
    }

    for ins in code:
        op = ins.op
        if op.startswith("CALL_"):
            func_name = op.split("_", 1)[1].lower()
            env[ins.dest] = getattr(math, func_name)(val(ins.args[0]))
        elif op == "NEG":
            env[ins.dest] = -val(ins.args[0])
        elif op in binary_ops:
            env[ins.dest] = binary_ops[op](val(ins.args[0]), val(ins.args[1]))
        else:
            raise RuntimeError(f"Opcode desconhecido: {op}")
    return env[code[-1].dest] if code and code[-1].dest else 0.0

Célula 3 — Demonstração fim-a-fim

In [None]:
def run(expr: str):
    """Run the whole pipeline on ``expr`` and print every intermediate stage.

    Prints the tokens, the parsed AST, the optimised AST, the generated
    three-address code and the final numeric result. The temporary counter
    is reset first so each run numbers its temporaries from t1.
    """
    global Temp

    print("\n=== Expressão:", expr)
    ts = tokenize(expr)
    print("Tokens:", [(t.type, t.value) for t in ts])
    ast = Parser(ts).parse()
    print("AST:\n", ast_str(ast))
    ast2 = fold(ast)
    print("AST otimizada:\n", ast_str(ast2))

    Temp = 0
    res, ir = gen_ir(ast2)
    print("IR (TAC):")
    for i in ir:
        operands = " ".join(map(str, i.args))
        if i.dest:
            print(f"{i.dest} = {i.op} {operands}")
        else:
            print(f"{i.op} {operands}")

    # When folding reduces the expression to a literal, no instructions are
    # generated and the value lives only in ``res``; executing the empty
    # program would report 0.0 instead of the real result.
    result = exec_ir(ir) if ir else float(res)
    print("Resultado:", result)

# Quick examples from the lesson: operator precedence with parentheses,
# function calls, and a leading unary minus.
run("3 + 4 * (2 - 1) ^ 2")
run("sin(0) + cos(0) ^ 2")
run("-3 + 2 * 5")


=== Expressão: 3 + 4 * (2 - 1) ^ 2
Tokens: [('NUMBER', '3'), ('PLUS', '+'), ('NUMBER', '4'), ('STAR', '*'), ('LPAREN', '('), ('NUMBER', '2'), ('MINUS', '-'), ('NUMBER', '1'), ('RPAREN', ')'), ('CARET', '^'), ('NUMBER', '2')]
AST:
 Binary(+)
  Num(3.0)
  Binary(*)
    Num(4.0)
    Binary(^)
      Binary(-)
        Num(2.0)
        Num(1.0)
      Num(2.0)
AST otimizada:
 Num(7.0)
IR (TAC):
Resultado: 0.0

=== Expressão: sin(0) + cos(0) ^ 2
Tokens: [('IDENT', 'sin'), ('LPAREN', '('), ('NUMBER', '0'), ('RPAREN', ')'), ('PLUS', '+'), ('IDENT', 'cos'), ('LPAREN', '('), ('NUMBER', '0'), ('RPAREN', ')'), ('CARET', '^'), ('NUMBER', '2')]
AST:
 Binary(+)
  Binary(CALL)
    Var(sin)
    Num(0.0)
  Binary(^)
    Binary(CALL)
      Var(cos)
      Num(0.0)
    Num(2.0)
AST otimizada:
 Num(1.0)
IR (TAC):
Resultado: 0.0

=== Expressão: -3 + 2 * 5
Tokens: [('MINUS', '-'), ('NUMBER', '3'), ('PLUS', '+'), ('NUMBER', '2'), ('STAR', '*'), ('NUMBER', '5')]
AST:
 Binary(+)
  Unary(-)
    Num(3.0)
  Binary(*