Planejamento do compilador (linguagem e gramática)

In [None]:
# Cria um "manifesto" do projeto com EBNF e uma checagem de precedência.
from textwrap import dedent

# Grammar of the project language in an EBNF-like notation, kept as a single
# raw string so it can be printed verbatim. strip() drops the blank lines that
# the triple quotes leave at both ends.
# NOTE(review): `primary` below lists only NUMBER, ID and a parenthesised
# expression, while the parser cells later in this notebook also accept the
# keywords 'true' and 'false' — confirm which of the two is intended.
EBNF = dedent(r"""
program   := stmt* EOF ;
stmt      := 'let' ID '=' expr ';'
           | ID '=' expr ';'
           | 'print' expr ';'
           | 'if' '(' expr ')' block ('else' block)?
           | 'while' '(' expr ')' block
           | block ;
block     := '{' stmt* '}' ;

expr      := or ;
or        := and ( '||' and )* ;
and       := equality ( '&&' equality )* ;
equality  := relation ( ('==' | '!=') relation )* ;
relation  := add ( ('<' | '<=' | '>' | '>=') add )* ;
add       := mul ( ('+' | '-') mul )* ;
mul       := unary ( ('*' | '/') unary )* ;
unary     := ('-' | '!') unary | primary ;
primary   := NUMBER | ID | '(' expr ')' ;

# Léxico:
ID        := [A-Za-z_][A-Za-z0-9_]* ;
NUMBER    := [0-9]+ ;
SKIP      := espaços | '\n' | comentários //... ;
""").strip()

# >>> Order: strongest -> weakest <<<
# This is a flat list, so operators that share a grammar level (e.g. "*" and
# "/") still get distinct positions here.
prec_order = ["*","/","+","-","<","<=",">",">=","==","!=","&&","||"]
# Every operator in the table is declared left-associative.
assoc = {op: "left" for op in prec_order}

# Print the grammar; the extra "\n" argument leaves a blank line after it.
print("=== EBNF do projeto ===\n")
print(EBNF, "\n")

# Didactic check: ensures * / come before + -, which come before
# comparison/equality, which come before &&, which comes before ||.
def check_precedence(order):
    """Assert that ``order`` lists operators from strongest to weakest.

    Two representative chains are verified; each operator in a chain must
    appear strictly earlier in ``order`` than the next one.

    Raises:
        AssertionError: if a chain is out of order.
        KeyError: if an operator needed by a chain is missing from ``order``.
    """
    rank = {}
    for position, symbol in enumerate(order):
        rank[symbol] = position

    chains = (
        ("*", "+", "==", "&&", "||"),
        ("/", "-", "<", "&&", "||"),
    )
    for chain in chains:
        # all() stops at the first failing pair, like a chained comparison.
        assert all(rank[a] < rank[b] for a, b in zip(chain, chain[1:]))
    print("Ordem de precedência checada com sucesso.")

# Run the didactic check on the precedence table; a failed assert aborts the cell.
check_precedence(prec_order)

# List the associativity recorded for each operator, in precedence order.
print("\nPolítica de associatividade (esperada 'left' para todos):")
for op in prec_order:
    print(f"{op}: {assoc[op]}")

print("\nEntrada exemplo e saída esperada:")
src = "let x = 2 + 3*4; if (x > 5) { print x; } else { print 0; }"
print("Entrada:", src)
# The expected value is computed by Python from the same arithmetic as the
# `let` in `src`; nothing in this cell parses or runs `src`.
print("Saída esperada:", 2 + 3*4)  # 14


=== EBNF do projeto ===

program   := stmt* EOF ;
stmt      := 'let' ID '=' expr ';'
           | ID '=' expr ';'
           | 'print' expr ';'
           | 'if' '(' expr ')' block ('else' block)?
           | 'while' '(' expr ')' block
           | block ;
block     := '{' stmt* '}' ;

expr      := or ;
or        := and ( '||' and )* ;
and       := equality ( '&&' equality )* ;
equality  := relation ( ('==' | '!=') relation )* ;
relation  := add ( ('<' | '<=' | '>' | '>=') add )* ;
add       := mul ( ('+' | '-') mul )* ;
mul       := unary ( ('*' | '/') unary )* ;
unary     := ('-' | '!') unary | primary ;
primary   := NUMBER | ID | '(' expr ')' ;

# Léxico:
ID        := [A-Za-z_][A-Za-z0-9_]* ;
NUMBER    := [0-9]+ ;
SKIP      := espaços | '\n' | comentários //... ; 

Ordem de precedência checada com sucesso.

Política de associatividade (esperada 'left' para todos):
*: left
/: left
+: left
-: left
<: left
<=: left
>: left
>=: left
==: left
!=: left
&&: left
||: left

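Entrada exemplo e saída esperada:
Entrada: let x = 2 + 3*4; if (x > 5) { print x; } else { print 0; }
Saída esperada: 14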

Especificação da linguagem de entrada

In [None]:
# Validador leve que confere o vocabulário e uma forma geral plausível.
import re

# Token classes for the lightweight validator, tried in this order at each
# position. "COM" must come before "SYM": "/" is a valid symbol, so with the
# symbol class first a "//" comment would be read as two division symbols.
TOKENS = {
    "WS":  r"\s+",
    "COM": r"//[^\n]*",
    "KW": r"(let|print|if|else|while)\b",
    "OP2": r"(==|!=|<=|>=|\|\||&&)",
    "SYM": r"[{}();=+\-*/!<>]",
    "NUM": r"\d+",
    "ID":  r"[A-Za-z_]\w*",
}
MASTER = re.compile("|".join(f"(?P<{k}>{v})" for k, v in TOKENS.items()))
# Identifiers with these spellings are reclassified as keywords by tokenize().
RESERVED = {"let", "print", "if", "else", "while", "true", "false"}


def tokenize(src):
    """Split ``src`` into ``(kind, text)`` pairs, ending with ``("EOF", "")``.

    Whitespace and ``//`` comments are dropped.

    Raises:
        SyntaxError: if no token class matches at some offset.
    """
    pos = 0
    out = []
    while pos < len(src):
        m = MASTER.match(src, pos)
        if not m:
            raise SyntaxError(f"token inválido @ {pos}")
        kind, text = m.lastgroup, m.group()
        pos = m.end()
        if kind in ("WS", "COM"):
            continue
        if kind == "ID" and text in RESERVED:
            kind = "KW"
        out.append((kind, text))
    out.append(("EOF", ""))
    return out


def quick_validate(src):
    """Cheap plausibility check of a program; returns ``(ok, message)``.

    Heuristics:
      * two atoms (ID/NUM) may not be adjacent without an operator;
      * a program with no ``if``, ``while`` or ``{`` token must end with ``;``.

    Both rules look at tokens rather than raw text, so an identifier such as
    ``diff`` is not mistaken for the keyword ``if`` and a trailing comment does
    not hide the final ``;``. A program with no tokens is accepted.

    Raises:
        SyntaxError: propagated from ``tokenize`` for unknown characters.
    """
    toks = tokenize(src)
    atoms = ("ID", "NUM")
    for (kind_a, _), (kind_b, _) in zip(toks, toks[1:]):
        if kind_a in atoms and kind_b in atoms:
            return False, "Dois átomos sem operador entre eles."

    real = toks[:-1]  # everything except the EOF sentinel
    structured = any(
        tok in (("KW", "if"), ("KW", "while"), ("SYM", "{")) for tok in real
    )
    if real and not structured and real[-1] != ("SYM", ";"):
        return False, "Programa simples deve terminar com ';'."
    return True, "Forma lexical/sintática geral plausível."

# Sample programs: straight-line statements, an if/else and a while loop.
exemplos = [
    "let a=1; let b=2; print a+b;",
    "if (1) { print 1; } else { print 0; }",
    "while(0){print 1;}",
]
# Validate each sample and echo the verdict together with its message.
for e in exemplos:
    ok,msg = quick_validate(e)
    print("Entrada:", e, "\nValidação:", ok, "-", msg, "\n")

Entrada: let a=1; let b=2; print a+b; 
Validação: True - Forma lexical/sintática geral plausível. 

Entrada: if (1) { print 1; } else { print 0; } 
Validação: True - Forma lexical/sintática geral plausível. 

Entrada: while(0){print 1;} 
Validação: True - Forma lexical/sintática geral plausível. 



Definindo regras léxicas e sintáticas

In [None]:
# Lexer e Parser para MiniC (subset), com impressão de nós.
import re
from dataclasses import dataclass
from typing import List, Optional

# Token classes, tried in this order at each position. "COM" must precede
# "OP": "/" is a valid operator, so with the operator class first a "//"
# comment would be read as two division operators.
SPEC = [
    ("WS", r"[ \t\r\n]+"),
    ("COM", r"//[^\n]*"),
    ("NUMBER", r"\d+"),
    ("KW", r"\b(let|print|if|else|while|true|false)\b"),
    ("ID", r"[A-Za-z_]\w*"),
    ("OP", r"==|!=|<=|>=|\|\||&&|[+\-*/=!<>]"),
    ("LP", r"\("), ("RP", r"\)"),
    ("LB", r"\{"), ("RB", r"\}"),
    ("SC", r";"),
]
MASTER = re.compile("|".join(f"(?P<{n}>{p})" for n, p in SPEC))

from dataclasses import dataclass


@dataclass
class Tok:
    """One token: type tag, matched text and 1-based source position."""
    t: str
    v: str
    line: int
    col: int


def lex(src: str) -> List[Tok]:
    """Turn ``src`` into a list of ``Tok`` ending with an ``EOF`` token.

    Whitespace and ``//`` comments are skipped but still advance the
    line/column bookkeeping. The EOF token carries the position just past the
    last character.

    Raises:
        SyntaxError: if a character matches no token class. Matching is
            anchored at the current offset, so nothing is skipped silently.
    """
    line = 1
    base = 0  # offset of the first character of the current line
    pos = 0
    out = []
    while pos < len(src):
        col = pos - base + 1
        m = MASTER.match(src, pos)
        if m is None:
            raise SyntaxError(f"Caractere inválido {src[pos]!r} @ {line}:{col}")
        kind, text = m.lastgroup, m.group()
        pos = m.end()
        if kind in ("WS", "COM"):
            # Only whitespace can contain newlines; move to the last one seen.
            last_nl = text.rfind("\n")
            if last_nl != -1:
                line += text.count("\n")
                base = m.start() + last_nl + 1
            continue
        out.append(Tok(kind, text, line, col))
    out.append(Tok("EOF", "", line, len(src) - base + 1))
    return out

# AST
@dataclass
class Node:
    """Base of every AST node; stores the source position."""
    line: int
    col: int


@dataclass
class Program(Node):
    """Root node holding the top-level statements."""
    body: List['Stmt']


@dataclass
class Stmt(Node):
    """Marker base class for statements."""


@dataclass
class Block(Stmt):
    """Braced sequence of statements."""
    body: List['Stmt']


@dataclass
class Let(Stmt):
    """``let name = expr;``"""
    name: str
    expr: 'Expr'


@dataclass
class Assign(Stmt):
    """``name = expr;``"""
    name: str
    expr: 'Expr'


@dataclass
class Print(Stmt):
    """``print expr;``"""
    expr: 'Expr'


@dataclass
class If(Stmt):
    """``if (cond) then`` with an optional ``else`` block."""
    cond: 'Expr'
    then: Block
    els: Optional[Block]


@dataclass
class While(Stmt):
    """``while (cond) body``"""
    cond: 'Expr'
    body: Block


@dataclass
class Expr(Node):
    """Marker base class for expressions."""


@dataclass
class Num(Expr):
    """Integer literal."""
    val: int


@dataclass
class Bool(Expr):
    """Boolean literal stored as an int."""
    val: int


@dataclass
class Var(Expr):
    """Reference to a variable by name."""
    name: str


@dataclass
class Unary(Expr):
    """Prefix operator ``op`` applied to ``e``."""
    op: str
    e: Expr


@dataclass
class Bin(Expr):
    """Binary operator ``op`` with left operand ``l`` and right operand ``r``."""
    op: str
    l: Expr
    r: Expr

class Parser:
    """Recursive-descent parser that builds an AST from a token list.

    The token list is expected to end with an ``EOF`` token, as the list
    returned by ``lex`` does. Each grammar level has its own method; binary
    levels are left-associative and run from weakest (``||``) to strongest
    (``*`` and ``/``).
    """

    def __init__(self, toks):
        self.t = toks  # full token list
        self.i = 0     # index of the current, not yet consumed, token

    def cur(self):
        """Return the current token without consuming it."""
        return self.t[self.i]

    def eat(self, k, v=None):
        """Consume and return the current token if it matches.

        Args:
            k: required token type.
            v: required token text, or ``None`` to accept any text.

        Raises:
            SyntaxError: if the current token does not match.
        """
        tok = self.cur()
        if tok.t == k and (v is None or tok.v == v):
            self.i += 1
            return tok
        exp = f"{k}" + (f" '{v}'" if v else "")
        raise SyntaxError(f"Esperado {exp}, obtido {tok.t} '{tok.v}' @ {tok.line}:{tok.col}")

    def parse(self) -> Program:
        """Parse statements until EOF and return the ``Program`` root."""
        body = []
        while self.cur().t != "EOF":
            body.append(self.stmt())
        return Program(1, 1, body)

    def block(self) -> Block:
        """Parse ``'{' stmt* '}'``; the node takes the position of the ``{``."""
        l = self.eat("LB")
        body = []
        while self.cur().t != "RB":
            body.append(self.stmt())
        self.eat("RB")
        return Block(l.line, l.col, body)

    def stmt(self) -> Stmt:
        """Parse one statement, chosen by looking at the current token.

        Raises:
            SyntaxError: if the current token cannot start a statement.
        """
        t = self.cur()
        if t.t == "KW" and t.v == "let":
            k = self.eat("KW", "let")
            name = self.eat("ID")
            self.eat("OP", "=")
            e = self.expr()
            self.eat("SC")
            return Let(k.line, k.col, name.v, e)
        # An assignment needs one token of lookahead: ID followed by '='.
        if t.t == "ID" and self.t[self.i + 1].t == "OP" and self.t[self.i + 1].v == "=":
            name = self.eat("ID").v
            self.eat("OP", "=")
            e = self.expr()
            self.eat("SC")
            return Assign(t.line, t.col, name, e)
        if t.t == "KW" and t.v == "print":
            k = self.eat("KW", "print")
            e = self.expr()
            self.eat("SC")
            return Print(k.line, k.col, e)
        if t.t == "KW" and t.v == "if":
            k = self.eat("KW", "if")
            self.eat("LP")
            c = self.expr()
            self.eat("RP")
            th = self.block()
            el = None
            if self.cur().t == "KW" and self.cur().v == "else":
                self.eat("KW", "else")
                el = self.block()
            return If(k.line, k.col, c, th, el)
        if t.t == "KW" and t.v == "while":
            k = self.eat("KW", "while")
            self.eat("LP")
            c = self.expr()
            self.eat("RP")
            b = self.block()
            return While(k.line, k.col, c, b)
        if t.t == "LB":
            return self.block()
        raise SyntaxError(f"Comando inválido @ {t.line}:{t.col}")

    # precedence levels
    def _binary_level(self, operand, ops):
        """Parse ``operand (op operand)*`` left-associatively for ``op`` in ``ops``."""
        e = operand()
        while self.cur().t == "OP" and self.cur().v in ops:
            op = self.eat("OP")
            e = Bin(op.line, op.col, op.v, e, operand())
        return e

    def expr(self):
        """Parse a full expression, starting at the weakest level."""
        return self.or_()

    def or_(self):
        return self._binary_level(self.and_, ("||",))

    def and_(self):
        return self._binary_level(self.eq, ("&&",))

    def eq(self):
        return self._binary_level(self.rel, ("==", "!="))

    def rel(self):
        return self._binary_level(self.add, ("<", "<=", ">", ">="))

    def add(self):
        return self._binary_level(self.mul, ("+", "-"))

    def mul(self):
        return self._binary_level(self.unary, ("*", "/"))

    def unary(self):
        """Parse a chain of prefix ``-`` / ``!`` operators, then a primary."""
        if self.cur().t == "OP" and self.cur().v in ("-", "!"):
            op = self.eat("OP")
            r = self.unary()
            # Dataclass fields are positional in the order line, col, op, e;
            # the node is positioned at the operator token.
            return Unary(op.line, op.col, op.v, r)
        return self.primary()

    def primary(self):
        """Parse a number, boolean keyword, identifier or parenthesised expression.

        Raises:
            SyntaxError: if the current token cannot start an expression.
        """
        t = self.cur()
        if t.t == "NUMBER":
            self.eat("NUMBER")
            return Num(t.line, t.col, int(t.v))
        if t.t == "KW" and t.v in ("true", "false"):
            self.eat("KW", t.v)
            return Bool(t.line, t.col, 1 if t.v == "true" else 0)
        if t.t == "ID":
            self.eat("ID")
            return Var(t.line, t.col, t.v)
        if t.t == "LP":
            self.eat("LP")
            e = self.expr()
            self.eat("RP")
            return e
        raise SyntaxError(f"Expressão inválida @ {t.line}:{t.col}")

# Quick demo: parse a two-statement program and show its top-level nodes.
src = "let x=2; print x+3*2;"
ast = Parser(lex(src)).parse()
print("Entrada:", src)
print("Nós de topo:", len(ast.body))
print("Primeiro stmt:", ast.body[0])
print("Segundo stmt:", ast.body[1])


Entrada: let x=2; print x+3*2;
Nós de topo: 2
Primeiro stmt: Let(line=1, col=1, name='x', expr=Num(line=1, col=7, val=2))
Segundo stmt: Print(line=1, col=10, expr=Bin(line=1, col=17, op='+', l=Var(line=1, col=16, name='x'), r=Bin(line=1, col=19, op='*', l=Num(line=1, col=18, val=3), r=Num(line=1, col=20, val=2))))


Protótipo inicial no Colab

In [None]:
# === START: MiniC (subset) — célula diagnóstica com feedback explícito ===
import re, io, contextlib
from dataclasses import dataclass
from typing import List, Optional

# Startup banner; flush=True writes it out immediately instead of leaving it buffered.
print("[START] preparando lexer/parser/interpretador...", flush=True)

# ---------- Lexer ----------
# Token classes, tried in this order at each position. "COM" must precede
# "OP": "/" is a valid operator, so with the operator class first a "//"
# comment would be read as two division operators.
SPEC = [
    ("WS", r"[ \t\r\n]+"),
    ("COM", r"//[^\n]*"),
    ("NUMBER", r"\d+"),
    ("KW", r"\b(let|print|if|else|while|true|false)\b"),
    ("ID", r"[A-Za-z_]\w*"),
    ("OP", r"==|!=|<=|>=|\|\||&&|[+\-*/=!<>]"),
    ("LP", r"\("), ("RP", r"\)"),
    ("LB", r"\{"), ("RB", r"\}"),
    ("SC", r";"),
]
MASTER = re.compile("|".join(f"(?P<{n}>{p})" for n, p in SPEC))


@dataclass
class Tok:
    """One token: type tag, matched text and 1-based source position."""
    t: str
    v: str
    line: int
    col: int


def lex(src: str) -> List[Tok]:
    """Turn ``src`` into a list of ``Tok`` ending with an ``EOF`` token.

    Whitespace and ``//`` comments are skipped but still advance the
    line/column bookkeeping. The EOF token carries the position just past the
    last character.

    Raises:
        SyntaxError: if a character matches no token class. Matching is
            anchored at the current offset, so nothing is skipped silently.
    """
    line = 1
    base = 0  # offset of the first character of the current line
    pos = 0
    out = []
    while pos < len(src):
        col = pos - base + 1
        m = MASTER.match(src, pos)
        if m is None:
            raise SyntaxError(f"Caractere inválido {src[pos]!r} @ {line}:{col}")
        kind, text = m.lastgroup, m.group()
        pos = m.end()
        if kind in ("WS", "COM"):
            # Only whitespace can contain newlines; move to the last one seen.
            last_nl = text.rfind("\n")
            if last_nl != -1:
                line += text.count("\n")
                base = m.start() + last_nl + 1
            continue
        out.append(Tok(kind, text, line, col))
    out.append(Tok("EOF", "", line, len(src) - base + 1))
    return out

# ---------- AST ----------
@dataclass
class Node:
    """Common base for AST nodes: every node records a line and a column."""
    line: int
    col: int


@dataclass
class Program(Node):
    """Whole program: the list of top-level statements."""
    body: List['Stmt']


@dataclass
class Stmt(Node):
    """Base class shared by all statement nodes."""


@dataclass
class Block(Stmt):
    """Statements grouped by braces."""
    body: List['Stmt']


@dataclass
class Let(Stmt):
    """Declaration of ``name`` initialised with ``expr``."""
    name: str
    expr: 'Expr'


@dataclass
class Assign(Stmt):
    """Assignment of ``expr`` to ``name``."""
    name: str
    expr: 'Expr'


@dataclass
class Print(Stmt):
    """Print statement for ``expr``."""
    expr: 'Expr'


@dataclass
class If(Stmt):
    """Conditional with a ``then`` block and an optional ``els`` block."""
    cond: 'Expr'
    then: Block
    els: Optional[Block]


@dataclass
class While(Stmt):
    """Loop with condition ``cond`` and block ``body``."""
    cond: 'Expr'
    body: Block


@dataclass
class Expr(Node):
    """Base class shared by all expression nodes."""


@dataclass
class Num(Expr):
    """Integer literal."""
    val: int


@dataclass
class Bool(Expr):
    """Boolean literal."""
    val: int  # 1/0


@dataclass
class Var(Expr):
    """Variable reference."""
    name: str


@dataclass
class Unary(Expr):
    """Prefix operation: operator ``op`` on operand ``e``."""
    op: str
    e: Expr


@dataclass
class Bin(Expr):
    """Binary operation: operator ``op`` on operands ``l`` and ``r``."""
    op: str
    l: Expr
    r: Expr

# ---------- Parser ----------
class Parser:
    """Recursive-descent parser producing an AST from a token list.

    One method per grammar level; binary levels share a left-associative
    helper and run from weakest (``||``) to strongest (``*`` and ``/``).
    """

    def __init__(self, t):
        self.t = t   # token list
        self.i = 0   # index of the token currently being examined

    def cur(self):
        """Return the token at the current position."""
        return self.t[self.i]

    def _at(self, kind, *texts):
        """Tell whether the current token has type ``kind`` and, if given, one of ``texts``."""
        here = self.cur()
        if here.t != kind:
            return False
        return not texts or here.v in texts

    def eat(self, k, v=None):
        """Consume the current token when it matches ``k`` (and ``v``), else raise SyntaxError."""
        tok = self.cur()
        matches = tok.t == k and (v is None or tok.v == v)
        if not matches:
            raise SyntaxError(f"Esperado {k}{(' '+v) if v else ''}, obtido {tok.t} '{tok.v}' @ {tok.line}:{tok.col}")
        self.i += 1
        return tok

    def parse(self):
        """Parse statements up to EOF into a ``Program``."""
        statements = []
        while not self._at("EOF"):
            statements.append(self.stmt())
        return Program(1, 1, statements)

    def block(self):
        """Parse a braced statement list positioned at its opening brace."""
        opener = self.eat("LB")
        statements = []
        while not self._at("RB"):
            statements.append(self.stmt())
        self.eat("RB")
        return Block(opener.line, opener.col, statements)

    def stmt(self):
        """Parse a single statement; raises SyntaxError if none can start here."""
        first = self.cur()
        if self._at("KW", "let"):
            self.eat("KW", "let")
            target = self.eat("ID")
            self.eat("OP", "=")
            value = self.expr()
            self.eat("SC")
            return Let(first.line, first.col, target.v, value)
        if first.t == "ID":
            # Assignment is recognised by peeking one token ahead for '='.
            follower = self.t[self.i + 1]
            if follower.t == "OP" and follower.v == "=":
                target = self.eat("ID").v
                self.eat("OP", "=")
                value = self.expr()
                self.eat("SC")
                return Assign(first.line, first.col, target, value)
        if self._at("KW", "print"):
            self.eat("KW", "print")
            value = self.expr()
            self.eat("SC")
            return Print(first.line, first.col, value)
        if self._at("KW", "if"):
            self.eat("KW", "if")
            self.eat("LP")
            condition = self.expr()
            self.eat("RP")
            then_block = self.block()
            else_block = None
            if self._at("KW", "else"):
                self.eat("KW", "else")
                else_block = self.block()
            return If(first.line, first.col, condition, then_block, else_block)
        if self._at("KW", "while"):
            self.eat("KW", "while")
            self.eat("LP")
            condition = self.expr()
            self.eat("RP")
            loop_body = self.block()
            return While(first.line, first.col, condition, loop_body)
        if first.t == "LB":
            return self.block()
        raise SyntaxError(f"Comando inválido @ {first.line}:{first.col}")

    # precedence levels
    def _left_assoc(self, operand, symbols):
        """Parse ``operand (symbol operand)*``, folding to the left."""
        tree = operand()
        while self._at("OP", *symbols):
            sign = self.eat("OP")
            tree = Bin(sign.line, sign.col, sign.v, tree, operand())
        return tree

    def expr(self):
        """Entry point for expressions."""
        return self.or_()

    def or_(self):
        return self._left_assoc(self.and_, ("||",))

    def and_(self):
        return self._left_assoc(self.eq, ("&&",))

    def eq(self):
        return self._left_assoc(self.rel, ("==", "!="))

    def rel(self):
        return self._left_assoc(self.add, ("<", "<=", ">", ">="))

    def add(self):
        return self._left_assoc(self.mul, ("+", "-"))

    def mul(self):
        return self._left_assoc(self.unary, ("*", "/"))

    def unary(self):
        """Parse prefix ``-`` / ``!`` (right-recursive) or fall through to a primary."""
        if not self._at("OP", "-", "!"):
            return self.primary()
        sign = self.eat("OP")
        operand = self.unary()
        # positional order: (line, col, op, expr)
        return Unary(sign.line, sign.col, sign.v, operand)

    def primary(self):
        """Parse a literal, identifier or parenthesised expression."""
        tok = self.cur()
        if tok.t == "NUMBER":
            self.eat("NUMBER")
            return Num(tok.line, tok.col, int(tok.v))
        if tok.t == "KW" and tok.v in ("true", "false"):
            self.eat("KW", tok.v)
            return Bool(tok.line, tok.col, 1 if tok.v == "true" else 0)
        if tok.t == "ID":
            self.eat("ID")
            return Var(tok.line, tok.col, tok.v)
        if tok.t == "LP":
            self.eat("LP")
            inner = self.expr()
            self.eat("RP")
            return inner
        raise SyntaxError(f"Expr inválida @ {tok.line}:{tok.col}")

# ---------- Ambiente + Interp ----------
class Env:
    """Variable environment: a stack of dicts, innermost scope last."""

    def __init__(self):
        self.stack = [{}]

    def push(self):
        """Open a new innermost scope."""
        self.stack.append({})

    def pop(self):
        """Discard the innermost scope."""
        self.stack.pop()

    def declare(self, name, val):
        """Bind ``name`` in the innermost scope, replacing any binding there."""
        self.stack[-1][name] = val

    def _owner(self, name):
        """Return the innermost scope that defines ``name``, else raise RuntimeError."""
        for scope in reversed(self.stack):
            if name in scope:
                return scope
        raise RuntimeError(f"variável '{name}' não definida")

    def set(self, name, val):
        """Update ``name`` in the innermost scope that already defines it."""
        self._owner(name)[name] = val

    def get(self, name):
        """Return the value of ``name`` from the innermost scope defining it."""
        return self._owner(name)[name]

class Interp:
    """Tree-walking interpreter; booleans and comparisons yield the ints 1 and 0."""

    # Arithmetic operators whose result is returned as-is.
    _ARITH = {
        "+": lambda a, b: a + b,
        "-": lambda a, b: a - b,
        "*": lambda a, b: a * b,
    }
    # Comparison operators whose truth value is mapped to 1/0.
    _COMPARE = {
        "==": lambda a, b: a == b,
        "!=": lambda a, b: a != b,
        "<": lambda a, b: a < b,
        "<=": lambda a, b: a <= b,
        ">": lambda a, b: a > b,
        ">=": lambda a, b: a >= b,
    }

    def __init__(self):
        self.env = Env()

    def run(self, prog: Program):
        """Execute every top-level statement of ``prog`` in order."""
        for st in prog.body:
            self.exec_stmt(st)

    def exec_stmt(self, s: Stmt):
        """Execute one statement; raises RuntimeError for an unknown node type."""
        if isinstance(s, Let):
            self.env.declare(s.name, self.eval(s.expr))
        elif isinstance(s, Assign):
            self.env.set(s.name, self.eval(s.expr))
        elif isinstance(s, Print):
            print(self.eval(s.expr))
        elif isinstance(s, Block):
            # Each block runs in its own scope, removed even if a statement fails.
            self.env.push()
            try:
                for inner in s.body:
                    self.exec_stmt(inner)
            finally:
                self.env.pop()
        elif isinstance(s, If):
            if self.eval(s.cond):
                self.exec_stmt(s.then)
            elif s.els:
                self.exec_stmt(s.els)
        elif isinstance(s, While):
            while self.eval(s.cond):
                self.exec_stmt(s.body)
        else:
            raise RuntimeError("Stmt não reconhecido")

    def eval(self, e: Expr):
        """Evaluate an expression node; raises RuntimeError for an unknown node or operator."""
        if isinstance(e, (Num, Bool)):
            return e.val
        if isinstance(e, Var):
            return self.env.get(e.name)
        if isinstance(e, Unary):
            operand = self.eval(e.e)
            if e.op == "-":
                return -operand
            # Any other unary operator is treated as logical not.
            return 0 if operand else 1
        if isinstance(e, Bin):
            return self._eval_bin(e)
        raise RuntimeError("Expr não reconhecida")

    def _eval_bin(self, e):
        """Evaluate a ``Bin`` node; ``||`` and ``&&`` short-circuit."""
        op = e.op
        if op == "||":
            return 1 if (self.eval(e.l) or self.eval(e.r)) else 0
        if op == "&&":
            return 1 if (self.eval(e.l) and self.eval(e.r)) else 0
        lhs = self.eval(e.l)
        rhs = self.eval(e.r)
        if op == "/":
            if rhs == 0:
                raise RuntimeError("divisão por zero")
            # NOTE(review): `//` floors, so a negative quotient rounds toward
            # minus infinity, whereas C truncates toward zero — confirm which
            # behaviour this language is meant to have.
            return lhs // rhs
        if op in self._ARITH:
            return self._ARITH[op](lhs, rhs)
        if op in self._COMPARE:
            return 1 if self._COMPARE[op](lhs, rhs) else 0
        raise RuntimeError("Expr não reconhecida")

# ---------- Runner com captura (feedback) ----------
def run_and_capture(src:str):
    """Lex, parse and interpret ``src``; return what it printed as a list of lines.

    Returns an empty list when nothing was printed.
    """
    program = Parser(lex(src)).parse()
    sink = io.StringIO()
    # Everything the interpreter prints goes into `sink` rather than the cell output.
    with contextlib.redirect_stdout(sink):
        Interp().run(program)
    captured = sink.getvalue()
    if not captured:
        return []
    return captured.strip().splitlines()

# Test 1: a while loop counting 0..2, then an if/else on the final counter value.
src1 = """
let i=0;
while (i < 3) { print i; i = i + 1; }
if (i == 3) { print 100; } else { print 0; }
"""
out1 = run_and_capture(src1)
# Captured output is a list of strings, so the expectation is spelled as strings too.
exp1 = ["0","1","2","100"]
print("\n[PROGRAMA 1]")
print("Saída:", out1)
print("Esperado:", exp1)
print("Resultado:", "PASSOU" if out1==exp1 else "FALHOU")

# Test 2: scoping — a `let` inside a block shadows the outer variable only within that block.
src2 = """
let x=1;
{ let x=5; print x; }
print x;
"""
out2 = run_and_capture(src2)
exp2 = ["5","1"]
print("\n[PROGRAMA 2]")
print("Saída:", out2)
print("Esperado:", exp2)
print("Resultado:", "PASSOU" if out2==exp2 else "FALHOU")

print("\n[END] testes concluídos.", flush=True)
# === END ===


[START] preparando lexer/parser/interpretador...

[PROGRAMA 1]
Saída: ['0', '1', '2', '100']
Esperado: ['0', '1', '2', '100']
Resultado: PASSOU

[PROGRAMA 2]
Saída: ['5', '1']
Esperado: ['5', '1']
Resultado: PASSOU

[END] testes concluídos.
