In [1]:
from grammarLR1 import GrammarLR1
from grammarLR0 import GrammarLR0
from common.token_class import Token

In [2]:
def gramophoneSyntaxParser(inputTokens: str) -> GrammarLR1:
    """Build a ``GrammarLR1`` from a grammar written one rule per line.

    Every non-blank line has the shape ``Head -> alt | alt | ... .``:

    * ``Head`` is the non-terminal being defined; the head of the first rule
      becomes the start symbol.
    * Alternatives are separated by ``|`` and their symbols by whitespace.
      An alternative with no symbols is stored as the empty production ``[]``.
    * A symbol whose first character is lowercase is a terminal; any other
      symbol is a non-terminal.  A non-terminal that is only ever referenced
      (never used as a head) is still registered, with no productions.
    * The trailing ``.`` terminator is removed when present; a rule without
      it is accepted unchanged.

    Args:
        inputTokens: the grammar text.

    Returns:
        ``GrammarLR1(non_terminals, terminals, start_symbol, productions)``
        where the two symbol lists keep first-appearance order and
        ``productions`` maps each non-terminal to a list of symbol lists.

    Raises:
        ValueError: if a non-blank line has no ``->`` or its head is empty or
            starts with a lowercase letter (i.e. would be a terminal).
    """
    non_terminals = []
    terminals = []
    start_symbol = ""
    productions = {}

    def add_symbol(symbol: str) -> str:
        """Register ``symbol`` (once) in the right table and return it stripped."""
        symbol = symbol.strip()
        if not symbol:
            return symbol
        if symbol[0].islower():
            if symbol not in terminals:
                terminals.append(symbol)
        elif symbol not in non_terminals:
            non_terminals.append(symbol)
            productions[symbol] = []
        return symbol

    for raw_line in inputTokens.split("\n"):
        # Strip first so that whitespace-only lines are skipped like empty ones.
        line = raw_line.strip()
        if not line:
            continue

        # Drop the rule terminator only when it is really there; cutting the
        # last character blindly would corrupt the final symbol of a rule
        # written without the dot.
        if line.endswith("."):
            line = line[:-1]

        parts = line.split("->")
        if len(parts) < 2:
            raise ValueError(
                "grammar rule is missing '->': {!r}".format(raw_line)
            )

        head = parts[0].strip()
        if not head or head[0].islower():
            # Checked before add_symbol so a bad head is never recorded as a
            # terminal as a side effect.
            raise ValueError(
                "rule head must be a non-terminal: {!r}".format(raw_line)
            )

        non_terminal = add_symbol(head)
        if start_symbol == "":
            start_symbol = non_terminal

        for alternative in parts[1].split("|"):
            # split() with no separator tolerates tabs and repeated spaces
            # and never yields empty strings.
            production = [add_symbol(symbol) for symbol in alternative.split()]
            productions[non_terminal].append(production)

    return GrammarLR1(non_terminals, terminals, start_symbol, productions)


In [3]:
# First grammar, written in the line-based notation read by
# gramophoneSyntaxParser: one `Head -> alt | alt .` rule per line, symbols
# starting with a lowercase letter are terminals, everything else is a
# non-terminal, and an empty alternative stands for the empty production.
# The head of the first rule (Program) becomes the start symbol.
# NOTE(review): ProtocolDecl, ThreeLeft, TwoRight, OneLeft and ZeroLeft are
# referenced below but never appear as a rule head, so the parser registers
# them with an empty production list — confirm this is intended.
inputTokensAlvaro = """

Program -> ListDeclaration ExprSemicolon .

ListDeclaration -> Declaration ListDeclaration | .

Declaration -> ClassDecl | FunctionDecl | ProtocolDecl.

ClassDecl -> type id TypeBody .

TypeBody -> lbrace ListTypeBodyItems rbrace . 

ListTypeBodyItems -> TypeBodyItems ListTypeBodyItems | .

TypeBodyItems -> PrimitiveAssignment semicolon | MethodDecl.

PrimitiveAssignment -> id assign Expr .

MethodDeclFullForm -> id ArgNamesList BlockExpr .

MethodDeclInline ->  id ArgNamesList arrowright SingleExprSemicolon .

MethodDecl -> MethodDeclFullForm | MethodDeclInline .

FunctionDecl -> function MethodDecl.

ArgNamesList -> lparen BlockArgNamesList rparen .

BlockArgNamesList -> NonEmptyArgNamesList | .

NonEmptyArgNamesList -> id | id comma NonEmptyArgNamesList .

SingleExprSemicolon -> SingleExpr semicolon .

BlockExprSemicolon  ->   BlockExpr OptionalSemicolon.

OptionalSemicolon -> semicolon | .

ExprSemicolon -> SingleExprSemicolon | BlockExprSemicolon.
Expr -> SingleExpr | BlockExpr.

BlockExpr -> lbrace ExprBlock rbrace .

ExprBlock -> ExprSemicolon ExprBlock | . 

SingleExpr -> IfExpr | ForExpr | WhileExpr | LetExpr | DestructiveExpr | Term .

BodyExpr -> BlockExpr | Term .

IfExpr -> if lparen Expr rparen BodyExpr OptionalElif else BodyExpr .

OptionalElif -> elif lparen Expr rparen BodyExpr OptionalElif | . 

WhileExpr -> while lparen Expr rparen BodyExpr .

ForExpr -> for lparen id in BodyExpr rparen BodyExpr .

LetExpr -> let AsignmentLetList in Expr.

DestructiveExpr -> id destructiveoperator Expr .

AsignmentLetList -> PrimitiveAssignment | AsignmentLetListElements.

AsignmentLetListElements -> comma AsignmentLetList | .

Term -> Three .

Three -> ThreeLeft | ThreeRight .

ThreeRight -> Two | Two ThreeRightOperator ThreeRight.

ThreeRightOperator -> equal | notequal | greater | less | greaterequal | lessequal .

Two -> TwoLeft | TwoRight .

TwoLeft -> One | TwoLeft TwoLeftOperator One .

TwoLeftOperator -> plus | minus | and .

One -> OneLeft | OneRight . 

OneRight -> Zero | Zero OneRightOperator OneRight .

OneRightOperator -> mult | division | or .

Zero -> ZeroLeft | ZeroRight.

ZeroRight -> UnaryExpr | UnaryExpr ZeroRightOperator ZeroRight .

ZeroRightOperator -> exponentiation.

UnaryExpr -> DottedBase | UnaryOperator Base .

UnaryOperator -> minus | exclamation .

DottedBase -> Base | DottedBase dot Base . 

Base -> FunctionCall | lparen SingleExpr rparen | Literal.

Literal -> id | number | string | boolean .

FunctionCall -> Base ArgCallList .

ArgCallList -> lparen ArgsList rparen .

ArgsList -> NonEmptyArgCallList | .

NonEmptyArgCallList -> SingleExpr | SingleExpr comma NonEmptyArgCallList .

"""

# Turn the grammar text into a GrammarLR1 instance.
grAlvaro = gramophoneSyntaxParser(inputTokensAlvaro)

In [4]:
# Build the parsing table for the first grammar; the object returned here is
# what the following cells call `.parse(...)` on.
tableAlvaro = grAlvaro.build_parsing_table()

# Show the table's printed form.
print(tableAlvaro)

State                                           type                                            id                                              lbrace                                          rbrace                                          semicolon                                       assign                                          arrowright                                      function                                        lparen                                          rparen                                          comma                                           if                                              else                                            elif                                            while                                           for                                             in                                              let                                             destructiveoperator                             equal                                   

In [5]:
# Token kinds fed to the parser: lparen number rparen semicolon, then "$".
# NOTE(review): this corresponds to `(4);` rather than a bare `4;` — confirm
# the parentheses are intended. "$" is presumably the end-of-input marker.

node = tableAlvaro.parse(
    [Token(x, x, 0, 0) for x in ["lparen", "number", "rparen", "semicolon", "$"]]
)

# Print the parse result starting from its root node.
node.root.print([0], 0, True)

# Dump every non-terminal of the grammar next to its list of productions.
for nonterminal in grAlvaro.non_terminals:
    print(nonterminal,  grAlvaro.productions[nonterminal])







Program:Program
|
|_______ListDeclaration:ListDeclaration
|
|_______ExprSemicolon:ExprSemicolon
        |
        |_______SingleExprSemicolon:SingleExprSemicolon
                |
                |_______SingleExpr:SingleExpr
                |       |
                |       |_______Term:Term
                |               |
                |               |_______Three:Three
                |                       |
                |                       |_______ThreeRight:ThreeRight
                |                               |
                |                               |_______Two:Two
                |                                       |
                |                                       |_______TwoLeft:TwoLeft
                |                                               |
                |                                               |_______One:One
                |                                                       |
                |                  

In [6]:
# Token kinds fed to the parser: lbrace number semicolon rbrace, then "$".
# NOTE(review): this corresponds to a single block
#   { 4; }
# with no nested braces and no trailing semicolon — confirm that is the
# intended input. "$" is presumably the end-of-input marker.

node = tableAlvaro.parse(
    [Token(x, x, 0, 0) for x in ["lbrace", "number", "semicolon", "rbrace", "$"]]
)

# Print the parse result starting from its root node.
node.root.print([0], 0, True)


Program:Program
|
|_______ListDeclaration:ListDeclaration
|
|_______ExprSemicolon:ExprSemicolon
        |
        |_______BlockExprSemicolon:BlockExprSemicolon
                |
                |_______BlockExpr:BlockExpr
                |       |
                |       |_______lbrace:lbrace
                |       |
                |       |_______ExprBlock:ExprBlock
                |       |       |
                |       |       |_______ExprSemicolon:ExprSemicolon
                |       |       |       |
                |       |       |       |_______SingleExprSemicolon:SingleExprSemicolon
                |       |       |               |
                |       |       |               |_______SingleExpr:SingleExpr
                |       |       |               |       |
                |       |       |               |       |_______Term:Term
                |       |       |               |               |
                |       |       |               |               |____

In [27]:
# Second grammar, in the same line-based notation read by
# gramophoneSyntaxParser (`Head -> alt | alt .`, lowercase-initial symbols are
# terminals, an empty alternative is the empty production). The head of the
# first rule (Program) becomes the start symbol.
# NOTE(review): OptElif has no empty alternative and is not recursive, so as
# written IfExpr requires exactly one elif clause — confirm this is intended.
inputTokensJavier = """

Program -> Decls Expr .
Decls -> FuncDecl Decls | TypeDecl Decls | ProtocolDecl Decls | .

ProtocolDecl -> protocol id OptExtension lbrace ProtocolElems rbrace.
OptExtension -> extends id | .
ProtocolElems -> MethodSignature ProtocolElems | .
MethodSignature -> id lparen TypedParamList rparen colon id semicolon.
TypedParamList -> id colon id TypedParamTail | .
TypedParamTail -> comma id colon id TypedParamTail | .

TypeDecl -> type id OptParams OptInheritance lbrace TypeElems rbrace .

OptParams -> lparen ParamList rparen | .
OptInheritance -> inherits id OptArgs | .
OptArgs -> lparen ArgList rparen | .

TypeElems -> AttributeDecl TypeElems | MethodDecl TypeElems | .
AttributeDecl -> Assignment  semicolon .
MethodDecl -> id lparen ParamList rparen OptType FuncBody .
ParamList -> id OptType ParamTail | .
ParamTail -> comma id OptType ParamTail | .
FuncBody -> arrow BlockExpr semicolon .

Assignment -> id OptType equal Expr .
OptType -> colon id | .

FuncDecl -> function MethodDecl .

Expr -> BlockExpr | IfExpr | WhileExpr | ForExpr | LetExpr .

LetExpr -> let Assignment AssignmentList in Expr .
AssignmentList -> comma Assignment AssignmentList | .

BlockExpr -> lbrace ExprList rbrace | DestrucExpr .
ExprList -> Expr semicolon ExprList | .

IfExpr -> if lparen Expr rparen BlockExpr OptElif else BlockExpr .
OptElif -> elif lparen BlockExpr rparen BlockExpr .

WhileExpr -> while lparen Expr rparen BlockExpr .
ForExpr -> for lparen id in BlockExpr rparen BlockExpr .

DestrucExpr -> id destrucOp Expr | VectorExpr .

VectorExpr -> lbracket VectorElems rbracket | lbracket id doubleOr id in BlockExpr rbracket | LogicOr .
VectorElems -> Expr VectorTail | .
VectorTail -> comma Expr VectorTail | .

LogicOr -> LogicOr or LogicAnd | LogicAnd .
LogicAnd -> LogicAnd and Equality | Equality .
Equality -> Equality doubleEqual Comparison | Equality notEqual Comparison | Comparison .
Comparison -> Comparison greater Str | Comparison greaterEq Str | Comparison less Str | Comparison lessEq Str | Str .
Str -> Str strOp Term | Term .
Term -> Term plus Factor | Term minus Factor | Factor .
Factor -> Factor star Power | Factor div Power | Power .
Power -> Primary powerOp number | Primary .

Primary -> false | true | number | string | self CallList | id CallList | lparen Expr rparen CallList .

CallList -> dot id CallList | lparen ArgList rparen CallList | lbracket Expr rbracket CallList | .
ArgList -> Expr ArgTail | .
ArgTail -> comma Expr ArgTail | .

"""

# Turn the grammar text into a GrammarLR1 instance.
grJavier = gramophoneSyntaxParser(inputTokensJavier)




In [29]:
# Build the parsing table for the second grammar and show its printed form;
# the object returned here is what the next cell calls `.parse(...)` on.

tableJavier = grJavier.build_parsing_table()
print(tableJavier)

State                                   protocol                                id                                      lbrace                                  rbrace                                  extends                                 lparen                                  rparen                                  colon                                   semicolon                               comma                                   type                                    inherits                                arrow                                   equal                                   function                                let                                     in                                      if                                      else                                    elif                                    while                                   for                                     destrucOp                               lbracket                                

In [30]:
# Parse the shortest program this cell exercises: a single `id` token followed
# by "$" (presumably the end-of-input marker — confirm against the parser).
node = tableJavier.parse(
    [Token(x, x, 0, 0) for x in [
        "id",
        "$"]]
)

# Print the parse result starting from its root node.
node.root.print([0], 0, True)


Program:Program
|
|_______Decls:Decls
|
|_______Expr:Expr
        |
        |_______BlockExpr:BlockExpr
                |
                |_______DestrucExpr:DestrucExpr
                        |
                        |_______VectorExpr:VectorExpr
                                |
                                |_______LogicOr:LogicOr
                                        |
                                        |_______LogicAnd:LogicAnd
                                                |
                                                |_______Equality:Equality
                                                        |
                                                        |_______Comparison:Comparison
                                                                |
                                                                |_______Str:Str
                                                                        |
                                                               