In [136]:
# calclex.py

from sly import Lexer

class CalcLexer(Lexer):
    """Tokenizer for a small Java-like language, built on SLY's ``Lexer``.

    Rules are tried in the order they are defined, so the fixed-text rules
    (operators, ``System.out.println``, ``true``/``false``) are listed before
    the generic ``ID`` rule.  Reserved words are produced by remapping the
    text matched by ``ID`` to a dedicated token type.

    Token values:
        * ``NUMBER`` carries an ``int``.
        * ``TRUE`` / ``FALSE`` carry the Python booleans ``True`` / ``False``.
        * every other token carries the matched source text.
    """

    # Set of token names.   This is always required.
    # NOTE: NUM has no matching rule below, so the lexer never emits it; it is
    # kept only so that existing users of ``CalcLexer.tokens`` see the same set.
    tokens = { ID, PRINT, BOOLEAN, EXTENDS, STRING, PUBLIC, LENGTH, STATIC, RETURN, WHILE, CLASS,
              FALSE, ELSE, TRUE, VOID, MAIN, THIS, INT, NEW, IF, NUM, AND, NOT, EQUALS, LESS, PLUS,
              MINUS, TIMES, DOT, SEMICOLON, COMMA, LEFTBRACE, RIGHTBRACE, LEFTPARENT, RIGHTPARENT,
              LEFTSQRBRACKET, RIGHTSQRBRACKET, NUMBER}

    # String containing ignored characters
    ignore = ' \t\r\f'

    # Regular expression rules for tokens
    PLUS    = r'\+'
    MINUS   = r'-'
    TIMES   = r'\*'
    EQUALS  = r'='
    # The trailing \b stops this rule from swallowing the start of a longer
    # name such as ``System.out.printlnFoo``.
    PRINT   = r'System\.out\.println\b'
    DOT     = r'\.'
    LESS    = r'<'
    NOT     = r'!'
    AND     = r'&&'
    COMMA   = r','
    SEMICOLON       = r';'
    LEFTBRACE       = r'\{'
    RIGHTBRACE      = r'\}'
    LEFTPARENT      = r'\('
    RIGHTPARENT     = r'\)'
    LEFTSQRBRACKET  = r'\['
    RIGHTSQRBRACKET = r'\]'

    @_(r'\d+')
    def NUMBER(self, t):
        """Convert the matched digit string to an ``int``."""
        t.value = int(t.value)
        return t

    # The boolean literals are defined before ID, so without the \b anchor an
    # identifier like ``trueValue`` would be split into TRUE + ID('Value').
    @_(r'true\b')
    def TRUE(self, t):
        """Replace the literal text with the Python value ``True``."""
        t.value = True
        return t

    @_(r'false\b')
    def FALSE(self, t):
        """Replace the literal text with the Python value ``False``."""
        t.value = False
        return t

    # Identifiers and keywords: an ID whose text equals one of the keys below
    # is re-typed as the corresponding keyword token.
    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'
    ID['if']    = IF
    ID['else']  = ELSE
    ID['while'] = WHILE
    ID['class'] = CLASS
    ID['public'] = PUBLIC
    ID['static'] = STATIC
    ID['void']   = VOID
    ID['main']   = MAIN
    ID['extends']= EXTENDS
    ID['return'] = RETURN
    ID['int']    = INT
    ID['boolean']= BOOLEAN
    ID['new']    = NEW
    ID['length'] = LENGTH
    ID['this']   = THIS
    ID['String'] = STRING

    # Line number tracking
    @_(r'\n+')
    def ignore_newline(self, t):
        """Discard newlines while advancing ``self.lineno`` by their count."""
        self.lineno += t.value.count('\n')

    def error(self, t):
        """Report an unrecognised character and skip past it.

        ``t.value`` holds the remaining input, so its first character is the
        offending one; advancing ``self.index`` by one resumes lexing after it.
        """
        print('Line %d: Bad character %r' % (self.lineno, t.value[0]))
        self.index += 1

if __name__ == '__main__':
    # Demo input: a small class with one recursive method.  The literal is
    # spelled line by line; leading whitespace is part of the text and so
    # affects the `index` reported for each token.
    data = (
        '\n'
        '            class Fac {\n'
        '            public int ComputeFac(int num) {\n'
        '            int num_aux;\n'
        '            if (num < 1)\n'
        '            num_aux = 1;\n'
        '            else\n'
        '            num_aux = num * (this.ComputeFac(num-1));\n'
        '            return num_aux;\n'
        '            }\n'
        '            }\n'
        '        '
    )

    # Tokenize lazily and print each token as it is produced.
    lexer = CalcLexer()
    token_stream = lexer.tokenize(data)
    for tok in token_stream:
        print(tok)

Token(type='CLASS', value='class', lineno=2, index=13)
Token(type='ID', value='Fac', lineno=2, index=19)
Token(type='LEFTBRACE', value='{', lineno=2, index=23)
Token(type='PUBLIC', value='public', lineno=3, index=37)
Token(type='INT', value='int', lineno=3, index=44)
Token(type='ID', value='ComputeFac', lineno=3, index=48)
Token(type='LEFTPARENT', value='(', lineno=3, index=58)
Token(type='INT', value='int', lineno=3, index=59)
Token(type='ID', value='num', lineno=3, index=63)
Token(type='RIGHTPARENT', value=')', lineno=3, index=66)
Token(type='LEFTBRACE', value='{', lineno=3, index=68)
Token(type='INT', value='int', lineno=4, index=82)
Token(type='ID', value='num_aux', lineno=4, index=86)
Token(type='SEMICOLON', value=';', lineno=4, index=93)
Token(type='IF', value='if', lineno=5, index=107)
Token(type='LEFTPARENT', value='(', lineno=5, index=110)
Token(type='ID', value='num', lineno=5, index=111)
Token(type='LESS', value='<', lineno=5, index=115)
Token(type='NUMBER', value=1, lineno=

In [224]:
from sly import Parser
#from calclex import CalcLexer

class CalcParser(Parser):
    """SLY parser for the main-class subset of a small Java-like language.

    The start symbol is ``mainClass`` (the first rule defined).  Every grammar
    action returns plain Python data so that the result of ``parse()`` can be
    printed or walked directly:

    * statements and expressions are tuples whose first item names the node,
      e.g. ``('assign', 'u', ('num', 10))`` or ``('binop', '+', left, right)``;
    * ``exprList`` / ``formalList`` are Python lists (empty when absent);
    * ``tp`` is a string: ``'int'``, ``'int[]'``, ``'boolean'`` or a class name.

    Expressions are never evaluated here: operands may be identifiers or
    method calls whose values are unknown at parse time.
    """

    # Get the token list from the lexer (required)
    tokens = CalcLexer.tokens

    debugfile = 'parser.out'

    # Lowest precedence first.  Unary ``!`` binds tighter than the binary
    # operators, and the postfix forms (``.`` and ``[``) bind tightest of all,
    # so ``!a.b + c`` groups as ``(!(a.b)) + c``.
    precedence = (
        ('left', AND),
        ('left', LESS),
        ('left', PLUS, MINUS),
        ('left', TIMES),
        ('right', NOT),
        ('left', DOT, LEFTSQRBRACKET),
    )

    # ------------------------------ Main Declaration ---------------------------------------

    @_('CLASS ID LEFTBRACE PUBLIC STATIC VOID MAIN LEFTPARENT STRING LEFTSQRBRACKET RIGHTSQRBRACKET ID RIGHTPARENT LEFTBRACE stm RIGHTBRACE RIGHTBRACE')
    def mainClass(self, p):
        # ID0 is the class name, ID1 the name of the String[] parameter.
        return ('main_class', p.ID0, p.ID1, p.stm)

    # ------------------------------ Method Declaration ---------------------------------------
    # NOTE: varDecl, formalList, formalRest and tp are not referenced by
    # mainClass yet, so they are not reachable from the start symbol.

    @_('tp ID SEMICOLON')
    def varDecl(self, p):
        return ('var_decl', p.tp, p.ID)

    @_('tp ID formalRest')
    def formalList(self, p):
        return [(p.tp, p.ID)] + p.formalRest

    @_('')
    def formalList(self, p):
        return []

    # formalRest is recursive so that any number of extra parameters is accepted.
    @_('COMMA tp ID formalRest')
    def formalRest(self, p):
        return [(p.tp, p.ID)] + p.formalRest

    @_('')
    def formalRest(self, p):
        return []

    # ------------------------------ Type ---------------------------------------
    @_('INT LEFTSQRBRACKET RIGHTSQRBRACKET')
    def tp(self, p):
        return 'int[]'

    @_('BOOLEAN', 'INT', 'ID')
    def tp(self, p):
        # The token text itself ('boolean', 'int' or the class name).
        return p[0]

    # ------------------------------ STATEMENT ---------------------------------------
    @_('IF LEFTPARENT expr RIGHTPARENT stm ELSE stm')
    def stm(self, p):
        return ('if', p.expr, p.stm0, p.stm1)

    @_('WHILE LEFTPARENT expr RIGHTPARENT stm')
    def stm(self, p):
        return ('while', p.expr, p.stm)

    @_('PRINT LEFTPARENT expr RIGHTPARENT SEMICOLON')
    def stm(self, p):
        return ('print', p.expr)

    @_('ID EQUALS expr SEMICOLON')
    def stm(self, p):
        return ('assign', p.ID, p.expr)

    @_('ID LEFTSQRBRACKET expr RIGHTSQRBRACKET EQUALS expr SEMICOLON')
    def stm(self, p):
        # expr0 is the index, expr1 the value being stored.
        return ('array_assign', p.ID, p.expr0, p.expr1)

    # ------------------------------ EXPRESSIONS ---------------------------------------
    # All binary operators share one action; grouping is decided by the
    # ``precedence`` table above.  p[1] is the operator's source text.
    @_('expr AND expr',
       'expr LESS expr',
       'expr PLUS expr',
       'expr MINUS expr',
       'expr TIMES expr')
    def expr(self, p):
        return ('binop', p[1], p.expr0, p.expr1)

    @_('NOT expr')
    def expr(self, p):
        return ('not', p.expr)

    @_('LEFTPARENT expr RIGHTPARENT')
    def expr(self, p):
        # Parentheses only affect grouping; they add no node of their own.
        return p.expr

    @_('NUMBER')
    def expr(self, p):
        return ('num', p.NUMBER)

    @_('TRUE')
    def expr(self, p):
        return ('bool', p.TRUE)

    @_('FALSE')
    def expr(self, p):
        return ('bool', p.FALSE)

    @_('THIS')
    def expr(self, p):
        return ('this',)

    @_('ID')
    def expr(self, p):
        return ('id', p.ID)

    @_('expr DOT LENGTH')
    def expr(self, p):
        return ('length', p.expr)

    @_('NEW ID LEFTPARENT RIGHTPARENT')
    def expr(self, p):
        return ('new_object', p.ID)

    @_('NEW INT LEFTSQRBRACKET expr RIGHTSQRBRACKET')
    def expr(self, p):
        return ('new_array', p.expr)

    @_('expr LEFTSQRBRACKET expr RIGHTSQRBRACKET')
    def expr(self, p):
        # expr0 is the array, expr1 the index.
        return ('index', p.expr0, p.expr1)

    @_('expr DOT ID LEFTPARENT exprList RIGHTPARENT')
    def expr(self, p):
        return ('call', p.expr, p.ID, p.exprList)

    # Expression list (call arguments)
    @_('expr exprRest')
    def exprList(self, p):
        return [p.expr] + p.exprRest

    @_('')
    def exprList(self, p):
        return []

    @_('COMMA expr exprRest')
    def exprRest(self, p):
        return [p.expr] + p.exprRest

    @_('')
    def exprRest(self, p):
        return []

if __name__ == '__main__':
    # Parse one fixed sample program and show whatever the start rule returns.
    # Nothing here reads interactive input, so no EOFError handling is needed;
    # any exception raised while lexing or parsing is allowed to surface.
    lexer = CalcLexer()
    parser = CalcParser()

    text = '''
            class meunome {public static void main (String [] args){ u = 10;}}
            '''
    result = parser.parse(lexer.tokenize(text))
    print(result)

None


Parser debugging for CalcParser written to parser.out
sly: Syntax error at line 2, token=ID


In [10]:
# NOTE(review): the lexer and parser cells in this notebook import `sly`, not `ply` — confirm which package is actually required.
!pip3 install ply

