In [1]:
from sly import Lexer

class MyLexer(Lexer):
    literals = { '+','-','*','/', "=", ";", ":", ",","(", ")", "'", "{", "}", "[", "]"}
    
    tokens = [
        # Macierzowe operatory binarne
        DOTADD, DOTSUB, DOTMUL, DOTDIV,  
        # Operatory przypisania
        ADDASSIGN, SUBASSIGN, MULASSIGN, DIVASSIGN,
        # Operatory relacyjne
        LT, GT, LE, GE, NE, EQ,  
        # Słowa kluczowe
        IF, ELSE, FOR, WHILE,
        BREAK, CONTINUE, RETURN,
        EYE, ZEROS, ONES,
        PRINT,
        ID, INTNUNM, FLOAT, STRING  # Identyfikatory, liczby, stringi
    ]


    ignore = ' \t'
    # \n jest również ignorowane, ale uwzględnione osobno, by liczyć linie

    @_(r'#.*')
    def ignore_comment(self, t):
        pass

    DOTADD = r'\.\+'
    DOTSUB = r'\.-'
    DOTMUL = r'\.\*'
    DOTDIV = r'\./'
    ADDASSIGN = r'\+='
    SUBASSIGN = r'-='
    MULASSIGN = r'\*='
    DIVASSIGN = r'/='
    LT = r'<'
    GT = r'>'
    LE = r'<='
    GE = r'>='
    NE = r'!='
    EQ = r'=='

    IF = r'if'
    ELSE = r'else'
    FOR = r'for'
    WHILE = r'while'
    BREAK = r'break'
    CONTINUE = r'continue'
    RETURN = r'return'
    EYE = r'eye'
    ZEROS = r'zeros'
    ONES = r'ones'
    PRINT = r'print'

    ID = r'[a-zA-Z_][a-zA-Z0-9_]*'

    @_(r'\d*\.\d+|\d+\.\d*')
    def FLOAT(self, t):
        t.value = float(t.value)
        return t

    @_(r'\d+')
    def INTNUM(self, t):
        t.value = int(t.value)
        return t

    @_(r'"[^"]*"')
    def STRING(self, t):
        t.value = t.value[1:-1] 
        return t
    
    @_(r'\n+')
    def newline(self, t):
        self.lineno += t.value.count('\n')


    def error(self, t):
        print(f'Incorrect sign: {t.value[0]} in line: {self.lineno}')
        self.index += 1

if __name__ == '__main__':
    lexer = MyLexer()

    examples = [
    """A = zeros(5); # create 5x5 matrix filled with zeros
    B = ones(7);  # create 7x7 matrix filled with ones
    I = eye(10);  # create 10x10 matrix filled with ones on diagonal and zeros elsewhere
    D1 = A.+B' ;  # add element-wise A with transpose of B
    D2 -= A.-B' ; # substract element-wise A with transpose of B
    D3 *= A.*B' ; # multiply element-wise A with transpose of B
    D4 /= A./B' ; # divide element-wise A with transpose of B
    """,

    """ res1 = 60.500;
    res2 = 60.;
    res3 = .500;
    res4 = 60.52E2;
    str = "Hello world";

    if (m==n) { 
        if (m >= n) 
            print res;
    }
    """,
    
    """ E1 = [ [ 1, 2, 3],
       [ 4, 5, 6],
       [ 7, 8, 9] ];    
    """,

    # own examples
    """ 4.0>$
    """,

    """ val = 5.0
        ! = 1
    """
    ]

    for i, ex in enumerate(examples):
    # i = 3
    # ex = examples[i]
        print(f"Przykład {i+1}:\n")
        for tok in lexer.tokenize(ex):
            print(f"({tok.lineno}): {tok.type} ({tok.value})")
        if i+1!= len(examples): print("_____________________________")

Przykład 1:

(1): ID (A)
(1): = (=)
(1): ZEROS (zeros)
(1): ( (()
(1): INTNUM (5)
(1): ) ())
(1): ; (;)
(2): ID (B)
(2): = (=)
(2): ONES (ones)
(2): ( (()
(2): INTNUM (7)
(2): ) ())
(2): ; (;)
(3): ID (I)
(3): = (=)
(3): EYE (eye)
(3): ( (()
(3): INTNUM (10)
(3): ) ())
(3): ; (;)
(4): ID (D1)
(4): = (=)
(4): ID (A)
(4): DOTADD (.+)
(4): ID (B)
(4): ' (')
(4): ; (;)
(5): ID (D2)
(5): SUBASSIGN (-=)
(5): ID (A)
(5): DOTSUB (.-)
(5): ID (B)
(5): ' (')
(5): ; (;)
(6): ID (D3)
(6): MULASSIGN (*=)
(6): ID (A)
(6): DOTMUL (.*)
(6): ID (B)
(6): ' (')
(6): ; (;)
(7): ID (D4)
(7): DIVASSIGN (/=)
(7): ID (A)
(7): DOTDIV (./)
(7): ID (B)
(7): ' (')
(7): ; (;)
_____________________________
Przykład 2:

(1): ID (res1)
(1): = (=)
(1): FLOAT (60.5)
(1): ; (;)
(2): ID (res2)
(2): = (=)
(2): FLOAT (60.0)
(2): ; (;)
(3): ID (res3)
(3): = (=)
(3): FLOAT (0.5)
(3): ; (;)
(4): ID (res4)
(4): = (=)
(4): FLOAT (60.52)
(4): ID (E2)
(4): ; (;)
(5): ID (str)
(5): = (=)
(5): STRING (Hello world)
(5): ; (;)
(7): I