# Verse Interpreter development

Essential installs:
- `pip3 install .....`


This version is used to test the first steps for the verse interpreter. The final version will be used as a full python file.

# ERROR-TYPE ENUMERATION

In [30]:
from enum import Enum

class ErrorType(Enum):
    SyntaxError = 'Wrong Syntax at'
    SemanticError = 'Wrong Semantics at'
    UnkownError = 'Operation Failure'

# LOGGER CLASSS

In [31]:
class Logger:
    def __init__(self):{}
        
    def __log__(self, string:str):{}

    def __log_error__(self,string:str, type:ErrorType):{}

In [32]:
class Console_Logger(Logger):

    def __log__(self, string:str):
        print(string)

    def __log_error__(self,string:str, type:ErrorType):       
        print("ERROR| " + type.value + ": " + string)


In [33]:
from enum import Enum
import string

# class syntax

class TokenTypes(Enum):
    # Data
    INTEGER = int
    IDENTIFIER = string #Names/Variables
    INT_TYPE = "int"
    TUPLE_TYPE = "tuple"
    ARRAY_TYPE = "array"
    # Aritmetics
    PLUS = "+"
    MINUS = "-"
    MULTIPLY = "*"
    DIVIDE = "/"
    GREATER = ">"
    GREATEREQ = ">="
    LOWER = "<"
    LOWEREQ = "<="
    CHOICE = "|"
    # Mehtods
    FOR = "for"
    IF = "if"
    THEN = "then"
    ELSE = "else"
    LBRACKET = "("
    RBRACKET = ")"
    # Else
    EOF = None
    COLON = ":"
    COMMA=","
    SemiColon =";"
    BINDING =":="
    SLB = "["
    SRB = "]"
    CLB = "{"
    CRB = "}"
    EQUAL = "="
    SCOPE = ":"

    



# File Reader

In [34]:
class FileReader:
    def __init__(self):{}

    
    def get_Lines(self, name:str):
       
        try:
            f = open('..\modules\{}'.format(name),'r')
            lines = f.read().split("\n")
            return (lines,True)
        except:
            return ([],False)
        
       

   
reader = FileReader()
segments,read_success = reader.get_Lines('example.txt')
print(segments)

['halloo wie geht', 'ws', 'efaf', ' aefe ', ' eeerwre']


In [35]:
class Token:
     def __init__(self, type: TokenTypes,value):
        self.value = value
        self.type = type

     def __info__(self):
         return "{}:{}".format(self.type, self.value)


In [36]:
class lexicon:
    def __init__(self, input: string):
        self.input = input
        self.index = 0
        self.current_char = self.input[self.index]
    
    # moves the pointer a character forward
    def forward(self) -> None:
        self.index += 1

        # checks if index is out of range
        if (self.index >= len(self.input)):
            self.current_char = None
            return
        
        self.current_char = self.input[self.index]
    
    def backward(self) -> None:
        self.index -= 1

        # checks if index is out of range
        if self.index < 0:
            self.current_char = None
            return
        
        self.current_char = self.input[self.index]
    
    def get_int(self) -> int:
        if self.index >= len(self.input):
            return None
        
        result = self.input[self.index]

        # checks if there are multiple digits
        while True:
            self.forward()

            if self.index < len(self.input) and self.input[self.index] != None and self.input[self.index].isnumeric():
                result += self.input[self.index]
            else:
                self.backward()
                break

        return int(result)
    
    def get_var(self) -> string:
        if self.index >= len(self.input):
            return None
        
        result = self.input[self.index]

        # checks if there is a longer variable name
        while True:
            self.forward()

            if self.index < len(self.input) and self.input[self.index] != None and self.input[self.index].isalpha():
                result += self.input[self.index]
            else:
                self.backward()
                break
        
        return result
    
    def get_token(self, char: string) -> Token:
        token = self.check_for_tokentypes(char)

        if token.type != TokenTypes.EOF:
            return token
        
        if char == None:
            return token
            
        # skip spaces.
        if char == ' ':
            self.forward()
            return self.get_token(self.current_char)

        # checks if the current character is a number.
        if char.isnumeric():
            result = self.get_int()
            return Token(TokenTypes.INTEGER, result)
        
        if char.isalpha():
            result = self.get_var()
            token = self.check_for_tokentypes(result)
                  
        return token

    def check_for_tokentypes(self, char: string) -> Token:
         # checks if the current character is a supported token type.
        match char:
            case TokenTypes.DIVIDE.value:
                return Token(TokenTypes.DIVIDE, TokenTypes.DIVIDE.value)
            case TokenTypes.MULTIPLY.value:
                return Token(TokenTypes.MULTIPLY, TokenTypes.MULTIPLY.value)
            case TokenTypes.PLUS.value:
                return Token(TokenTypes.PLUS, TokenTypes.PLUS.value)
            case TokenTypes.MINUS.value:
                return Token(TokenTypes.MINUS, TokenTypes.MINUS.value)
            case TokenTypes.LBRACKET.value:
                return Token(TokenTypes.LBRACKET, TokenTypes.LBRACKET.value)
            case TokenTypes.RBRACKET.value:
                return Token(TokenTypes.RBRACKET, TokenTypes.RBRACKET.value)
            case TokenTypes.EQUAL.value:
                return Token(TokenTypes.EQUAL, TokenTypes.EQUAL.value)
            case TokenTypes.GREATER.value:
                return Token(TokenTypes.GREATER, TokenTypes.GREATER.value)
            case TokenTypes.LOWER.value:
                return Token(TokenTypes.LOWER, TokenTypes.LOWER.value)
            case TokenTypes.FOR.value:
                return Token(TokenTypes.FOR, TokenTypes.FOR.value) 
            case _:
                return Token(TokenTypes.EOF, None)

In [37]:
lexer = lexicon("(for 123 >= 5 * 23)")

while lexer.current_char is not None:
    token = lexer.get_token(lexer.current_char)
    print(str(token.value) + " is of the tokentype: " + str(token.type))
    lexer.forward()

( is of the tokentype: TokenTypes.LBRACKET
for is of the tokentype: TokenTypes.FOR
123 is of the tokentype: TokenTypes.INTEGER
> is of the tokentype: TokenTypes.GREATER
= is of the tokentype: TokenTypes.EQUAL
5 is of the tokentype: TokenTypes.INTEGER
* is of the tokentype: TokenTypes.MULTIPLY
23 is of the tokentype: TokenTypes.INTEGER
) is of the tokentype: TokenTypes.RBRACKET


In [38]:
class BaseNode:
    def __init__(self, token) -> None:
        self.token = token
    
    def visit(self, node):
        if isinstance(node, OperatorNode):
            return self.visit_operatorNode(node)
        
        if isinstance(node, UnaryNode):
            return self.visit_unaryNode(node)
        
        return self.visit_numberNode(node)
    

    def visit_unaryNode(self, node):
        match node.token.type:
            case TokenTypes.MINUS:
                return -1 * self.visit(node.node)
            case TokenTypes.PLUS:
                return 1 * self.visit(node.node)
            

        
    def visit_operatorNode(self, node):
        match node.token.type:
            case TokenTypes.DIVIDE:
                return self.visit(node.leftNode) // self.visit(node.rightNode)
            case TokenTypes.MULTIPLY:
                return self.visit(node.leftNode) * self.visit(node.rightNode)
            case TokenTypes.PLUS:
                return self.visit(node.leftNode) + self.visit(node.rightNode)
            case TokenTypes.MINUS:
                return self.visit(node.leftNode) - self.visit(node.rightNode)   
            case TokenTypes.EQUAL:
                if(self.visit(node.leftNode) == self.visit(node.rightNode)):
                    return self.visit(node.leftNode)
                return ""
            case TokenTypes.GREATER:
                if(self.visit(node.leftNode) > self.visit(node.rightNode)):
                    return self.visit(node.leftNode)
                return ""
            case TokenTypes.LOWER:
                 if(self.visit(node.leftNode) < self.visit(node.rightNode)):
                    return self.visit(node.leftNode)
                 return ""   

    def visit_numberNode(self, node):
        return node.value


class BlockNode(BaseNode):
    def __init__(self, node:list[BaseNode]) -> None:
        self.node = node

class Program(BaseNode):
    def __init__(self, node:list[BlockNode]) -> None:
        self.node = node


class ScopeNode(BaseNode):
    def __init__(self,token:Token, nodes:list[BaseNode]) -> None:
        super().__init__(token)
        self.nodes = nodes

class NumberNode(BaseNode):
    def __init__(self, token:Token) -> None:
        super().__init__(token)
        self.value = token.value

class OperatorNode(BaseNode):
    def __init__(self, token:Token, leftNode: BaseNode, rightNode: BaseNode) -> None:
        super().__init__(token)
        self.leftNode = leftNode
        self.rightNode = rightNode

class UnaryNode(BaseNode):
     def __init__(self, token:Token, node) -> None:
        super().__init__(token)
        self.node = node

class ScopeNode(BaseNode):
    def __init__(self, token:Token, nodes:list[BaseNode], type) -> None: #Change into Variable/IdentifierNode
        super().__init__(token)
        self.nodes = nodes
        self.type = type

class TypeNode(BaseNode):
    def __init__(self, token:Token, type) -> None: 
        super().__init__(token)
        self.type = type

class TypeNodeSequence(TypeNode):
    def __init__(self, token:Token, nodes:list[TypeNode], type) -> None: 
        super().__init__(token)
        self.nodes = nodes
        self.type = type


class ArgumentsNode(): #Doesnt need BaseNode since it doesn't have token
    def __init__(self, nodes:list[BaseNode]) -> None: 
        self.nodes = nodes

class FuncCallNode():
    def __init__(self,identifier:BaseNode, args:ArgumentsNode) -> None: #instead of BaseNode maybe IdentifierNode
        self.args = args


In [39]:
#Class that takes a parsed node, containes information if node could have been parsed
class ParsedNode:
    def __init__(self, node:BaseNode, hasSyntaxError:bool ):
        self.node = node
        self.hasSyntaxError = hasSyntaxError

    
        

In [40]:
class Parser:
    def __init__(self, lexer: lexicon):
       self.logger: Logger = Console_Logger()
       self.end = False
       self.lexer = lexer
       self.current_token = lexer.get_token(self.lexer.current_char)
       

    def parse(self) -> ParsedNode:     
        node = self.program().node
        return node
       
  

    def program(self) -> ParsedNode:
        return self.expr()

    def block(self) -> ParsedNode:
        pass

    def statement_list(self):
        pass

    def statement(self):
        pass

    def func_call(self) -> ParsedNode:
        # RULE --> IDENTIFIER LB (func_call_param)? RB  NOT IMPLEMENTED
        node = self.Identifier()

        if(self.current_token.type == TokenTypes.LBRACKET):
            self.forward()
            if(self.current_token.type != TokenTypes.RBRACKET):
                args = self.func_call_args()
                if(args.hasSyntaxError == False and self.current_token.type == TokenTypes.RBRACKET):
                    pass # return Func_call Node with Args
                return ParsedNode(None,True)
            else: pass # Return FunktionNode with empty Args
        return node
        
    
    def func_call_args(self):
        # expr (COMMA expr)*?  
        nodes:list[BaseNode] = [] # Args Instead Base Node maybe ArgsNode

        arg_1 = self.expr() # 1. Arg

        if(arg_1.hasSyntaxError == False):
                nodes.append(arg_1)
                
                # The while method "concatenates" the operations

                while(self.check_type(self.current_token.type,[TokenTypes.COMMA])):           
                   
                    self.forward()
                    arg = self.expr() #

                    if(arg.hasSyntaxError):
                        return arg
                    
                    nodes.append(arg)

                pass # Return list of node args, we could maybe have an ArgumentsNode?
                    # Binds found operation to its left node
                          
        return ParsedNode(None, True)

    def func_decl(self) -> ParsedNode:
        # IDENTIFIER LB func_dec_param RB (COLON type)? BINDING block
          # |IDENTIFIER BINDING LB nested_scope LAMBDA expr RB     UPDATE
          # 
        identifier = self.Identifier()

        if(self.current_token.type == TokenTypes.LBRACKET):
            params = self.func_dec_param()
            if(params.hasSyntaxError == False and self.current_token.type == TokenTypes.RBRACKET):
                self.forward()
                if(self.current_token.type == TokenTypes.COLON):
                    token = self.current_token
                    type = self.type()
                    if(type.hasSyntaxError):
                        return ParsedNode(None, True)
                        
                    if(self.current_token.type == TokenTypes.BINDING):
                        self.forward() 
                        block = self.block()
                        if(block.hasSyntaxError):
                            return ParsedNode(None, True)
            pass      
        pass 

    def func_dec_param(self) -> ParsedNode:

        pass

    def if_statement(self) -> ParsedNode:
        # RULE --> IF LB expr RB ((THEN block ELSE block) | (THEN CBL expr CBR ELSE CBL expr CBR))
             
        if(self.current_token.type == TokenTypes.IF):
            token = self.current_token
            self.forward()

            if(self.current_token.type == TokenTypes.LBRACKET):
                condition = self.expr() # Gets condition part of if statement

                if(condition.hasSyntaxError == False and self.current_token.type == TokenTypes.RBRACKET):
                    self.forward()

                    # Check then block
                    if(self.current_token.type == TokenTypes.THEN):
                        self.forward()

                        then_slb = False

                        # If, then block uses curvy brackets
                        if(self.current_token.type == TokenTypes.CLB):
                            then_clb = True

                        then_block = self.block()

                        if(then_clb and self.current_token.type != TokenTypes.CRB):
                            return ParsedNode(None,True)
                        else: self.forward()

                        else_clb = False

                        # If, else block uses curvy brackets, then block needs to use curvy brackets as well
                        if(then_clb and self.current_token.type == TokenTypes.CLB):
                            else_clb = True
                        else: return ParsedNode(None,True)
                        
                        else_block = self.block()

                        if(else_clb and self.current_token.type != TokenTypes.CRB):
                            return ParsedNode(None,True)
                        else: self.forward()

                    pass # Return IfNod (condition, then_block, else_block)

        return ParsedNode(None, True)

    def for_loop(self) -> ParsedNode:
        pass

    def nested_scope(self) -> ParsedNode:
        # RULE --> IDENTIFIER (,IDENTIFIER)*? COLON TYPE
        node = self.Identifier()
        
        if(node.hasSyntaxError == False):
            identifiers:list[BaseNode] = []
            identifiers.append(node)

            # Checks if next (current) token is comma and saves it in comman_next (Used for later in method)
            comma_next = self.check_type(self.current_token.type,[TokenTypes.COMMA])

            # Iterates while-loop and tries to get all identifiers seperated by a comma
            while(self.check_type(self.current_token.type,[TokenTypes.COMMA])):
                self.forward()
                node = self.Identifier()
                if(node.hasSyntaxError):
                    return ParsedNode(None, True)
                
                identifiers.append(node) # Stores identifier in a Node list

            # Checks if the next token is a colon and then tries to get the type of the scope
            if(self.check_type(self.current_token.type,[TokenTypes.COLON])): 
                token = self.current_token
                self.forward()

                # Tries to get the type of the scope
                type = self.type()

                # If type couldn't be retrieved --> error
                if(type.hasSyntaxError):
                    return ParsedNode(None,True)
                pass # Return NestedScopeNode or ScopeNode
            
            # Here we check if it went into the comma while-loop to indicate if it tried to get a nested scope
            # if it didnt go into the while-loop or didnt do the if-statement for colon (scope), then it surely 
            # couldn't have been a scope, so return the node recevied by the first node = self.Identifier()
            if(comma_next == False):
                return node 
        
        return ParsedNode(None, True)



#####################################

    def expr(self) -> ParsedNode:         
        return self.operation()
    
    """
    This method checks if a token any of the following operations: =, <, >, <=, >=, |, +, -
    Since all of this operations have the same priority and same values output, it is not needed to write them in different methods
    """
    def operation(self):
        # RULE --> op: term ((GT|LT|GE|LE|EQUAL|CHOICE|PLUS|MINUS) term)*

        left_node = self.term()

        # Checks if left node has been received and if the following token is one of the following tokens: : =, <, >, <=, >=, |, +, -

        if(left_node.hasSyntaxError == False and (self.check_type(self.current_token.type,
                [TokenTypes.GREATER,TokenTypes.GREATEREQ,TokenTypes.LOWER,TokenTypes.LOWEREQ, TokenTypes.CHOICE, TokenTypes.PLUS,
                TokenTypes.MINUS, TokenTypes.EQUAL]))):

                node = ParsedNode(None,True)
                
                # The while method "concatenates" the operations

                while(self.check_type(self.current_token.type,
                [TokenTypes.GREATER,TokenTypes.GREATEREQ,TokenTypes.LOWER,TokenTypes.LOWEREQ, TokenTypes.CHOICE, TokenTypes.PLUS,
                TokenTypes.MINUS, TokenTypes.EQUAL])):
                
                    token = self.current_token
                    self.forward()
                    right_node = self.term()
                    if(right_node.hasSyntaxError):
                        return right_node
                    
                    # Binds found operation to its left node
                    if(node.node == None):
                       node = ParsedNode(OperatorNode(token,left_node.node,right_node.node),False)
                    else: node = ParsedNode(OperatorNode(token,node.node,right_node.node),False)
                return node
        return left_node

    """
    Checks the same way in operation method but here it checks for *, /
    """
    def term(self) -> ParsedNode:
        # RULE --> factor ((MUL|DIV) factor)*
        
        left_node = self.factor() 

        if(left_node.hasSyntaxError == False and (self.check_type(self.current_token.type,[TokenTypes.MULTIPLY, TokenTypes.DIVIDE]))):
            node = ParsedNode(None,True)

             # The while method "concatenates" the operations
            while(self.check_type(self.current_token.type,[TokenTypes.MULTIPLY, TokenTypes.DIVIDE])):
               
                token = self.current_token
                self.forward()
                right_node = self.factor()
                if(right_node.hasSyntaxError):
                    return right_node
                
                # Binds found operation to its left node
                if(node.node == None):
                  node = ParsedNode(OperatorNode(token,left_node.node,right_node.node),False)
                else: node = ParsedNode(OperatorNode(token,node.node,right_node.node),False)
            return node
        return left_node
    
    """
    Checks for unary operations, Integers, brackets (highest priority)
    """
    def factor(self) -> ParsedNode:
      
      # RULE -->  INTEGER  
       # : brackets
       # : (MINUS|PLUS) arith
       # : func_call x() x
       # : indexing     NOT IMPLEMENTING
       # : --> means the same as (brackets|unary|func_call) just like in operation()
       # only that for each if a different Node may be created not such as only OperationNode like in operation()
       
        token = self.current_token
        index = self.lexer.index

      
        #Integer check
        if(token.type == TokenTypes.INTEGER):
            self.forward()
            return ParsedNode(NumberNode(token),False)
        
        #Unary operation check
        if(self.check_type(self.current_token.type,[TokenTypes.PLUS, TokenTypes.MINUS])):
            self.forward()
            node = self.operation()
            if(node.hasSyntaxError):        # (--) --> Error needs (-- expr) or (--3)
                return ParsedNode(None, True)
            return ParsedNode(UnaryNode(token,node.node),False)
        
        #Brackets check
        return self.brackets() #Returns invalid Node on invalid Syntax
    
    """
    Checks for brackets (highest priority)
    """
    def brackets(self) -> ParsedNode:
        # RULE --> brackets: LB expr RB 
        if(self.current_token.type == TokenTypes.LBRACKET):
            self.forward()
            node = self.expr()
        
            if(self.current_token.type == TokenTypes.RBRACKET):
                self.forward()
                return node
        return ParsedNode(None,True)
        

    """
    METHODS FROM HERE NEEDS TO BE FULLY IMPLEMENTED AND CHECKED (except forward, check_type, set_to_token)!
    """

    """
    y := 8 y:=(x:int)  y:= method(...)...
    """
    def binding(self) -> ParsedNode:
          # RULE --> scope BINDING expr     NEED UPDATE
        left_node = self.scope()

        if(left_node.hasSyntaxError == False):
            if(self.check_type(self.current_token.type,[TokenTypes.BINDING])):
                token = self.current_token
                self.forward()
                right_node = self.expr()
                if(right_node.hasSyntaxError == False):
                    pass # Return Node
                else: ParsedNode(None,True)
        return left_node

    """
    x:int
    """
    def scope(self) -> ParsedNode:
      # RULE --> Identifier COLON type    NEED UPDATE

        left_node = self.Identifier()
        if(left_node.hasSyntaxError == False):
            if(self.check_type(self.current_token.type,[TokenTypes.COLON])):
                token = self.current_token
                self.forward()
                type = self.type()
                if(type.hasSyntaxError == False):
                    pass # Return Scope Node
                else: ParsedNode(None,True)
        return left_node
    

    """
    variable/method name
    """
    def Identifier(self) -> ParsedNode:
        #RULE --> identifier            NEED UPDATE

        token = self.current_token

        if(token.type == TokenTypes.IDENTIFIER):
            self.forward()
            pass # Return Identifier Node
        return ParsedNode(None, True) 
        
    """
    int or tuple(int,int) or array{int}
    """
    def type(self) -> ParsedNode:
        # RULE -->  INT                         NEED UPDATE
        #        : TUPLE LB type (,type)* RB    

        token = self.current_token
        if(token == TokenTypes.INT_TYPE):
            self.forward()
            pass # Return Node
        
        if(token == TokenTypes.TUPLE_TYPE):
             self.forward()
             if(token == TokenTypes.LBRACKET):
                 
                 types:list[TypeNode] = []

                 types = self.type()
                 if(types.hasSyntaxError == False):
                     if(self.check_type(self.current_token.type, [TokenTypes.COMMA])):
                        while(self.current_token.type == TokenTypes.COMMA):

                            self.forward()
                            t = self.type()

                            if(t.hasSyntaxError):  #If on error
                                return ParsedNode(None,True)
                            types.append(t) #else append to list of types
                      
                     pass # Return TypNode with list of nodes

             ParsedNode(None, True) 
        
        return ParsedNode(None, True) 
        
    """
    a[i:int]
    """
    def indexing(self) -> ParsedNode:
        # RULE --> Identifier SLB expr SRB

        left_node = self.Identifier()

        if(left_node.hasSyntaxError == False):
            if(self.current_token.type == TokenTypes.SLB):
                self.forward()
                expr_node = self.expr()
          
                if(expr_node.hasSyntaxError == False and self.current_token == TokenTypes.SRB):
                     pass # RETURN INDEXING NODE
                return ParsedNode(None,True)
        return left_node
                

    """
    Moves forward in the tokens list
    """
    def forward(self) -> None:
        self.lexer.forward()
        self.current_token = lexer.get_token(self.lexer.current_char)
        if (self.current_token.type == TokenTypes.EOF):
            self.end = True
        print(self.current_token.__info__())

        
    """
    Checks if a type exists in the following types list
    """
    def check_type(self,type:TokenTypes,types:list[TokenTypes]) -> bool:
        return type in types
    

    """
    Sets current token back if a certain path lead to failure (Wrong syntax)
    May need it for later
    """
    def set_to_token(self,index): 
        self.lexer.index = index
        self.forward()


   

    



In [41]:


class Interpreter:
    def __init__(self, parser: Parser):
        self.parser = parser

    def interpret(self):
        tree = self.parser.parse()
        return tree.visit(tree)



In [45]:


text = "-2 * 2"
lexer = lexicon(text)
parser = Parser(lexer)
interpreter = Interpreter(parser)
result = interpreter.interpret()
result



TokenTypes.INTEGER:2
TokenTypes.MULTIPLY:*
TokenTypes.MINUS:-
TokenTypes.MINUS:-
TokenTypes.MINUS:-
TokenTypes.INTEGER:2
TokenTypes.EOF:None


4