# Verse Interpreter development

Essential installs:
- `pip3 install .....`


This version is used to test the first steps for the verse interpreter. The final version will be used as a full python file.

In [1]:
import string
import math
from enum import Enum

## Setting Token Enum for testing purpose

In [13]:
class TokenTypes(Enum):
    # Data
    Integer = int
    Identifier = string #Names/Variables
    Type = None
    # Aritmetics
    Plus = "+"
    Minus = "-"
    Multiply = "*"
    Divide = "/"
    Greater = ">"
    GreaterEq = ">="
    Lower = "<"
    LowerEq = "<="
    Choice = "|"
    # Mehtods
    For = "for"
    LBracket = "("
    RBracket = ")"
    # Else
    EOF = None
    Colon = ":"
    Comma=","
    SemiColon =";"
    Binding =":="
    SLB = "["
    SRB = "]"
    CurlBracketL = "{"
    CurlBracketR = "}"
    Equal = "="
    Scope = ":"

In [7]:
list(TokenTypes)

[<TokenTypes.Integer: <class 'int'>>,
 <TokenTypes.Identifier: <module 'string' from 'c:\\Users\\marce\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\string.py'>>,
 <TokenTypes.Type: None>,
 <TokenTypes.Plus: '+'>,
 <TokenTypes.Minus: '-'>,
 <TokenTypes.Multiply: '*'>,
 <TokenTypes.Divide: '/'>,
 <TokenTypes.Greater: '>'>,
 <TokenTypes.GreaterEq: '>='>,
 <TokenTypes.Lower: '<'>,
 <TokenTypes.LowerEq: '<='>,
 <TokenTypes.Choice: '|'>,
 <TokenTypes.For: 'for'>,
 <TokenTypes.LBracket: '('>,
 <TokenTypes.RBracket: ')'>,
 <TokenTypes.Colon: ':'>,
 <TokenTypes.Comma: ','>,
 <TokenTypes.SemiColon: ';'>,
 <TokenTypes.Binding: ':='>,
 <TokenTypes.SLB: '['>,
 <TokenTypes.SRB: ']'>,
 <TokenTypes.CurlBracketL: '{'>,
 <TokenTypes.CurlBracketR: '}'>,
 <TokenTypes.Equal: '='>]

In [8]:
class Token:
    def __init__(self, type: TokenTypes, value) -> None:
        self.type = type
        self.value = value
    
    def __info__(self):
         return "{}:{}".format(self.type, self.value)

## Lexer

In [14]:
class lexicon:
    def __init__(self, input: string):
        self.input = input
        self.index = 0
        self.current_char = self.input[self.index]
    
    # moves the pointer a character forward
    def forward(self) -> None:
        self.index += 1

        # checks if index is out of range
        if (self.index >= len(self.input)):
            self.current_char = None
            return
        
        self.current_char = self.input[self.index]
    
    def backward(self) -> None:
        self.index -= 1

        # checks if index is out of range
        if self.index < 0:
            self.current_char = None
            return
        
        self.current_char = self.input[self.index]
    
    def get_int(self) -> int:
        if self.index >= len(self.input):
            return None
        
        result = self.input[self.index]

        # checks if there are multiple digits
        while True:
            self.forward()

            if self.index < len(self.input) and self.input[self.index] != None and self.input[self.index].isnumeric():
                result += self.input[self.index]
            else:
                self.backward()
                break

        return int(result)
    
    def get_var(self) -> string:
        if self.index >= len(self.input):
            return None
        
        result = self.input[self.index]

        # checks if there is a longer variable name
        while True:
            self.forward()

            if self.index < len(self.input) and self.input[self.index] != None and self.input[self.index].isalpha():
                result += self.input[self.index]
            else:
                self.backward()
                break
        
        return result
    
    def get_token(self, char: string) -> Token:
        token = self.check_for_tokentypes(char)

        if token.type != TokenTypes.EOF:
            return token
        
        if char == None:
            return token
            
        # skip spaces.
        if char == ' ':
            self.forward()
            return self.get_token(self.current_char)

        # checks if the current character is a number.
        if char.isnumeric():
            result = self.get_int()
            return Token(TokenTypes.Integer, result)
        
        if char.isalpha():
            result = self.get_var()
            token = self.check_for_tokentypes(result)
                  
        return token

    def check_for_tokentypes(self, char: string) -> Token:
         # checks if the current character is a supported token type.
        match char:
            case TokenTypes.Divide.value:
                return Token(TokenTypes.Divide, TokenTypes.Divide.value)
            case TokenTypes.Multiply.value:
                return Token(TokenTypes.Multiply, TokenTypes.Multiply.value)
            case TokenTypes.Plus.value:
                return Token(TokenTypes.Plus, TokenTypes.Plus.value)
            case TokenTypes.Minus.value:
                return Token(TokenTypes.Minus, TokenTypes.Minus.value)
            case TokenTypes.LBracket.value:
                return Token(TokenTypes.LBracket, TokenTypes.LBracket.value)
            case TokenTypes.RBracket.value:
                return Token(TokenTypes.RBracket, TokenTypes.RBracket.value)
            case TokenTypes.For.value:
                return Token(TokenTypes.For, TokenTypes.For.value) 
            case _:
                return Token(TokenTypes.EOF, None)

## Testing Lexer Types

In [15]:
lexer = lexicon("for 123 + 5 * 23")

while lexer.current_char is not None:
    token = lexer.get_token(lexer.current_char)
    print(str(token.value) + " is of the tokentype: " + str(token.type))
    lexer.forward()

for is of the tokentype: TokenTypes.For
123 is of the tokentype: TokenTypes.Integer
+ is of the tokentype: TokenTypes.Plus
5 is of the tokentype: TokenTypes.Integer
* is of the tokentype: TokenTypes.Multiply
23 is of the tokentype: TokenTypes.Integer


## Syntax Tree Nodes

In [1]:
class BaseNode:
    def __init__(self, token) -> None:
        self.token = token
    
    def visit(self, node):
        if isinstance(node, ProgramNode):
                return self.visit_programNode(node)
        elif isinstance(node, BlockNode):
                return self.visit_blockNode(node)
        elif isinstance(node, ScopeNode):
                return self.visit_scopeNode(node)
        elif isinstance(node, OperatorNode):
                return self.visit_operatorNode(node)
        elif isinstance(node, NumberNode):
                return self.visit_numberNode(node)
        elif isinstance(node, UnaryNode):
                return self.visit_unaryNode(node)
    
    def visit_programNode(self, node):
        pass

    def visit_blockNode(self, node):
        pass

    def visit_scopeNode(self, node):
        pass

    def visit_unaryNode(self, node):
        pass
        
    def visit_operatorNode(self, node):
        match node.token.type:
            case TokenTypes.Divide:
                return self.visit(node.leftNode) // self.visit(node.rightNode)
            case TokenTypes.Multiply:
                return self.visit(node.leftNode) * self.visit(node.rightNode)
            case TokenTypes.Plus:
                return self.visit(node.leftNode) + self.visit(node.rightNode)
            case TokenTypes.Minus:
                return self.visit(node.leftNode) - self.visit(node.rightNode)   

    def visit_numberNode(self, node):
        return node.value

class BlockNode:
    def __init__(self, nodes: list[BaseNode]) -> None:
        super().__init__(token)
        self.nodes = nodes

class ProgramNode(BaseNode):
    def __init__(self, node: BlockNode) -> None:
        super().__init__(token)
        self.node = node


class ScopeNode(BaseNode):
    def __init__(self, nodes: BaseNode) -> None:
        super().__init__(token)
        self.node = token.node

class NumberNode(BaseNode):
    def __init__(self, token) -> None:
        super().__init__(token)
        self.value = token.value

class OperatorNode(BaseNode):
    def __init__(self, token, leftNode: BaseNode, rightNode: BaseNode) -> None:
        super().__init__(token)
        self.leftNode = leftNode
        self.rightNode = rightNode

class UnaryNode(BaseNode):
    def __init__(self, token) -> None:
        super().__init__(token)
        self.value = token.value

#Class that takes a parsed node, containes information if node could have been parsed
class ParsedNode:
    def __init__(self, node:BaseNode, hasSyntaxError:bool ):
        self.node = node
        self.hasSyntaxError = hasSyntaxError

## Parser

In [18]:
class Parser:
    def __init__(self, lexer: lexicon) -> None:
        self.lexer = lexer
        self.current_token = lexer.get_token(self.lexer.current_char)
    
    def forward(self) -> None:
        self.lexer.forward()
        self.current_token = lexer.get_token(self.lexer.current_char)

    def parse(self) -> BaseNode:
        return self.expression()

    #########################################################
    # expressions
    #########################################################
    def expr(self):
        # choice DOT DOT choice
        pass

    def choice(self):
        # equal (| equal)*
        pass

    def equal(self):
        # gl = gl
        pass

    def gl(self):
        #arith ((GT|LT|GE|LE) arith)*
        pass

    def binding(self):
        # scope BINDING expr
        pass

    def scope(self):
        # identifier (Comma identifier)*? COLON type
        pass

    def identifier(self):
        # IDENTIFIER
        pass

    def type(self):
        # INT
        #   | TUPLE LB type (,type)* RB
        pass

    def tuple(self):
        # LB expr (comma expr)*? RB
        pass

    def indexing(self):
        # identifier SLB expr RLB
        pass

    #########################################################
    # Arithmetic calculation
    #########################################################
    def arith(self) -> BaseNode:
        # term ((PLUS|MINUS) term)*

        leftNode = self.term()
        token = self.current_token

        if token.type == TokenTypes.Plus or token.type == TokenTypes.Minus:
            self.forward()
        
            rightNode = self.term()
            return OperatorNode(token, leftNode, rightNode)
        
        return leftNode

    def term(self) -> BaseNode:
        # factor ((MUL|DIV) factor)*

        leftNode = self.number()
        token = self.current_token

        if token.type == TokenTypes.Multiply or token.type == TokenTypes.Divide:
            self.forward()
        
            rightNode = self.number()
            return OperatorNode(token, leftNode,rightNode)
        
        return leftNode

    def factor(self) -> BaseNode:
        # INTTEGER | TUPLE | (MINUS|PLUS) arith | func_call | indexing

        token = self.current_token

        if token.type == TokenTypes.Integer:
            self.forward()
            return NumberNode(token)
        
        if token.type == TokenTypes.LBracket:
            self.forward()
            node = self.expression()
            self.forward()
            return node
        
        return token
    
    #########################################################
    # statements
    #########################################################
    def program(self):
        return self.expr()

    def block(self):
        pass

    def statement_list(self):
        pass

    def statement(self):
        pass

    def func_call(self):
        pass

    def func_call_param(self):
        pass

    def func_decl(self):
        pass

    def func_decl_param(self):
        pass

    def if_statement(self):
        pass

    def for_loop(self):
        pass

    def nested_scope(self):
        pass

In [19]:
class Interpreter:
    def __init__(self, parser: Parser):
        self.parser = parser

    def interpret(self):
        tree = self.parser.parse()
        return tree.visit(tree)

In [24]:
text = "(2 * 3 + 2) * (2 + 4)"
lexer = lexicon(text)
parser = Parser(lexer)
interpreter = Interpreter(parser)
result = interpreter.interpret()
result

48