In [7]:
#imports
import string
import re

# Task 1 - Table Driven Lexer

In [32]:
class Token():
    def __init__(self, inputValue, inputType):
        self.value = inputValue
        self.type = inputType

class Lexer:
    def __init__(self, input):
        self.code = input
        self.tokenlist = []
        self.pos = 0
        
        self.tokenTable = [
            (r"\b(float|int|bool|colour)\b", "<Type>"),
            
            (r"\b(true|false)\b", "<BooleanLiteral>"),
            (r"\d+\.\d+", "<FloatLiteral>"),
            (r"\d+", "<IntegerLiteral>"),
            (r"\#[0-9a-fA-F]{6}", "<ColourLiteral>"),

            (r"\b(__width)\b", "<PadWidth>"),
            (r"\b(__height)\b", "<PadHeight>"),

            (r"\b(__read)\b", "<__read>"),
            (r"\b(__randi)\b", "<__randi>"),

            (r"[a-zA-Z]([a-zA-Z0-9]|_)*", "<Identifier>"),

            (r"[\*/]|and", "<MultiplicativeOp>"),
            (r"[\+\-]|or", "<AdditiveOp>"),
            (r"[\<\>]|==|!=|<=|>=", "<RelationalOp>"),

            (r"\b(__print)\b", "<__print>"),
            (r"\b(__delay)\b", "<__delay>"),
            (r"\b(__pixelr)\b", "<__pixelr>"),
            (r"\b(__pixel)\b", "<__pixel>"),
            
            (r"not|let|return|if|else|while|fun", "<Keywords>"),
            (r"[\,\(\)\-\=\:\;\{\}]|->", "<Other>")
        ]
    
    def scanText(self):
        # loops while less than size of string
        while self.pos < len(self.code):
            # ignores whitespace
            if self.code[self.pos] == " ":
                self.pos += 1
            # if not whitespace
            else:
                value, type = None, None

                # goes through table of patterns
                for pattern, tokenType in self.tokenTable:
                    match = re.match(pattern, self.code[self.pos:])

                    # if they match
                    if match is not None:
                        type = tokenType
                        value = match.group()
                        break
                
                # if value and type remained empyty
                if value == None or type == None:
                    raise Exception("Error in Syntax")
                # if both value and type have been assigned
                else:
                    self.tokenlist.append(Token(value, type))
                    self.pos += len(value)

text = 'let x:int = __randi 1000;'
lexer = Lexer(text)
lexer.scanText()

print("Text: " + text + "\n")
for token in lexer.tokenlist:
    print("Value: " + token.value + "\t\tType: " + token.type)


Text: let x:int = __randi 1000;

Value: let		Type: <Identifier>
Value: x		Type: <Identifier>
Value: :		Type: <Other>
Value: int		Type: <Type>
Value: =		Type: <Other>
Value: __randi		Type: <__randi>
Value: 1000		Type: <IntegerLiteral>
Value: ;		Type: <Other>


In [9]:
# stringToTest = "let x true"
# lexer = LexerV2()

# lexer.scanCode(stringToTest)
# counter = 1
# print("Code: " + stringToTest)
# for i in lexer.tokens:
#     print("\nToken " + str(counter))
#     print("Value: " + i.value + "\nType: " + i.type)
#     counter +=1

# Task 2 - Hand-crafted LL(k) parser

# Task 3 - AST XML Generation Pass

# Task 4 - Semantic Analysis Pass


# Task 5 - PixIR Code Generation Pass
