In [2]:
#imports
import re

# Task 1 - Table Driven Lexer

In [57]:
class Token:
    """Plain record pairing a token value with a token type tag."""

    def __init__(self, inputValue, inputType):
        # Both constructor arguments are stored unchanged on the instance.
        self.value, self.type = inputValue, inputType

class Lexer:
    """Table-driven lexer that turns source text into (type, text) tuples.

    Constructing a Lexer scans the whole input immediately, stores the
    result in ``tokenList`` and prints the text and the token list.

    Attributes:
        code: the source text being scanned.
        tokenList: list of ``(tokenType, matchedText)`` tuples. For entries
            tagged KEYWORDS or OTHER in the table, the emitted type is the
            matched text upper-cased (e.g. ``('LET', 'let')``, ``(':', ':')``).
        pos: index of the next unread character in ``code``.
        tokenTable: ordered list of ``(regex, tokenType)``; the first pattern
            that matches at the current position wins, so order matters.
    """

    def __init__(self, input):
        self.code = input
        self.tokenList = []
        self.pos = 0
        self.tokenTable = [
            # Word keywords need \b so identifiers such as "letter" or
            # "format" are not split into a keyword plus a remainder.
            (r"\b(?:not|let|return|if|else|for|while|fun)\b|->", "KEYWORDS"),

            # Must come before OTHER (which contains "=") and must list the
            # two-character operators first, otherwise "==", "<=" and ">="
            # are split into two single-character tokens.
            (r"==|!=|<=|>=|[<>]", "RELATIONALOP"),
            (r"[\,\(\)\-\=\:\;\{\}]", "OTHER"),

            (r"\b(float|int|bool|colour)\b", "TYPE"),

            (r"\b(true|false)\b", "BOOLEANLITERAL"),
            (r"\d+\.\d+", "FLOATLITERAL"),
            (r"\d+", "INTEGERLITERAL"),
            (r"\#[0-9a-fA-F]{6}", "COLOURLITERAL"),

            (r"\b(__width)\b", "PADWIDTH"),
            (r"\b(__height)\b", "PADHEIGHT"),

            (r"\b(__read)\b", "__READ"),
            (r"\b(__randi)\b", "__RANDI"),

            # Word operators are bounded so "android" / "order" stay identifiers.
            (r"[\*/]|\b(?:and|AND)\b", "MULTIPLICATIVEOP"),
            (r"[\+]|\b(?:or|OR)\b", "ADDITIVEOP"),

            (r"[a-zA-Z]([a-zA-Z0-9]|_)*", "IDENTIFIER"),

            (r"\b(__print)\b", "__PRINT"),
            (r"\b(__delay)\b", "__DELAY"),
            (r"\b(__pixelr)\b", "__PIXELR"),
            (r"\b(__pixel)\b", "__PIXEL"),
        ]
        self.scanText()

        print("L e x e r")
        print("Text: \t" + self.code)
        print("Tokens: " + str(self.tokenList) + "\n")

    def scanText(self):
        """Scan ``code`` from ``pos`` to the end, appending to ``tokenList``.

        Raises:
            Exception: when no table pattern matches at the current position.
        """
        while self.pos < len(self.code):
            # Skip every kind of whitespace (spaces, tabs, newlines).
            if self.code[self.pos].isspace():
                self.pos += 1
                continue

            remaining = self.code[self.pos:]
            value, tokenType = None, None

            # First matching table row decides the token.
            for pattern, candidateType in self.tokenTable:
                match = re.match(pattern, remaining)
                if match is not None:
                    tokenType = candidateType
                    value = match.group()
                    break

            # No pattern matched: report where scanning stopped.
            if value is None or tokenType is None:
                raise Exception(
                    "Error in Syntax: unexpected character "
                    + repr(self.code[self.pos])
                    + " at position "
                    + str(self.pos)
                )

            if tokenType in ("KEYWORDS", "OTHER"):
                # Keywords and punctuation are their own token type.
                self.tokenList.append((value.upper(), value))
            else:
                self.tokenList.append((tokenType, value))
            self.pos += len(value)

In [58]:
# Smoke test: lex one declaration. The Lexer constructor scans the text and
# prints both the text and the resulting token list.
text = "let x:int = __randi 1-10;"
lexer = Lexer(text)

L e x e r
Text: 	let x:int = __randi 1-10;
Tokens: [('LET', 'let'), ('IDENTIFIER', 'x'), (':', ':'), ('TYPE', 'int'), ('=', '='), ('__RANDI', '__randi'), ('INTEGERLITERAL', '1'), ('-', '-'), ('INTEGERLITERAL', '10'), (';', ';')]



# Task 2 - Hand-crafted LL(k) parser

In [66]:
# Module-level placeholder. Nothing in this cell reads it: the `temp` names
# assigned inside Parser.block and Parser.program are method locals.
temp = None

class ASTNode:
    """Placeholder AST node type; it defines no state or behaviour."""

    def __init__(self):
        """Create a node without setting any attributes."""

class Parser:
    def __init__(self, input):
        """Lex `input` and then call parseCode().

        The Lexer built here is stored as `self.lexer`; every parsing method
        consumes tokens from `self.lexer.tokenList` by popping from the front.
        """
        self.lexer = Lexer(input)
        self.parseCode()

    def parseCode(self):
        # node = self.variableDecl()

        # print("P a r s e r")
        # print("AST: \t" + str(node))
        pass
            
    

    def padRead(self):
        if self.lexer.tokenList[0][0] == "__READ":
            readKey = self.lexer.tokenList.pop(0)
            expr1 = self.expr()

            if self.lexer.tokenList[0][0] == ",":
                comma = self.lexer.tokenList.pop(0)
                expr2 = self.expr()

                return ("PADREAD", (readKey, expr1, comma, expr2))


    def padRandI(self):
        if self.lexer.tokenList[0][0] == "__RANDI":
            randiKey = self.lexer.tokenList.pop(0)
            expr = self.expr()

            return ("PADRANDI", (randiKey, expr))


    def literal(self):
        return ("LITERAL", self.lexer.tokenList.pop(0))


    def actualParams(self):
        node = self.expr()

        while self.lexer.tokenList and self.lexer.tokenList[0][0] == ",":
            comma = self.lexer.tokenList.pop(0)
            nextExpr = self.expr()
            node = (node, comma, nextExpr)
        
        return ("ACTUALPARAMS", node)


    def functionCall(self):
        if self.lexer.tokenList[0][0] == "IDENTIFIER":
            iden = self.lexer.tokenList.pop(0)

            if self.lexer.tokenList[0][0] == "(":
                lBracket = self.lexer.tokenList.pop(0)
                
                if self.lexer.tokenList[0][0] != ")":
                    
                    actParams = self.actualParams()
                
                    if self.lexer.tokenList[0][0] == ")":
                        rBracket = self.lexer.tokenList.pop(0)
                
                        return ("FUNCTIONCALL", (iden, lBracket, actParams, rBracket))
                else:
                    rBracket = self.lexer.tokenList.pop(0)

                    return ("FUNCTIONCALL", (iden, lBracket, rBracket))


    def subExpr(self):
        if self.lexer.tokenList[0][0] == "(":
            rBracket = self.lexer.tokenList.pop(0)
            expr = self.expr()

            if self.lexer.tokenList[0][0] == ")":
                lBracket = self.lexer.tokenList.pop(0)
                
                return ("SUBEXPR", (rBracket, expr, rBracket))


    def unary(self):
        if self.lexer.tokenList[0][0] in ["-", "NOT"]:
            sym = self.lexer.tokenList.pop(0)
            expr = self.expr()

            return ("UNARY", (sym, expr))


    def factor(self):
        node = None

        # LITERAL
        if self.lexer.tokenList[0][0] in ["BOOLEANLITERAL", "INTEGERLITERAL", "FLOATLITERAL", "COLOURLITERAL"]:
            node = self.literal()

        # IDENTIFIER & FUNCTIONCALL
        elif self.lexer.tokenList[0][0] == "IDENTIFIER":   
                     
            if len(self.lexer.tokenList) > 1:
                if self.lexer.tokenList[1][0] == "(":
                    node = self.functionCall()
                else:
                    node = self.lexer.tokenList.pop(0)
            else:
                node = self.lexer.tokenList.pop(0)

        # SUBEXPR
        elif self.lexer.tokenList[0][0] == "(":
            node = self.subExpr()

        # UNARY
        elif self.lexer.tokenList[0][0] == "-" or self.lexer.tokenList[0][0] == "NOT":
            node = self.unary()

        # PADRANDI
        elif self.lexer.tokenList[0][0] == "__RANDI":
            node = self.padRandI()

        # PADWIDTH
        elif self.lexer.tokenList[0][0] == "PADWIDTH":
            node = self.lexer.tokenList.pop(0)

        # PADHEIGHT
        elif self.lexer.tokenList[0][0] == "PADHEIGHT":
            node = self.lexer.tokenList.pop(0)

        #PADREAD
        elif self.lexer.tokenList[0][0] == "__READ":
            node = self.padRead()

        else:
            raise Exception ("Error in Parser")
        
        return ("FACTOR", node)
    

    def term(self):
        node = self.factor()
        
        while self.lexer.tokenList and self.lexer.tokenList[0][0] == "MULTIPLICATIVEOP":
            multOp = self.lexer.tokenList.pop(0)
            nextF = self.factor()
            
            node = node, multOp, nextF
        
        return ("TERM", node)
        

    def simpleExpr(self):
        node = self.term()
        
        while self.lexer.tokenList and self.lexer.tokenList[0][0] == "ADDITIVEOP":
            addOp = self.lexer.tokenList.pop(0)
            nextT = self.term()
            
            node = node, addOp, nextT
        
        return ("SIMPLEEXPR", (node))


    def expr(self):
        node = self.simpleExpr()
        
        while self.lexer.tokenList and self.lexer.tokenList[0][0] == "RELATIONALOP":
            addOp = self.lexer.tokenList.pop(0)
            nextT = self.simpleExpr()
            
            node = node, addOp, nextT
        
        return ("EXPR", (node))


    def assignment(self):
        if self.lexer.tokenList[0][0] == "IDENTIFIER":
            iden = self.lexer.tokenList.pop(0)

            if self.lexer.tokenList[0][0] == "=":
                equals = self.lexer.tokenList.pop(0)
                expr = self.expr()

                return ("ASSIGNMENT", (iden, equals, expr))


    def variableDecl(self):
        if self.lexer.tokenList[0][0] == "LET":
            let = self.lexer.tokenList.pop(0)
            
            if self.lexer.tokenList[0][0] == "IDENTIFIER":
                iden = self.lexer.tokenList.pop(0)
                
                if self.lexer.tokenList[0][0] == ":":
                    colon = self.lexer.tokenList.pop(0)

                    if self.lexer.tokenList[0][0] == "TYPE":
                        type = self.lexer.tokenList.pop(0)

                        if self.lexer.tokenList[0][0] == "=":
                            equals = self.lexer.tokenList.pop(0)
                            expr = self.expr()
            
                            return ("VARIABLEDECL", (let, iden, colon, type, equals, expr))


    def printStatement(self):
        if self.lexer.tokenList[0][0] == "__PRINT":
            printKey = self.lexer.tokenList.pop(0)
            expr = self.expr()

            return ("PRINTSTATEMENT", (printKey, expr))


    def delayStatement(self):
        if self.lexer.tokenList[0][0] == "__DELAY":
            printKey = self.lexer.tokenList.pop(0)
            expr = self.expr()

            return ("DELAYSTATEMENT", (printKey, expr))


    def pixelStatement(self):
        if self.lexer.tokenList[0][0] == "__PIXELR":
            pixelRKey = self.lexer.tokenList.pop(0)
            expr1 = self.expr()

            if self.lexer.tokenList[0][0] == ",":
                comma1 = self.lexer.tokenList.pop(0)
                expr2 = self.expr()

                if self.lexer.tokenList[0][0] == ",":
                    comma2 = self.lexer.tokenList.pop(0)
                    expr3 = self.expr()

                    if self.lexer.tokenList[0][0] == ",":
                        comma3 = self.lexer.tokenList.pop(0)
                        expr4 = self.expr()

                        if self.lexer.tokenList[0][0] == ",":
                            comma4 = self.lexer.tokenList.pop(0)
                            expr5 = self.expr()

                            return ("PIXELSTATEMENT", (pixelRKey, expr1, comma1, expr2, comma2, expr3, comma3, expr4, comma4, expr5))
        
        elif self.lexer.tokenList[0][0] == "__PIXEL":
            pixelKey = self.lexer.tokenList.pop(0)
            expr1 = self.expr()

            if self.lexer.tokenList[0][0] == ",":
                comma1 = self.lexer.tokenList.pop(0)
                expr2 = self.expr()

                if self.lexer.tokenList[0][0] == ",":
                    comma2 = self.lexer.tokenList.pop(0)
                    expr3 = self.expr()

                    return ("PIXELSTATEMENT", (pixelKey, expr1, comma1, expr2, comma2, expr3))


    def rtrnStatement(self):
        if self.lexer.tokenList[0][0] == "RETURN":
            returnKey = self.lexer.tokenList.pop(0)
            expr = self.expr()

            return ("RTRNSTATEMENT", (returnKey, expr))


    def ifStatement(self):
        if self.lexer.tokenList[0][0] == "IF":
            ifKey = self.lexer.tokenList.pop(0)

            if self.lexer.tokenList[0][0] == "(":
                lBracket = self.lexer.tokenList.pop(0)
                expr = self.expr()
                
                if self.lexer.tokenList[0][0] == ")":
                    rBracket = self.lexer.tokenList.pop(0)
                    block1 = self.block()
                    
                    if self.lexer.tokenList and self.lexer.tokenList[0][0] == "ELSE":
                        elseKey = self.lexer.tokenList.pop(0)
                        block2 = self.block()

                        return ("IFSTATEMENT", (ifKey, lBracket, expr, rBracket, block1, elseKey, block2))

                    else:
                        return ("IFSTATEMENT", (ifKey, lBracket, expr, rBracket, block1))


    def forStatement(self):
        if self.lexer.tokenList[0][0] == "FOR":
            forKey = self.lexer.tokenList.pop(0)

            if self.lexer.tokenList[0][0] == "(":
                lBracket = self.lexer.tokenList.pop(0)

                if self.lexer.tokenList[0][0] == "LET":
                    varDec = self.variableDecl()

                    if self.lexer.tokenList[0][0] == ";":
                        semiColon1 = self.lexer.tokenList.pop(0)
                        expr = self.expr()
                        semiColon2 = self.lexer.tokenList.pop(0)

                        if self.lexer.tokenList[0][0] == "IDENTIFIER":
                            ass = self.assignment()
                            
                            if self.lexer.tokenList[0][0] == ")":
                                rBracket = self.lexer.tokenList.pop(0)
                                block = self.block()

                                return ("FORSTATEMENT", (forKey, lBracket, varDec, semiColon1, expr, semiColon2, ass, rBracket, block))
                            
                        elif self.lexer.tokenList[0][0] == ")":
                            rBracket = self.lexer.tokenList.pop(0)
                            block = self.block()

                            return ("FORSTATEMENT", (forKey, lBracket, varDec, semiColon1, expr, semiColon2, rBracket, block)) 
                        
                elif self.lexer.tokenList[0][0] == ";":
                    semiColon1 = self.lexer.tokenList.pop(0)
                    expr = self.expr()
                    semiColon2 = self.lexer.tokenList.pop(0)

                    if self.lexer.tokenList[0][0] == "IDENTIFIER":
                        ass = self.assignment()
                        
                        if self.lexer.tokenList[0][0] == ")":
                            rBracket = self.lexer.tokenList.pop(0)
                            block = self.block()

                            return ("FORSTATEMENT", (forKey, lBracket, semiColon1, expr, semiColon2, ass, rBracket, block))
                        
                    elif self.lexer.tokenList[0][0] == ")":
                        rBracket = self.lexer.tokenList.pop(0)
                        block = self.block()

                        return ("FORSTATEMENT", (forKey, lBracket, semiColon1, expr, semiColon2, rBracket, block)) 


    def whileStatement(self):
        if self.lexer.tokenList[0][0] == "WHILE":
            whileKey = self.lexer.tokenList.pop(0)

            if self.lexer.tokenList[0][0] == "(":
                lBracket = self.lexer.tokenList.pop(0)
                expr = self.expr()

                if self.lexer.tokenList[0][0] == ")":
                    rBracket = self.lexer.tokenList.pop(0)
                    block = self.block()

                    return ("WHILESTATEMENT", (whileKey, lBracket, expr, rBracket, block))


    def formalParam(self):
        if self.lexer.tokenList[0][0] == "IDENTIFIER":
            iden = self.lexer.tokenList.pop(0)

            if self.lexer.tokenList[0][0] == ":":
                colon = self.lexer.tokenList.pop(0)

                if self.lexer.tokenList[0][0] == "TYPE":
                    type = self.lexer.tokenList.pop(0)

                    return ("FORMALPARAM",iden, colon, type)

   
    def formalParams(self):
        node = self.formalParam()

        while self.lexer.tokenList and self.lexer.tokenList[0][0] == ",":
            comma = self.lexer.tokenList.pop(0)
            nextFP = self.formalParam()
            node = (node, comma, nextFP)
        
        return ("FORMALPARAMS", node)

   
    def functionDecl(self):
        if self.lexer.tokenList[0][0] == "FUN":
            funKey = self.lexer.tokenList.pop(0)
            
            if self.lexer.tokenList[0][0] == "IDENTIFIER":
                iden = self.lexer.tokenList.pop(0)

                if self.lexer.tokenList[0][0] == "(":
                    lBracket = self.lexer.tokenList.pop(0)

                    if self.lexer.tokenList[0][0] == "IDENTIFIER":
                        fParams = self.formalParams()

                        if self.lexer.tokenList[0][0] == ")":
                            rBracket = self.lexer.tokenList.pop(0)
                             
                            if self.lexer.tokenList[0][0] == "->":
                                arrow = self.lexer.tokenList.pop(0)

                                if self.lexer.tokenList[0][0] == "TYPE":
                                    type = self.lexer.tokenList.pop(0)
                                    block = self.block

                                    return ("FUNCTIONDECL", (funKey, iden, lBracket, fParams, rBracket, arrow, type, block))
                                
                    elif self.lexer.tokenList[0][0] == ")":
                            rBracket = self.lexer.tokenList.pop(0)
                             
                            if self.lexer.tokenList[0][0] == "->":
                                arrow = self.lexer.tokenList.pop(0)

                                if self.lexer.tokenList[0][0] == "TYPE":
                                    type = self.lexer.tokenList.pop(0)
                                    block = self.block

                                    return ("FUNCTIONDECL", (funKey, iden, lBracket, rBracket, arrow, type, block))



   
    def statement(self):
        node = ()

        # VARIABLEDECL
        if self.lexer.tokenList[0][0] == "LET":
            varDec = self.variableDecl()

            if self.lexer.tokenList[0][0] == ";":
                semiColon = self.lexer.tokenList.pop(0)
                node = (varDec, semiColon)

        # ASSIGNMENT
        elif self.lexer.tokenList[0][0] == "IDENTIFIER":
            ass = self.assignment()
            if self.lexer.tokenList[0][0] == ";":
                semiColon = self.lexer.tokenList.pop(0)
                node = (ass, semiColon)

        # PRINTSTATEMENT
        elif self.lexer.tokenList[0][0] == "__PRINT":
            printS = self.printStatement()
            if self.lexer.tokenList[0][0] == ";":
                semiColon = self.lexer.tokenList.pop(0)
                node = (printS, semiColon)

        # DELAYSTATEMENT
        elif self.lexer.tokenList[0][0] == "__DELAY":
            delayS = self.delayStatement()
            if self.lexer.tokenList[0][0] == ";":
                semiColon = self.lexer.tokenList.pop(0)
                node = (delayS, semiColon)

        # PIXELSTATEMENT
        elif self.lexer.tokenList[0][0] in ["__PIXEL", "__PIXELR"]:
            pixelS = self.pixelStatement()
            if self.lexer.tokenList[0][0] == ";":
                semiColon = self.lexer.tokenList.pop(0)
                node = (pixelS, semiColon)

        # IFSTATEMENT
        elif self.lexer.tokenList[0][0] == "IF":
            ifS = self.ifStatement()
            node = (ifS)

        # FORSTATEMENT
        elif self.lexer.tokenList[0][0] == "FOR":
            forS = self.forStatement()
            node = (forS)

        # WHILESTATEMENT
        elif self.lexer.tokenList[0][0] == "WHILE":
            whileS = self.whileStatement()
            node = (whileS) 

        elif self.lexer.tokenList[0][0] == "RETURN":
            rtrnS = self.rtrnStatement()
            if self.lexer.tokenList[0][0] == ";":
                semiColon = self.lexer.tokenList.pop(0)
                node = (rtrnS, semiColon)

        # FUNCTIONDECL
        elif self.lexer.tokenList[0][0] == "FUN":
            funDecl = self.functionDecl()
            node = (funDecl) 

        # BLOCK
        elif self.lexer.tokenList[0][0] == "{":
            block = self.block()
            node = (block) 
        else:
            print(self.lexer.tokenList[0][0])
            raise Exception ("Error in Parser")
        
        return ("STATEMENT", (node))

   
    def block(self):
        node = ()
        if self.lexer.tokenList[0][0] == "{":
            lCurly = self.lexer.tokenList.pop(0)

            if self.lexer.tokenList[0][0] != "}":

                node = ()
                while self.lexer.tokenList and self.lexer.tokenList[0][0] != "}":
                    temp = (self.statement())
                    if len(node) == 0:
                        node = temp
                    else:
                        node = (node, temp)
                
                rCurly = self.lexer.tokenList.pop(0)
                node = lCurly, node, rCurly
            else:
                rCurly = self.lexer.tokenList.pop(0)
                node = lCurly, rCurly
            
            return ("BLOCK", node)

   
    def program(self):
        node = ()
        while self.lexer.tokenList:
            temp = (self.statement())
            if len(node) == 0:
                node = temp
            else:
                node = (node, temp)

        return ("PROGRAM", node)

In [78]:
# End-to-end check: parse a small function declaration and print the AST.
text1 = "fun XGreaterY (x:int, y:int) -> bool{ let ans:bool = true; if(y>x) {ans = false;} return ans; }"

# Constructing the Parser lexes the text (the Lexer prints its token list).
parser = Parser(text1)

# program() consumes the token list and returns the ("PROGRAM", ...) tuple.
node = parser.program()

print("P a r s e r")
print("AST: \t" + str(node))

L e x e r
Text: 	fun XGreaterY (x:int, y:int) -> bool{ let ans:bool = true; if(y>x) {ans = false;} return ans; }
Tokens: [('FUN', 'fun'), ('IDENTIFIER', 'XGreaterY'), ('(', '('), ('IDENTIFIER', 'x'), (':', ':'), ('TYPE', 'int'), (',', ','), ('IDENTIFIER', 'y'), (':', ':'), ('TYPE', 'int'), (')', ')'), ('->', '->'), ('TYPE', 'bool'), ('{', '{'), ('LET', 'let'), ('IDENTIFIER', 'ans'), (':', ':'), ('TYPE', 'bool'), ('=', '='), ('BOOLEANLITERAL', 'true'), (';', ';'), ('IF', 'if'), ('(', '('), ('IDENTIFIER', 'y'), ('RELATIONALOP', '>'), ('IDENTIFIER', 'x'), (')', ')'), ('{', '{'), ('IDENTIFIER', 'ans'), ('=', '='), ('BOOLEANLITERAL', 'false'), (';', ';'), ('}', '}'), ('RETURN', 'return'), ('IDENTIFIER', 'ans'), (';', ';'), ('}', '}')]

P a r s e r
AST: 	('PROGRAM', (('STATEMENT', ('FUNCTIONDECL', (('FUN', 'fun'), ('IDENTIFIER', 'XGreaterY'), ('(', '('), ('FORMALPARAMS', (('FORMALPARAM', ('IDENTIFIER', 'x'), (':', ':'), ('TYPE', 'int')), (',', ','), ('FORMALPARAM', ('IDENTIFIER', 'y'), (':',

# Task 3 - AST XML Generation Pass

# Task 4 - Semantic Analysis Pass


# Task 5 - PixIR Code Generation Pass
