# In [2]:
import glob, os, re

class JackTokenizer:
    '''Splits a .jack source file into tokens and serves them one at a time.

    Typical use: call hasMoreTokens() / advance() in a loop, then inspect
    the current token with tokenType() and the matching accessor
    (keyWord, symbol, identifier, intVal, stringVal).
    '''

    # Reserved words and single-character symbols recognised by tokenType().
    KEY_WORDS = frozenset([
        'class', 'constructor', 'function', 'method', 'field', 'static',
        'var', 'int', 'char', 'boolean', 'void', 'true', 'false', 'null',
        'this', 'let', 'do', 'if', 'else', 'while', 'return',
    ])
    SYMBOLS = frozenset([
        '{', '}', '(', ')', '[', ']', '.', ',', ';', '+', '-', '*', '/',
        '&', '|', '<', '>', '=', '~',
    ])

    def __init__(self, file):
        '''Opens the input .jack file and gets ready to tokenize it.'''
        with open(file) as f:
            self.code = f.readlines()

        # The file is only needed for reading; everything below works on
        # the in-memory lines.
        self.code = self.removeWhiteSpace(self.code)
        self.tokens = self.getTokens(self.code)
        self.counter = 0            # index of the next token to hand out
        self.current_token = ''     # empty until advance() is first called

    def hasMoreTokens(self):
        '''Are there more tokens in the input file?'''
        return self.counter < len(self.tokens)

    def advance(self):
        '''Reads the next token and makes it the current
        token. Should be called only if hasMoreTokens()
        is true. Initially there is no current token.'''
        self.current_token = self.tokens[self.counter]
        self.counter += 1

    def tokenType(self):
        '''Returns the type of the current token, as one of the strings
        'keyword', 'symbol', 'int_const', 'string_const' or 'identifier'.'''
        token = self.current_token

        if token in self.KEY_WORDS:
            return 'keyword'
        if token in self.SYMBOLS:
            return 'symbol'
        # isdecimal() only accepts characters that int() can convert, so
        # intVal() cannot fail on a token classified here.
        if token.isdecimal():
            return 'int_const'
        # String constants are stored with their surrounding quotes.
        if token.startswith('"'):
            return 'string_const'
        return 'identifier'

    def keyWord(self):
        '''Returns the current token in upper case.'''
        return self.current_token.upper()

    def symbol(self):
        '''Returns the current token unchanged.'''
        return self.current_token

    def identifier(self):
        '''Returns the current token unchanged.'''
        return self.current_token

    def intVal(self):
        '''Returns the current token converted to an int.'''
        return int(self.current_token)

    def stringVal(self):
        '''Returns the current token with the double quotes removed.'''
        return self.current_token.replace('"', '')

    @staticmethod
    def removeWhiteSpace(code):
        '''Strips comments, surrounding whitespace and empty lines.

        code -- list of source lines (as returned by readlines()).

        Returns a list of the non-empty, stripped lines that remain after
        removing '//' comments and '/* ... */' comments. Block comments may
        span several lines, and comment markers inside a string constant
        are left alone.
        '''
        cleaned_lines = []
        in_block_comment = False    # carried over from one line to the next

        for line in code:
            line = line.split('\n', 1)[0]
            kept = []
            in_string = False       # string constants never span lines
            i = 0

            while i < len(line):
                if in_block_comment:
                    end = line.find('*/', i)
                    if end == -1:
                        break       # the comment continues on the next line
                    in_block_comment = False
                    i = end + 2
                    # A comment separates tokens the way whitespace does.
                    kept.append(' ')
                    continue

                char = line[i]
                if in_string:
                    kept.append(char)
                    if char == '"':
                        in_string = False
                    i += 1
                elif char == '"':
                    in_string = True
                    kept.append(char)
                    i += 1
                elif line.startswith('//', i):
                    break           # the rest of the line is a comment
                elif line.startswith('/*', i):
                    in_block_comment = True
                    i += 2
                else:
                    kept.append(char)
                    i += 1

            stripped = ''.join(kept).strip()
            if stripped:
                cleaned_lines.append(stripped)

        return cleaned_lines

    @staticmethod
    def getTokens(code_without_white_space):
        '''Breaks comment-free source lines into a flat list of tokens.

        code_without_white_space -- list of lines, e.g. the result of
        removeWhiteSpace().

        String constants are kept whole, including their double quotes.
        Everything else is split at every non-word character; the
        characters themselves become tokens unless they are whitespace.
        '''
        token_list = []

        for code_line in code_without_white_space:
            # Splitting on a captured '"' yields alternating
            # text, '"', text, '"', ... pieces, always ending with text.
            pieces = re.split(r'(")', code_line)
            j = 0
            while j < len(pieces):
                if pieces[j] == '"':
                    # Opening quote: the next piece is the string content.
                    token_list.append('"' + pieces[j + 1] + '"')
                    j += 3          # skip content and the closing quote
                else:
                    token_list.extend(
                        token
                        for token in re.split(r'(\W)', pieces[j])
                        if token.strip()   # drops '' and any whitespace
                    )
                    j += 1

        return token_list

        

# In [5]:
class CodeWriter:
    '''Parses the tokens of one Jack class and writes them as XML-like tags.

    Token-position convention used by the compile methods:
    compileClass, compileClassVarDec, VarDec, compileSubroutine and
    compileSubroutineBody return with the current token still being the
    LAST token they wrote (the caller advances); compileParameterList and
    compileStatements return with the current token being the first token
    that does not belong to them.
    '''

    def __init__(self, file, tokenizer):
        '''Opens the output file/stream and gets ready to write into it.

        file      -- path of the output file (opened for writing).
        tokenizer -- object providing current_token, hasMoreTokens(),
                     advance(), tokenType() and the token accessors.
        '''
        self.file_out = open(file, "w")
        self.jmps = 0
        self.tokenizer = tokenizer

        self.type = ['int', 'char', 'boolean']
        self.statements = ['let', 'if', 'while', 'do', 'return']

        self.file_out.write('<tokens>')

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _fail(self, expected):
        '''Raises the parse error for a token that is not `expected`.'''
        raise Exception(
            f"Expected {expected} instead found: "
            f"'{self.tokenizer.current_token}'"
        )

    def _expectToken(self, token):
        '''Checks (without consuming) that the current token is `token`.'''
        if self.tokenizer.current_token != token:
            self._fail(f"'{token}'")

    def _expectIdentifier(self):
        '''Checks (without consuming) that the current token is an identifier.'''
        if self.tokenizer.tokenType() != 'identifier':
            self._fail("an identifier")

    def _isType(self):
        '''True when the current token is a built-in type or a class name.'''
        return (self.tokenizer.current_token in self.type
                or self.tokenizer.tokenType() == 'identifier')

    def _writeAndAdvance(self):
        '''Writes the current token and moves on to the next one.'''
        self.compileTerm()
        self.tokenizer.advance()

    def _compileVarNames(self):
        '''Compiles: type varName (',' varName)* ';' -- stops on the ';'.'''
        if not self._isType():
            self._fail("'int', 'char', 'boolean' or a class name")
        self._writeAndAdvance()

        self._expectIdentifier()
        self._writeAndAdvance()

        while self.tokenizer.current_token == ',':
            self._writeAndAdvance()         # ','
            self._expectIdentifier()
            self._writeAndAdvance()         # varName

        if self.tokenizer.current_token != ';':
            self._fail("',' or ';'")
        self.compileTerm()

    # ------------------------------------------------------------------
    # Public compile methods
    # ------------------------------------------------------------------

    def compileClass(self):
        '''Compiles a complete class:
        'class' className '{' classVarDec* subroutineDec* '}'.

        May be called on a fresh tokenizer (nothing consumed yet) or with
        the current token already positioned on 'class'. Raises Exception
        when the tokens do not follow the grammar.
        '''
        tokenizer = self.tokenizer

        # A fresh tokenizer has no current token yet: fetch the first one.
        if not tokenizer.current_token:
            if not tokenizer.hasMoreTokens():
                raise Exception("Expected 'class' but the input has no tokens")
            tokenizer.advance()

        # 'class'
        self._expectToken('class')
        self.file_out.write('<class>')
        self._writeAndAdvance()

        # className
        self._expectIdentifier()
        self._writeAndAdvance()

        # '{'
        self._expectToken('{')
        self._writeAndAdvance()

        # classVarDec* -- one element per declaration
        while tokenizer.current_token in ['static', 'field']:
            self.file_out.write('<classVarDec>')
            self.compileClassVarDec()
            self.file_out.write('</classVarDec>')
            tokenizer.advance()

        # subroutineDec* -- one element per subroutine
        while tokenizer.current_token in ['constructor', 'function', 'method']:
            self.file_out.write('<subroutineDec>')
            self.compileSubroutine()
            self.file_out.write('</subroutineDec>')
            tokenizer.advance()

        # '}'
        self._expectToken('}')
        self.compileTerm()

        self.file_out.write('</class>')

    def compileClassVarDec(self):
        '''Compiles: ('static' | 'field') type varName (',' varName)* ';'.

        Expects the current token to be 'static' or 'field' and returns
        with the current token on the closing ';'.
        '''
        # ('static' | 'field')
        self._writeAndAdvance()
        self._compileVarNames()

    def VarDec(self):
        '''Compiles: 'var' type varName (',' varName)* ';'.

        Expects the current token to be 'var' and returns with the current
        token on the closing ';'.
        '''
        # 'var'
        self._writeAndAdvance()
        self._compileVarNames()

    def compileTerm(self):
        '''Writes the current token as <type> value </type>.

        Keywords are written in their lower-case source form. A token whose
        type is none of the five known ones is not written.
        '''
        token_type = self.tokenizer.tokenType()

        if token_type == 'keyword':
            value = self.tokenizer.keyWord().lower()
        elif token_type == 'symbol':
            value = self.tokenizer.symbol()
        elif token_type == 'identifier':
            value = self.tokenizer.identifier()
        elif token_type == 'int_const':
            value = self.tokenizer.intVal()
        elif token_type == 'string_const':
            value = self.tokenizer.stringVal()
        else:
            return

        self.file_out.write(f"<{token_type}> {value} </{token_type}>")

    def compileSubroutine(self):
        '''Compiles: ('constructor' | 'function' | 'method') ('void' | type)
        subroutineName '(' parameterList ')' subroutineBody.

        Returns with the current token on the '}' that closes the body.
        '''
        # ('constructor' | 'function' | 'method')
        self._writeAndAdvance()

        # ('void' | type)
        if not (self.tokenizer.current_token == 'void' or self._isType()):
            self._fail("'void', 'int', 'char', 'boolean' or a class name")
        self._writeAndAdvance()

        # subroutineName
        self._expectIdentifier()
        self._writeAndAdvance()

        # '('
        self._expectToken('(')
        self._writeAndAdvance()

        # parameterList -- already leaves the current token on ')'
        self.compileParameterList()

        # ')'
        self._expectToken(')')
        self._writeAndAdvance()

        # subroutineBody
        self._expectToken('{')
        self.compileSubroutineBody()

    def compileSubroutineBody(self):
        '''Compiles: '{' varDec* statements '}'.

        Expects the current token to be '{' and returns with the current
        token on the closing '}'.
        '''
        # '{'
        self._writeAndAdvance()

        # varDec*
        while self.tokenizer.current_token == 'var':
            self.VarDec()
            self.tokenizer.advance()

        # statements -- returns already positioned past the last statement,
        # so no extra advance() here.
        self.compileStatements()

        # '}'
        self._expectToken('}')
        self.compileTerm()

    def compileStatements(self):
        '''Compiles a possibly empty sequence of statements.

        Returns with the current token on the first token that does not
        start a statement.
        '''
        while self.tokenizer.current_token in self.statements:
            self.compileStatement()
            self.tokenizer.advance()

    def compileStatement(self):
        '''Placeholder for compiling one statement.

        Statement compilation is not implemented yet; raising here gives a
        clear error instead of silently skipping the statement's tokens.
        '''
        # TODO: implement let / if / while / do / return.
        raise NotImplementedError(
            "Statement compilation is not implemented yet "
            f"(found: '{self.tokenizer.current_token}')"
        )

    def compileParameterList(self):
        '''Compiles a possibly empty parameter list:
        ((type varName) (',' type varName)*)?

        Does not consume the enclosing parentheses; returns with the
        current token on the first token after the list.
        '''
        self.file_out.write('<parameterList>')

        if self._isType():
            # type varName
            self._writeAndAdvance()
            self._expectIdentifier()
            self._writeAndAdvance()

            # (',' type varName)*
            while self.tokenizer.current_token == ',':
                self._writeAndAdvance()

                if not self._isType():
                    self._fail("'int', 'char', 'boolean' or a class name")
                self._writeAndAdvance()

                self._expectIdentifier()
                self._writeAndAdvance()

        self.file_out.write('</parameterList>')

    def Close(self):
        '''Writes the closing tag and closes the output file.

        Calling it again after the file is closed does nothing.
        '''
        if self.file_out.closed:
            return

        self.file_out.write('</tokens>')
        self.file_out.close()


# In [None]:
def main():
    '''Asks for a .jack file or folder and tokenizes every file found.

    An answer ending in '/' is treated as a folder: the working directory
    is changed to it and every '*.jack' file inside is processed. Any
    other answer is treated as a single file name.

    Each token is only read through its matching accessor for now.
    '''
    file_dir = input('Please, insert the file name:')

    if not file_dir:
        # Nothing was typed: there is no file or folder to open.
        print('No file or folder name given.')
        return

    if file_dir.endswith('/'):  # Verifying if the input is a file or a folder
        os.chdir(file_dir)
        files = glob.glob("*.jack")
    else:
        files = [file_dir]

    for file in files:
        tokenizer = JackTokenizer(file)

        # One accessor per value that tokenType() can return.
        accessors = {
            'keyword': tokenizer.keyWord,
            'symbol': tokenizer.symbol,
            'identifier': tokenizer.identifier,
            'int_const': tokenizer.intVal,
            'string_const': tokenizer.stringVal,
        }

        while tokenizer.hasMoreTokens():
            tokenizer.advance()
            accessors[tokenizer.tokenType()]()

        # TODO: hand the tokenizer to a CodeWriter and close it once the
        # output step is wired up.

# Start the interactive driver only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()