# In [18]:
import glob, os, re

class JackTokenizer:
    """Break a .jack source file into tokens and step through them.

    On construction the file is read, comments and blank lines are
    removed, and the remaining text is split into a flat token list.
    Call hasMoreTokens() / advance() to walk the list, then tokenType()
    and the matching accessor to inspect the current token.

    Attributes:
        code: comment-free, stripped, non-empty source lines.
        tokens: every token of the file, in source order.
        counter: index of the next token advance() will load.
        current_token: the token most recently loaded ('' initially).
    """

    # Reserved words reported as 'KEYWORD' by tokenType().
    KEYWORDS = frozenset([
        'class', 'constructor', 'function', 'method', 'field', 'static',
        'var', 'int', 'char', 'boolean', 'void', 'true', 'false', 'null',
        'this', 'let', 'do', 'if', 'else', 'while', 'return',
    ])
    # Single-character tokens reported as 'SYMBOL' by tokenType().
    SYMBOLS = frozenset('{}()[].,;+-*/&|<>=~')

    def __init__(self, file):
        """Open the input .jack file and get ready to tokenize it.

        Args:
            file: path of the source file to read.
        """
        # Only the read needs the file handle; close it before parsing.
        with open(file) as f:
            lines = f.readlines()

        self.code = self.removeWhiteSpace(lines)
        self.tokens = self.getTokens(self.code)
        self.counter = 0
        self.current_token = ''

    def hasMoreTokens(self):
        """Return True while there are tokens that advance() has not read."""
        return self.counter < len(self.tokens)

    def advance(self):
        """Make the next token the current token.

        Should be called only if hasMoreTokens() is true; otherwise the
        list lookup raises IndexError. Initially there is no current
        token.
        """
        self.current_token = self.tokens[self.counter]
        self.counter += 1

    def tokenType(self):
        """Return the type of the current token as a string constant.

        Must be called after advance(). The result is one of 'KEYWORD',
        'SYMBOL', 'INT_CONST', 'STRING_CONST' or 'IDENTIFIER'.
        """
        token = self.current_token

        if token in self.KEYWORDS:
            return 'KEYWORD'
        if token in self.SYMBOLS:
            return 'SYMBOL'
        # isdecimal() accepts exactly the digit strings int() can parse,
        # so intVal() cannot fail on a token classified here.
        if token.isdecimal():
            return 'INT_CONST'
        if token[0] == '"':
            return 'STRING_CONST'
        return 'IDENTIFIER'

    def keyWord(self):
        """Return the current token in upper case."""
        return self.current_token.upper()

    def symbol(self):
        """Return the current token unchanged."""
        return self.current_token

    def identifier(self):
        """Return the current token unchanged."""
        return self.current_token

    def intVal(self):
        """Return the current token converted to int."""
        return int(self.current_token)

    def stringVal(self):
        """Return the current token with its double quotes removed."""
        return self.current_token.replace('"', '')

    @staticmethod
    def removeWhiteSpace(code):
        """Strip comments, surrounding whitespace and empty lines.

        Handles `//` line comments and `/* ... */` (including `/** ... */`)
        block comments, also when a block comment spans several lines.
        Comment markers inside a double-quoted string constant are left
        alone, and code following an inline block comment is kept.

        Args:
            code: iterable of source lines (a trailing newline is allowed).

        Returns:
            A list of the remaining non-empty lines, each stripped.
        """
        cleaned_lines = []
        in_block_comment = False  # carries over from one line to the next

        for raw_line in code:
            line = raw_line.split('\n', 1)[0]
            kept = []
            in_string = False  # a string constant never spans lines
            i = 0

            while i < len(line):
                if in_block_comment:
                    end = line.find('*/', i)
                    if end == -1:
                        break  # comment continues on the next line
                    in_block_comment = False
                    i = end + 2
                    # A comment separates tokens just like a blank does.
                    kept.append(' ')
                    continue

                ch = line[i]
                if in_string:
                    kept.append(ch)
                    in_string = ch != '"'
                    i += 1
                elif ch == '"':
                    in_string = True
                    kept.append(ch)
                    i += 1
                elif line.startswith('//', i):
                    break  # rest of the line is a comment
                elif line.startswith('/*', i):
                    in_block_comment = True
                    i += 2
                else:
                    kept.append(ch)
                    i += 1

            stripped = ''.join(kept).strip()
            if stripped:
                cleaned_lines.append(stripped)

        return cleaned_lines

    @staticmethod
    def getTokens(code_without_white_space):
        """Split comment-free source lines into a flat list of tokens.

        String constants are kept whole, including their surrounding
        double quotes. Everything else is split into runs of word
        characters and single non-word characters; whitespace is dropped.

        Args:
            code_without_white_space: lines as returned by
                removeWhiteSpace().

        Returns:
            The list of token strings in source order.
        """
        token_list = []

        for code_line in code_without_white_space:
            # Splitting on a captured '"' yields the repeating pattern
            # [code, '"', string body, '"', code, ...].
            pieces = re.split('(")', code_line)
            j = 0
            while j < len(pieces):
                if pieces[j] == '"':
                    # String constant: re-wrap the body in quotes and skip
                    # the opening quote, the body and the closing quote.
                    token_list.append('"' + pieces[j + 1] + '"')
                    j += 3
                else:
                    # Keep words and symbols; drop '' and any whitespace
                    # character (blank, tab, ...) produced by the split.
                    token_list.extend(
                        token
                        for token in re.split(r'(\W)', pieces[j])
                        if token.strip()
                    )
                    j += 1

        return token_list

        

# In [None]:
def main():
    """Prompt for a .jack file or folder and tokenize each file found.

    Input ending in '/' is treated as a folder: the process changes into
    it and every '*.jack' file there is processed. Any other input is
    treated as the path of a single file.

    Each token is classified with tokenType() and read through the
    matching accessor. The values are not written anywhere yet.
    """
    file_dir = input('Please, insert the file name:')

    # A trailing slash marks a folder; endswith() is also safe on empty input.
    if file_dir.endswith('/'):
        # glob returns names relative to the folder, so work from inside it.
        os.chdir(file_dir)
        files = glob.glob("*.jack")
    else:
        files = [file_dir]

    for file in files:
        tokenizer = JackTokenizer(file)

        while tokenizer.hasMoreTokens():
            tokenizer.advance()
            token_type = tokenizer.tokenType()

            # TODO: hand each value to an output writer; for now the
            # accessors are only exercised and the results discarded.
            if token_type == 'KEYWORD':
                key_word = tokenizer.keyWord()

            elif token_type == 'SYMBOL':
                symbol = tokenizer.symbol()

            elif token_type == 'IDENTIFIER':
                identifier = tokenizer.identifier()

            # tokenType() reports constants as *_CONST, not *_VAL.
            elif token_type == 'INT_CONST':
                int_val = tokenizer.intVal()

            elif token_type == 'STRING_CONST':
                string_val = tokenizer.stringVal()

# Run the interactive driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()